Refactor Dashboard Canvas and Units for Improved Layout and Performance

- Updated BlankCanvas.tsx to utilize new dashboard size utilities for dynamic unit sizing and spacing.
- Enhanced BlankDashboardUnit.tsx to simplify unit rendering and remove unnecessary props.
- Modified BlankRow.tsx to eliminate redundant props and streamline unit rendering.
- Refactored Index.tsx to implement grid layout using CSS properties for better responsiveness.
- Improved DashboardBaseComponent.tsx by removing unused pixel helpers and optimizing drag/resize logic.
- Updated OpenTelemetry profiles roadmap documentation to reflect completed phases and remaining tasks.
This commit is contained in:
Nawaz Dhandala
2026-03-27 12:26:59 +00:00
parent 465cc798ec
commit a48e8a2710
6 changed files with 140 additions and 518 deletions

View File

@@ -1,5 +1,8 @@
import React, { FunctionComponent, ReactElement } from "react";
import DefaultDashboardSize from "Common/Types/Dashboard/DashboardSize";
import DefaultDashboardSize, {
GetDashboardUnitWidthInPx,
SpaceBetweenUnitsInPx,
} from "Common/Types/Dashboard/DashboardSize";
import BlankRowElement from "./BlankRow";
import DashboardViewConfig from "Common/Types/Dashboard/DashboardViewConfig";
@@ -21,7 +24,10 @@ const BlankCanvasElement: FunctionComponent<ComponentProps> = (
if (!props.isEditMode && props.dashboardViewConfig.components.length === 0) {
return (
<div className="mx-3 mt-4 rounded-2xl border border-dashed border-gray-200 bg-gray-50/50 text-center py-20 px-10" style={{ boxShadow: "0 2px 8px -2px rgba(0, 0, 0, 0.06)" }}>
<div
className="mx-3 mt-4 rounded-2xl border border-dashed border-gray-200 bg-gray-50/50 text-center py-20 px-10"
style={{ boxShadow: "0 2px 8px -2px rgba(0, 0, 0, 0.06)" }}
>
<div
className="mx-auto w-14 h-14 rounded-full bg-white border border-gray-200 flex items-center justify-center mb-4"
style={{ boxShadow: "0 1px 3px 0 rgba(0, 0, 0, 0.04)" }}
@@ -50,28 +56,32 @@ const BlankCanvasElement: FunctionComponent<ComponentProps> = (
);
}
// have a grid with width cols and height rows
const gap: number = SpaceBetweenUnitsInPx;
const unitSize: number = GetDashboardUnitWidthInPx(
props.totalCurrentDashboardWidthInPx,
);
return (
<div
className={`grid grid-cols-${width}`}
style={
props.isEditMode
style={{
display: "grid",
gridTemplateColumns: `repeat(${width}, 1fr)`,
gap: `${gap}px`,
gridAutoRows: `${unitSize}px`,
...(props.isEditMode
? {
backgroundImage:
"linear-gradient(to right, rgba(203, 213, 225, 0.3) 1px, transparent 1px), linear-gradient(to bottom, rgba(203, 213, 225, 0.3) 1px, transparent 1px)",
backgroundSize: "20px 20px",
borderRadius: "16px",
}
: {}
}
: {}),
}}
>
{Array.from(Array(height).keys()).map((_: number, index: number) => {
return (
<BlankRowElement
key={index}
totalCurrentDashboardWidthInPx={
props.totalCurrentDashboardWidthInPx
}
isEditMode={props.isEditMode}
rowNumber={index}
onClick={(top: number, left: number) => {

View File

@@ -1,47 +1,32 @@
import {
GetDashboardUnitHeightInPx,
MarginForEachUnitInPx,
} from "Common/Types/Dashboard/DashboardSize";
import React, { FunctionComponent, ReactElement } from "react";
export interface ComponentProps {
isEditMode: boolean;
onClick: () => void;
currentTotalDashboardWidthInPx: number;
id: string;
}
const BlankDashboardUnitElement: FunctionComponent<ComponentProps> = (
props: ComponentProps,
): ReactElement => {
const heightOfUnitInPx: number = GetDashboardUnitHeightInPx(
props.currentTotalDashboardWidthInPx,
);
const widthOfUnitInPx: number = heightOfUnitInPx; // its a square
let className: string = "transition-all duration-150";
if (props.isEditMode) {
className +=
" rounded-md cursor-pointer";
}
return (
<div
id={props.id}
className={className}
className={
props.isEditMode
? "rounded-md cursor-pointer transition-all duration-150"
: "transition-all duration-150"
}
onClick={() => {
props.onClick();
}}
style={{
width: widthOfUnitInPx + "px",
height: heightOfUnitInPx + "px",
margin: MarginForEachUnitInPx + "px",
border: props.isEditMode ? "1px solid rgba(203, 213, 225, 0.4)" : "none",
border: props.isEditMode
? "1px solid rgba(203, 213, 225, 0.4)"
: "none",
borderRadius: "6px",
}}
></div>
/>
);
};

View File

@@ -6,7 +6,6 @@ export interface ComponentProps {
rowNumber: number;
onClick: (top: number, left: number) => void;
isEditMode: boolean;
totalCurrentDashboardWidthInPx: number;
}
const BlankRowElement: FunctionComponent<ComponentProps> = (
@@ -20,9 +19,6 @@ const BlankRowElement: FunctionComponent<ComponentProps> = (
(_: number, index: number) => {
return (
<BlankDashboardUnitElement
currentTotalDashboardWidthInPx={
props.totalCurrentDashboardWidthInPx
}
key={props.rowNumber + "-" + index}
isEditMode={props.isEditMode}
onClick={() => {

View File

@@ -1,7 +1,10 @@
import React, { FunctionComponent, ReactElement } from "react";
import BlankCanvasElement from "./BlankCanvas";
import DashboardViewConfig from "Common/Types/Dashboard/DashboardViewConfig";
import DefaultDashboardSize from "Common/Types/Dashboard/DashboardSize";
import DefaultDashboardSize, {
GetDashboardUnitWidthInPx,
SpaceBetweenUnitsInPx,
} from "Common/Types/Dashboard/DashboardSize";
import DashboardBaseComponent from "Common/Types/Dashboard/DashboardComponents/DashboardBaseComponent";
import BlankDashboardUnitElement from "./BlankDashboardUnit";
import DashboardBaseComponentElement from "../Components/DashboardBaseComponent";
@@ -34,6 +37,11 @@ const DashboardCanvas: FunctionComponent<ComponentProps> = (
const dashboardCanvasRef: React.RefObject<HTMLDivElement> =
React.useRef<HTMLDivElement>(null);
const gap: number = SpaceBetweenUnitsInPx;
const unitSize: number = GetDashboardUnitWidthInPx(
props.currentTotalDashboardWidthInPx,
);
const renderComponents: GetReactElementFunction = (): ReactElement => {
const canvasHeight: number =
props.dashboardViewConfig.heightInDashboardUnits ||
@@ -52,7 +60,7 @@ const DashboardCanvas: FunctionComponent<ComponentProps> = (
grid[row] = new Array(canvasWidth).fill(null);
}
let maxHeightInDashboardUnits: number = 0; // max height of the grid
let maxHeightInDashboardUnits: number = 0;
// Place components in the grid
allComponents.forEach((component: DashboardBaseComponent) => {
@@ -106,16 +114,11 @@ const DashboardCanvas: FunctionComponent<ComponentProps> = (
if (!component) {
if (!props.isEditMode && i >= maxHeightInDashboardUnits) {
// if we are not in edit mode, we should not render blank units
continue;
}
// render a blank unit
renderedComponents.push(
<BlankDashboardUnitElement
currentTotalDashboardWidthInPx={
props.currentTotalDashboardWidthInPx
}
isEditMode={props.isEditMode}
key={`blank-unit-${i}-${j}`}
onClick={() => {
@@ -128,8 +131,6 @@ const DashboardCanvas: FunctionComponent<ComponentProps> = (
}
}
// remove nulls from the renderedComponents array
const finalRenderedComponents: Array<ReactElement> =
renderedComponents.filter(
(component: ReactElement | null): component is ReactElement => {
@@ -137,29 +138,27 @@ const DashboardCanvas: FunctionComponent<ComponentProps> = (
},
);
const width: number = DefaultDashboardSize.widthInDashboardUnits;
const canvasClassName: string = `grid grid-cols-${width}`;
return (
<div
ref={dashboardCanvasRef}
className={canvasClassName}
style={
props.isEditMode
style={{
display: "grid",
gridTemplateColumns: `repeat(${canvasWidth}, 1fr)`,
gap: `${gap}px`,
gridAutoRows: `${unitSize}px`,
borderRadius: "16px",
...(props.isEditMode
? {
backgroundImage:
"radial-gradient(circle, rgba(148, 163, 184, 0.3) 0.8px, transparent 0.8px)",
backgroundSize: "20px 20px",
borderRadius: "16px",
padding: "8px",
border: "1px dashed rgba(148, 163, 184, 0.25)",
}
: {
padding: "8px",
borderRadius: "16px",
}
}
}),
}}
>
{finalRenderedComponents}
</div>
@@ -208,18 +207,21 @@ const DashboardCanvas: FunctionComponent<ComponentProps> = (
props.selectedComponentId?.toString() === componentId.toString();
const component: DashboardBaseComponent | undefined =
props.dashboardViewConfig.components.find((c: DashboardBaseComponent) => {
return c.componentId.toString() === componentId.toString();
});
props.dashboardViewConfig.components.find(
(c: DashboardBaseComponent) => {
return c.componentId.toString() === componentId.toString();
},
);
const currentUnitSizeInPx: number =
props.currentTotalDashboardWidthInPx / 12;
const w: number = component?.widthInDashboardUnits || 0;
const h: number = component?.heightInDashboardUnits || 0;
// Compute pixel dimensions for child component rendering (charts, etc.)
const widthOfComponentInPx: number =
unitSize * w + gap * (w - 1);
const heightOfComponentInPx: number =
currentUnitSizeInPx * (component?.heightInDashboardUnits || 0);
const widthOfComponentInPx: number =
currentUnitSizeInPx * (component?.widthInDashboardUnits || 0);
unitSize * h + gap * (h - 1);
return (
<DashboardBaseComponentElement
@@ -232,7 +234,9 @@ const DashboardCanvas: FunctionComponent<ComponentProps> = (
dashboardComponentHeightInPx={heightOfComponentInPx}
metricTypes={props.metrics.metricTypes}
dashboardStartAndEndDate={props.dashboardStartAndEndDate}
dashboardCanvasWidthInPx={dashboardCanvasRef.current?.clientWidth || 0}
dashboardCanvasWidthInPx={
dashboardCanvasRef.current?.clientWidth || 0
}
dashboardCanvasTopInPx={dashboardCanvasRef.current?.clientTop || 0}
dashboardCanvasLeftInPx={dashboardCanvasRef.current?.clientLeft || 0}
totalCurrentDashboardWidthInPx={props.currentTotalDashboardWidthInPx}
@@ -244,7 +248,6 @@ const DashboardCanvas: FunctionComponent<ComponentProps> = (
isSelected={isSelected}
refreshTick={props.refreshTick}
onClick={() => {
// component is selected
props.onComponentSelected(componentId);
}}
/>
@@ -274,7 +277,6 @@ const DashboardCanvas: FunctionComponent<ComponentProps> = (
description="Edit the settings of this component"
dashboardViewConfig={props.dashboardViewConfig}
onClose={() => {
// unselect this component.
props.onComponentUnselected();
}}
onComponentDelete={() => {

View File

@@ -25,7 +25,6 @@ import DefaultDashboardSize, {
GetDashboardComponentWidthInDashboardUnits,
GetDashboardUnitHeightInPx,
GetDashboardUnitWidthInPx,
MarginForEachUnitInPx,
SpaceBetweenUnitsInPx,
} from "Common/Types/Dashboard/DashboardSize";
import { GetReactElementFunction } from "Common/UI/Types/FunctionTypes";
@@ -93,9 +92,6 @@ const DashboardBaseComponentElement: FunctionComponent<ComponentProps> = (
// ── Minimal React state (only for hover gating) ───────────
const [isHovered, setIsHovered] = useState<boolean>(false);
// We track "is dragging" in a ref so the mousemove handler never
// depends on React state. A *second* copy in useState lets the
// JSX read it for className changes on mount/unmount of the drag.
const [isDragging, setIsDragging] = useState<boolean>(false);
// ── Refs ──────────────────────────────────────────────────
@@ -107,7 +103,6 @@ const DashboardBaseComponentElement: FunctionComponent<ComponentProps> = (
useRef<DragSession | null>(null);
const overlayRef: React.MutableRefObject<HTMLDivElement | null> =
useRef<HTMLDivElement | null>(null);
// Keep latest props/component available for the imperative handlers.
const latestProps: React.MutableRefObject<ComponentProps> =
useRef<ComponentProps>(props);
const latestComponent: React.MutableRefObject<DashboardBaseComponent> =
@@ -115,14 +110,6 @@ const DashboardBaseComponentElement: FunctionComponent<ComponentProps> = (
latestProps.current = props;
latestComponent.current = component;
// ── Pixel helpers ─────────────────────────────────────────
const unitW: number = GetDashboardUnitWidthInPx(
props.totalCurrentDashboardWidthInPx,
);
const unitH: number = GetDashboardUnitHeightInPx(
props.totalCurrentDashboardWidthInPx,
);
// ── Core imperative handlers (stable — no deps) ──────────
function updateTooltip(session: DragSession): void {
@@ -150,6 +137,7 @@ const DashboardBaseComponentElement: FunctionComponent<ComponentProps> = (
const uH: number = GetDashboardUnitHeightInPx(
p.totalCurrentDashboardWidthInPx,
);
const g: number = SpaceBetweenUnitsInPx;
const dxPx: number = e.clientX - s.startMouseX;
const dyPx: number = e.clientY - s.startMouseY;
@@ -160,11 +148,9 @@ const DashboardBaseComponentElement: FunctionComponent<ComponentProps> = (
}
if (s.mode === "move") {
// Pure CSS transform — no React render
el.style.transform = `translate(${dxPx}px, ${dyPx}px) scale(1.01)`;
el.style.zIndex = "100";
// Compute snapped grid position for the tooltip & commit
const dxUnits: number = Math.round(dxPx / uW);
const dyUnits: number = Math.round(dyPx / uH);
@@ -183,26 +169,33 @@ const DashboardBaseComponentElement: FunctionComponent<ComponentProps> = (
updateTooltip(s);
} else {
// Resize modes — directly set width / height on the DOM element
const rect: DOMRect = el.getBoundingClientRect();
if (s.mode === "resize-w" || s.mode === "resize-corner") {
const wPx: number = Math.max(uW, e.pageX - (window.scrollX + rect.left));
const wPx: number = Math.max(
uW,
e.pageX - (window.scrollX + rect.left),
);
let wUnits: number = GetDashboardComponentWidthInDashboardUnits(
p.totalCurrentDashboardWidthInPx,
wPx,
);
wUnits = Math.max(c.minWidthInDashboardUnits, wUnits);
wUnits = Math.min(DefaultDashboardSize.widthInDashboardUnits, wUnits);
wUnits = Math.min(
DefaultDashboardSize.widthInDashboardUnits,
wUnits,
);
s.liveWidth = wUnits;
const newWidthPx: number =
uW * wUnits + (SpaceBetweenUnitsInPx - 2) * (wUnits - 1);
const newWidthPx: number = uW * wUnits + g * (wUnits - 1);
el.style.width = `${newWidthPx}px`;
}
if (s.mode === "resize-h" || s.mode === "resize-corner") {
const hPx: number = Math.max(uH, e.pageY - (window.scrollY + rect.top));
const hPx: number = Math.max(
uH,
e.pageY - (window.scrollY + rect.top),
);
let hUnits: number = GetDashboardComponentHeightInDashboardUnits(
p.totalCurrentDashboardWidthInPx,
hPx,
@@ -210,8 +203,7 @@ const DashboardBaseComponentElement: FunctionComponent<ComponentProps> = (
hUnits = Math.max(c.minHeightInDashboardUnits, hUnits);
s.liveHeight = hUnits;
const newHeightPx: number =
uH * hUnits + SpaceBetweenUnitsInPx * (hUnits - 1);
const newHeightPx: number = uH * hUnits + g * (hUnits - 1);
el.style.height = `${newHeightPx}px`;
}
@@ -233,8 +225,6 @@ const DashboardBaseComponentElement: FunctionComponent<ComponentProps> = (
overlay.style.inset = "0";
overlay.style.zIndex = "9999";
overlay.style.cursor = cursor;
// Transparent but captures all pointer events, preventing
// underlying components from firing mouseEnter/mouseLeave.
overlay.style.background = "transparent";
document.body.appendChild(overlay);
overlayRef.current = overlay;
@@ -253,7 +243,6 @@ const DashboardBaseComponentElement: FunctionComponent<ComponentProps> = (
if (el) {
el.style.transform = "";
el.style.zIndex = "";
// Width/height are cleared so React's values take over after commit
el.style.width = "";
el.style.height = "";
}
@@ -268,7 +257,6 @@ const DashboardBaseComponentElement: FunctionComponent<ComponentProps> = (
const c: DashboardBaseComponent = latestComponent.current;
const p: ComponentProps = latestProps.current;
// Build the final component — only the fields that changed
const updated: DashboardBaseComponent = { ...c };
let changed: boolean = false;
@@ -297,7 +285,6 @@ const DashboardBaseComponentElement: FunctionComponent<ComponentProps> = (
}
}
// Clean up if component unmounts while dragging
useEffect(() => {
return () => {
window.removeEventListener("mousemove", onMouseMove);
@@ -333,7 +320,6 @@ const DashboardBaseComponentElement: FunctionComponent<ComponentProps> = (
sessionRef.current = session;
setIsDragging(true);
// Show initial tooltip value
updateTooltip(session);
window.addEventListener("mousemove", onMouseMove);
@@ -379,20 +365,10 @@ const DashboardBaseComponentElement: FunctionComponent<ComponentProps> = (
const className: string = [
"relative rounded-xl bg-white border overflow-hidden",
`col-span-${widthOfComponent} row-span-${heightOfComponent}`,
borderClass,
extraClass,
].join(" ");
// ── Computed sizes (React-controlled, used when NOT dragging) ──
const componentHeight: number =
unitH * heightOfComponent +
SpaceBetweenUnitsInPx * (heightOfComponent - 1);
const componentWidth: number =
unitW * widthOfComponent +
(SpaceBetweenUnitsInPx - 2) * (widthOfComponent - 1);
// ── Render ────────────────────────────────────────────────
const getMoveHandle: GetReactElementFunction = (): ReactElement => {
@@ -522,15 +498,13 @@ const DashboardBaseComponentElement: FunctionComponent<ComponentProps> = (
<div
className={className}
style={{
margin: `${MarginForEachUnitInPx}px`,
height: `${componentHeight}px`,
width: `${componentWidth}px`,
gridColumn: `span ${widthOfComponent}`,
gridRow: `span ${heightOfComponent}`,
boxShadow: isDragging
? "0 20px 40px -8px rgba(59,130,246,0.15), 0 8px 16px -4px rgba(0,0,0,0.08)"
: props.isSelected && props.isEditMode
? "0 4px 12px -2px rgba(59,130,246,0.12), 0 2px 4px -1px rgba(0,0,0,0.04)"
: "0 2px 8px -2px rgba(0,0,0,0.08), 0 1px 4px -1px rgba(0,0,0,0.04)",
// transition is disabled during drag so the transform is instant
transition: isDragging
? "none"
: "box-shadow 0.2s ease, border-color 0.2s ease",

View File

@@ -42,426 +42,83 @@ The Profiles implementation should follow this exact same pattern for consistenc
---
## Phase 1: Protocol & Ingestion Layer
## Phase 1: Protocol & Ingestion Layer ✅ COMPLETE
**Goal**: Accept OTLP Profiles data over gRPC and HTTP.
**Status**: HTTP endpoint, TelemetryType enum, middleware chain, and queue processing are all implemented.
### 1.1 Add Protobuf Definitions
**Implemented in:**
- HTTP endpoint `POST /otlp/v1/profiles`: `Telemetry/API/OTelIngest.ts`
- TelemetryType.Profile enum: `Common/Types/Telemetry/TelemetryType.ts`
- Queue service: `Telemetry/Services/Queue/ProfilesQueueService.ts`
- Queue handler: `Telemetry/Jobs/TelemetryIngest/ProcessTelemetry.ts`
Add the profiles proto files to `Telemetry/ProtoFiles/OTel/v1/`:
### Remaining Items
- `profiles.proto` — Core profiles data model (from `opentelemetry/proto/profiles/v1development/profiles.proto`)
- `profiles_service.proto` — ProfilesService with `Export` RPC
**Important:** The proto package is `opentelemetry.proto.profiles.v1development` (not `v1`). This `v1development` path will change to `v1` when Profiles reaches GA. Plan for this migration (see Risks section).
The OTLP Profiles format uses a **deduplicated stack representation** where each unique callstack is stored once, with dictionary tables for common entities (functions, locations, mappings). Key message types:
```protobuf
message ExportProfilesServiceRequest {
repeated ResourceProfiles resource_profiles = 1;
}
message ResourceProfiles {
Resource resource = 1;
repeated ScopeProfiles scope_profiles = 2;
string schema_url = 3;
}
message ScopeProfiles {
InstrumentationScope scope = 1;
repeated ProfileContainer profiles = 2;
}
message ProfileContainer {
bytes profile_id = 1;
int64 start_time_unix_nano = 2;
int64 end_time_unix_nano = 3;
Profile profile = 5;
// ...attributes, dropped_attributes_count
}
// NOTE: ProfilesDictionary is batch-scoped (shared across all profiles
// in a ProfilesData message), NOT per-profile. The ingestion service
// must pass the dictionary context when processing individual profiles.
message ProfilesDictionary {
repeated string string_table = 1;
repeated Mapping mapping_table = 2;
repeated Location location_table = 3;
repeated Function function_table = 4;
repeated Link link_table = 5;
// ...
}
message Profile {
repeated ValueType sample_type = 1;
repeated Sample sample = 2;
int64 time_unix_nano = 3;
int64 duration_nano = 4;
ValueType period_type = 5;
int64 period = 6;
bytes profile_id = 7;
repeated int32 attribute_indices = 8;
uint32 dropped_attributes_count = 9;
string original_payload_format = 10; // e.g., "pprofext"
bytes original_payload = 11; // raw pprof bytes for round-tripping
}
message Sample {
int32 stack_index = 1;
repeated int64 values = 2;
repeated int32 attribute_indices = 3;
int32 link_index = 4;
repeated int64 timestamps_unix_nano = 5; // NOTE: repeated — multiple timestamps per sample
}
```
### 1.2 Add TelemetryType Enum Value
In `Common/Types/Telemetry/TelemetryType.ts`, add `Profile = "Profile"` to the existing enum (currently: Metric, Trace, Log, Exception).
### 1.3 Register HTTP Endpoint
In `Telemetry/API/OTelIngest.ts`, add:
```
POST /otlp/v1/profiles
```
Follow the same middleware chain as traces/metrics/logs:
1. `OpenTelemetryRequestMiddleware.getProductType` — Decode protobuf/JSON, set `ProductType.Profiles`
2. `TelemetryIngest.isAuthorizedServiceMiddleware` — Validate `x-oneuptime-token`, extract `projectId`
3. Return 202 immediately
4. Queue for async processing
### 1.4 Register gRPC Service
In `Telemetry/GrpcServer.ts`, register the `ProfilesService/Export` RPC handler alongside the existing trace/metrics/logs handlers.
### 1.5 Update OTel Collector Config
In `OTelCollector/otel-collector-config.template.yaml`, add a `profiles` pipeline to the existing three pipelines (traces, metrics, logs):
```yaml
service:
pipelines:
profiles:
receivers: [otlp]
processors: []
exporters: [otlphttp]
```
**Note:** The OTel Collector in OneUptime is primarily used by the Kubernetes Agent. The main telemetry service handles OTLP ingestion directly. Also note: the OTel Arrow receiver does NOT yet support profiles.
### 1.6 Helm Chart Updates
In `HelmChart/Public/oneuptime/templates/telemetry.yaml`:
- No port changes needed (profiles use the same gRPC 4317 and HTTP 3403 ports)
- Add `TELEMETRY_PROFILE_FLUSH_BATCH_SIZE` environment variable
- Update KEDA autoscaling config to account for profiles queue load
### Estimated Effort: 1-2 weeks
- **1.4 Register gRPC Service** — `ProfilesService/Export` RPC handler not yet registered in `Telemetry/GrpcServer.ts`
- **1.5 Update OTel Collector Config** — No `profiles` pipeline in `OTelCollector/otel-collector-config.template.yaml` yet
- **1.6 Helm Chart Updates** — `TELEMETRY_PROFILE_FLUSH_BATCH_SIZE` env var and KEDA autoscaling updates not yet applied
---
## Phase 2: Data Model & ClickHouse Storage
## Phase 2: Data Model & ClickHouse Storage ✅ COMPLETE
**Goal**: Design an efficient ClickHouse schema for profile data.
**Status**: Both ClickHouse tables (profile, profile_sample) and database services are implemented with full schemas, ZSTD(3) compression, bloom filter skip indexes, and retention date support.
### 2.1 Design the Analytics Model
Create `Common/Models/AnalyticsModels/Profile.ts` following the pattern of `Span.ts`, `Metric.ts`, `Log.ts`.
**Proposed ClickHouse Table: `profile`**
| Column | Type | Description |
|--------|------|-------------|
| `projectId` | String (ObjectID) | Tenant ID |
| `serviceId` | String (ObjectID) | Service reference |
| `profileId` | String | Unique profile identifier |
| `traceId` | String | Correlation with traces |
| `spanId` | String | Correlation with spans |
| `startTime` | DateTime64(9) | Profile start timestamp |
| `endTime` | DateTime64(9) | Profile end timestamp |
| `duration` | UInt64 | Duration in nanoseconds |
| `profileType` | String | e.g., `cpu`, `wall`, `alloc_objects`, `alloc_space`, `goroutine` |
| `unit` | String | e.g., `nanoseconds`, `bytes`, `count` |
| `periodType` | String | Sampling period type |
| `period` | Int64 | Sampling period value |
| `attributes` | String (JSON) | Profile-level attributes (note: `KeyValueAndUnit`, not `KeyValue` — includes `unit` field) |
| `resourceAttributes` | String (JSON) | Resource attributes |
| `originalPayloadFormat` | String | e.g., `pprofext` — for pprof round-tripping |
| `originalPayload` | String (base64) | Raw pprof bytes (optional, for lossless re-export) |
| `retentionDate` | DateTime64 | TTL column for automatic expiry (pattern from existing tables) |
**Proposed ClickHouse Table: `profile_sample`**
This is the high-volume table storing individual samples (denormalized for query performance):
| Column | Type | Description |
|--------|------|-------------|
| `projectId` | String (ObjectID) | Tenant ID |
| `serviceId` | String (ObjectID) | Service reference |
| `profileId` | String | FK to profile table |
| `traceId` | String | Trace correlation (from Link table) |
| `spanId` | String | Span correlation (from Link table) |
| `time` | DateTime64(9) | Sample timestamp |
| `stacktrace` | Array(String) | Fully-resolved stack frames (function@file:line) |
| `stacktraceHash` | String | Hash of stacktrace for grouping |
| `frameTypes` | Array(String) | Per-frame runtime type (`kernel`, `native`, `jvm`, `cpython`, `go`, `v8js`, etc.) |
| `value` | Int64 | Sample value (CPU time, bytes, count) |
| `profileType` | String | Denormalized for filtering |
| `labels` | String (JSON) | Sample-level labels |
| `buildId` | String | Executable build ID (for deferred symbolization) |
| `retentionDate` | DateTime64 | TTL column for automatic expiry |
**Table Engine & Indexing:**
- Engine: `MergeTree`
- Partition by: `toYYYYMMDD(time)`
- Primary key: `(projectId, serviceId, time)`
- Order by: `(projectId, serviceId, time, profileType, stacktraceHash)`
- TTL: `time + INTERVAL dataRetentionInDays DAY`
- Skip indexes on `profileType`, `traceId`, `stacktraceHash`
### 2.2 Storage Considerations
**Why two tables?**
- The `profile` table stores metadata and is low-volume — used for listing/filtering profiles.
- The `profile_sample` table stores denormalized samples — high-volume but optimized for flamegraph aggregation queries.
- This mirrors the existing pattern where `ExceptionInstance` (ClickHouse) is a sub-signal of `Span`, with its own table but linked via `traceId`/`spanId`.
- Alternative: A single table with nested arrays for samples. This is more storage-efficient but makes aggregation queries harder. Start with two tables and revisit if needed.
**Denormalization strategy:**
The OTLP Profiles wire format uses dictionary-based deduplication (string tables, function tables, location tables). **Critically, the `ProfilesDictionary` is shared across ALL profiles in a `ProfilesData` batch** — you cannot process individual profiles without the batch-level dictionary context.
At ingestion time, we should **resolve all dictionary references** and store fully-materialized stack frames. This trades storage space for query simplicity — the same approach used for span attributes today.
**Inline frame handling:**
`Location.lines` is a repeated field supporting inlined functions — a single location can expand to multiple logical frames. The denormalization logic must expand these into the full stacktrace array.
**`original_payload` storage decision:**
The `Profile` message includes `original_payload_format` and `original_payload` fields containing the raw pprof bytes. Storing this enables lossless pprof round-trip export but significantly increases storage. Options:
- **Store always**: Full pprof compatibility, ~2-5x storage increase
- **Store on demand**: Only when `original_payload_format` is set (opt-in by producer)
- **Don't store**: Reconstruct pprof from denormalized data (lossy for some edge cases)
Recommendation: Store on demand (option 2) — only persist when the producer explicitly includes it.
**Expected data volume:**
- A typical eBPF profiler generates ~10-100 samples/second per process
- Each sample with a 20-frame stack ≈ 1-2 KB denormalized
- For 100 services, ~100K-1M samples/minute
- ClickHouse compression (LZ4) reduces this significantly, especially with sorted stacktrace hashes
### 2.3 Create Database Service
Create `Common/Server/Services/ProfileService.ts` and `Common/Server/Services/ProfileSampleService.ts` extending `AnalyticsDatabaseService<Profile>` and `AnalyticsDatabaseService<ProfileSample>`.
Add `TableBillingAccessControl` to both models following the pattern in existing analytics models to enable plan-based billing constraints on profile ingestion/querying.
### 2.4 Data Migration
Follow the migration pattern from `Worker/DataMigrations/AddRetentionDateAndSkipIndexesToTelemetryTables.ts`:
- Add `retentionDate` column with TTL expression: `retentionDate DELETE`
- Add skip indexes: `bloom_filter` on `traceId`, `profileId`, `stacktraceHash`; `set` on `profileType`
- Apply `ZSTD(3)` codec on `stacktrace` and `labels` columns (high compression benefit)
- Default retention: 15 days (matching existing telemetry defaults)
### Estimated Effort: 2-3 weeks
**Implemented in:**
- Profile model: `Common/Models/AnalyticsModels/Profile.ts`
- ProfileSample model: `Common/Models/AnalyticsModels/ProfileSample.ts`
- ProfileService: `Common/Server/Services/ProfileService.ts`
- ProfileSampleService: `Common/Server/Services/ProfileSampleService.ts`
- API routes registered in `App/FeatureSet/BaseAPI/Index.ts`
---
## Phase 3: Ingestion Service
## Phase 3: Ingestion Service ✅ COMPLETE
**Goal**: Process OTLP Profiles payloads and write to ClickHouse.
**Status**: Full OTLP Profiles ingestion is implemented including dictionary denormalization, inline frame handling, mixed-runtime stack support, trace/span correlation via Link table, stacktrace hashing (SHA256), batch processing, and graceful error handling.
### 3.1 Create Ingest Service
Create `Telemetry/Services/OtelProfilesIngestService.ts` extending `OtelIngestBaseService`:
```typescript
class OtelProfilesIngestService extends OtelIngestBaseService {
// Entry point
async ingestProfiles(request: ExportProfilesServiceRequest): Promise<void>;
// Denormalize OTLP profile data:
// 1. Resolve string_table references
// 2. Resolve function/location/mapping references
// 3. Build fully-qualified stack frames per sample
// 4. Extract trace_id/span_id for correlation
// 5. Buffer and batch-insert into ClickHouse
async processProfile(profile: ProfileContainer, resource: Resource): Promise<void>;
// Flush buffer (batch size: 500 samples)
async flushProfilesBuffer(): Promise<void>;
}
```
### 3.2 Create Queue Service
Create `Telemetry/Services/Queue/ProfilesQueueService.ts`:
- Add `TelemetryType.Profile` enum value
- Register queue handler in `Telemetry/Jobs/TelemetryIngest/ProcessTelemetry.ts`
- Batch size: 500 (start conservative, tune later)
### 3.3 Key Implementation Details
**Denormalization logic** (the hardest part of this phase):
The OTLP Profile message uses dictionary tables for compression. **The dictionary is batch-scoped** — it lives on the `ProfilesData` message, not on individual `Profile` messages. The ingestion service must pass the dictionary when processing each profile.
```
dictionary = profilesData.dictionary // batch-level dictionary
For each resourceProfiles in profilesData.resource_profiles:
For each scopeProfiles in resourceProfiles.scope_profiles:
For each profile in scopeProfiles.profiles:
For each sample in profile.sample:
stack = dictionary.stack_table[sample.stack_index]
For each location_index in stack.location_indices:
location = dictionary.location_table[location_index]
// Handle INLINE FRAMES: location.lines is repeated
For each line in location.lines:
function = dictionary.function_table[line.function_index]
function_name = dictionary.string_table[function.name_strindex]
system_name = dictionary.string_table[function.system_name_strindex] // mangled name
file_name = dictionary.string_table[function.filename_strindex]
frame_type = attributes[profile.frame.type] // kernel, native, jvm, etc.
frame = "${function_name}@${file_name}:${line.line}"
Build stacktrace array from all frames (including inlined)
Compute stacktrace_hash = SHA256(stacktrace)
// Resolve trace correlation from Link table
link = dictionary.link_table[sample.link_index]
trace_id = link.trace_id
span_id = link.span_id
// Note: sample.timestamps_unix_nano is REPEATED (multiple timestamps per sample)
// Use first timestamp as sample time, store all if needed
Extract value from sample.values[type_index]
Write denormalized row to buffer
```
**Mixed-runtime stacks:**
The eBPF agent produces stacks that cross kernel/native/managed boundaries (e.g., kernel → libc → JVM → application Java code). Each frame has a `profile.frame.type` attribute. Store this per-frame in the `frameTypes` array column for proper rendering.
**Unsymbolized frames:**
Not all frames will be symbolized at ingestion time (especially native/kernel frames from eBPF). Store the mapping `build_id` attributes (`process.executable.build_id.gnu`, `.go`, `.htlhash`) so frames can be symbolized later when debug info becomes available. See Phase 6 for symbolization pipeline.
**pprof interoperability:**
If `original_payload_format` is set (e.g., `pprofext`), store the `original_payload` bytes for lossless re-export. The OTLP Profiles format supports round-trip conversion to/from pprof with no information loss.
### Estimated Effort: 2-3 weeks
**Implemented in:**
- Ingest service (835 lines): `Telemetry/Services/OtelProfilesIngestService.ts`
- Queue service: `Telemetry/Services/Queue/ProfilesQueueService.ts`
- Queue handler: `Telemetry/Jobs/TelemetryIngest/ProcessTelemetry.ts`
---
## Phase 4: Query API ✅ MOSTLY COMPLETE
**Goal**: Expose APIs for querying and aggregating profile data.
**Status**: Flamegraph aggregation and function list queries are implemented with tree-building algorithm, filtering by projectId/profileId/serviceId/time ranges/profile type, and a 50K sample limit per query.
### 4.1 Core Query Endpoints
**Implemented in:**
- ProfileAggregationService (417 lines): `Common/Server/Services/ProfileAggregationService.ts`
- `getFlamegraph()` — Aggregated flamegraph tree from samples
- `getFunctionList()` — Top functions by selfValue, totalValue, or sampleCount
- CRUD routes for profile/profile-sample: `App/FeatureSet/BaseAPI/Index.ts`
### Remaining Items
Add these endpoints to the telemetry API router:
| Endpoint | Purpose |
|----------|---------|
| `GET /profiles` | List profiles with filters (service, time range, profile type) |
| `GET /profiles/:profileId` | Get profile metadata |
| `GET /profiles/:profileId/flamegraph` | Aggregated flamegraph data for a single profile |
| `GET /profiles/aggregate/flamegraph` | Aggregated flamegraph across multiple profiles (time range) |
| `GET /profiles/function-list` | Top functions by self/total time |
| `GET /profiles/diff` | Diff flamegraph between two time ranges |
### 4.2 Flamegraph Aggregation Query
The core query for flamegraph rendering in ClickHouse:
```sql
SELECT
stacktrace,
SUM(value) as total_value
FROM profile_sample
WHERE projectId = {projectId}
AND serviceId = {serviceId}
AND time BETWEEN {startTime} AND {endTime}
AND profileType = {profileType}
GROUP BY stacktrace
ORDER BY total_value DESC
LIMIT 10000
```
The API layer then builds a tree structure from flat stacktraces for the frontend flamegraph component.
### 4.3 Cross-Signal Correlation Queries
Leverage `traceId`/`spanId` columns for correlation:
```sql
-- Get profile samples for a specific trace
SELECT stacktrace, SUM(value) as total_value
FROM profile_sample
WHERE projectId = {projectId}
AND traceId = {traceId}
GROUP BY stacktrace
-- Get profile samples for a specific span
SELECT stacktrace, SUM(value) as total_value
FROM profile_sample
WHERE projectId = {projectId}
AND spanId = {spanId}
GROUP BY stacktrace
```
This enables a "View Profile" button on the trace detail page.
### Estimated Effort: 2 weeks
The following items remain open for this phase:
- **Diff flamegraph endpoint** — `GET /profiles/diff` for comparing two time ranges not yet implemented
- **Cross-signal correlation queries** — Dedicated endpoints for querying profiles by `traceId`/`spanId` (e.g., "View Profile" button on trace detail page)
---
## Phase 5: Frontend — Profiles UI ✅ MOSTLY COMPLETE
**Goal**: Build the profiles exploration and visualization UI.
**Status**: Core pages (listing, detail view, layout, side menu, documentation) and key components (flamegraph, function list, profiles table) are implemented.
### 5.1 New Pages & Routes
**Implemented in:**
- Pages: `App/FeatureSet/Dashboard/src/Pages/Profiles/` (Index, View/Index, Layout, SideMenu, Documentation)
- Components: `App/FeatureSet/Dashboard/src/Components/Profiles/` (ProfileFlamegraph, ProfileFunctionList, ProfileTable)
### Remaining Items
Add these pages and routes to `App/FeatureSet/Dashboard/src/`:
- `Pages/Profiles/ProfileList.tsx` — List/search profiles by service, time range, type
- `Pages/Profiles/ProfileDetail.tsx` — Single profile detail view
- `Routes/ProfilesRoutes.tsx` — Route definitions
### 5.2 Core Components
| Component | Purpose |
|-----------|---------|
| `Components/Profiles/FlameGraph.tsx` | Interactive flamegraph (CPU/memory/alloc). Consider using an existing open-source flamegraph library (e.g., `speedscope` or `d3-flame-graph`) |
| `Components/Profiles/FunctionList.tsx` | Table of functions sorted by self/total time with search |
| `Components/Profiles/ProfileTypeSelector.tsx` | Dropdown to select profile type (CPU, heap, goroutine, etc.) |
| `Components/Profiles/DiffFlameGraph.tsx` | Side-by-side or differential flamegraph comparing two time ranges |
| `Components/Profiles/ProfileTimeline.tsx` | Timeline showing profile sample density over time |
**Frame type color coding:**
Mixed-runtime stacks from the eBPF agent contain frames from different runtimes (kernel, native, JVM, CPython, Go, V8, etc.). The flamegraph component should color-code frames by their `profile.frame.type` attribute so users can visually distinguish application code from kernel/native/runtime internals. Suggested palette:
- Kernel frames: red/orange
- Native (C/C++/Rust): blue
- JVM/Go/V8/CPython/Ruby: green shades (per runtime)
### 5.3 Sidebar Navigation
Create `Pages/Profiles/SideMenu.tsx` following the existing pattern (see `Pages/Traces/SideMenu.tsx`, `Pages/Metrics/SideMenu.tsx`, `Pages/Logs/SideMenu.tsx`):
- Main section: "Profiles" → PageMap.PROFILES
- Documentation section: Link to PROFILES_DOCUMENTATION route
Add "Profiles" entry to the main dashboard navigation sidebar.
### 5.4 Cross-Signal Integration
- **Trace Detail Page**: Add a "Profile" tab/button on `TraceExplorer.tsx` that links to the flamegraph filtered by `traceId`.
- **Span Detail**: When viewing a span, show an inline flamegraph if profile samples exist for that `spanId`.
- **Service Overview**: Add a "Profiles" tab on the service detail page showing aggregated flamegraphs.
### Estimated Effort: 3-4 weeks
The following items remain open for this phase:
- **DiffFlameGraph component** — Side-by-side or differential flamegraph comparing two time ranges
- **ProfileTimeline component** — Timeline showing profile sample density over time
- **ProfileTypeSelector component** — Dropdown to select profile type (CPU, heap, goroutine, etc.)
- **Frame type color coding** — Color-code flamegraph frames by `profile.frame.type` (kernel=red/orange, native=blue, managed=green shades)
- **5.4 Cross-Signal Integration**:
- Trace Detail Page: Add "Profile" tab/button on `TraceExplorer.tsx` linking to flamegraph by `traceId`
- Span Detail: Inline flamegraph when profile samples exist for a `spanId`
- Service Overview: "Profiles" tab on service detail page with aggregated flamegraphs
---
Add `Telemetry/Docs/profileData.example.json` with a sample OTLP Profiles payload.
## Summary Timeline
| Phase | Description | Effort | Dependencies | Status |
|-------|-------------|--------|--------------|--------|
| 1 | Protocol & Ingestion Layer | 1-2 weeks | None | ✅ Mostly complete (gRPC, OTel Collector config, Helm chart remaining) |
| 2 | Data Model & ClickHouse Storage | 2-3 weeks | Phase 1 | ✅ Complete |
| 3 | Ingestion Service | 2-3 weeks | Phase 1, 2 | ✅ Complete |
| 4 | Query API | 2 weeks | Phase 2, 3 | ✅ Mostly complete (diff flamegraph, cross-signal endpoints remaining) |
| 5 | Frontend — Profiles UI | 3-4 weeks | Phase 4 | ✅ Mostly complete (diff view, timeline, color coding, cross-signal integration remaining) |
| 6 | Production Hardening (incl. symbolization, alerting, conformance) | 3-4 weeks | Phase 5 | ❌ Not started |
| 7 | Documentation & Launch | 1 week | Phase 6 | ❌ Not started |
**Total estimated effort: 14-21 weeks** (with parallelization of phases 4+5, closer to 11-16 weeks)
**Suggested MVP scope (Phases 1-5):** Ship ingestion + storage + basic flamegraph UI first (~9-14 weeks). Symbolization, alerting integration, and pprof export can follow as iterative improvements.
**Remaining work is primarily:** Phase 1 gaps (gRPC/Helm), Phase 4-5 advanced features (diff flamegraphs, cross-signal integration, frame type color coding), and all of Phases 6-7 (symbolization, alerting, pprof export, conformance, docs).
---