mirror of
https://github.com/OneUptime/oneuptime.git
synced 2026-04-06 00:32:12 +02:00
feat: Add profiling support across services and implement new metrics
- Integrated profiling initialization in Probe, Telemetry, TestServer, and Worker services. - Added environment variables for enabling profiling in various services. - Created Profiling utility to handle CPU profiling and send data to OTLP endpoint. - Introduced new metric types for exceptions, spans, and dashboards. - Developed utility classes for handling alert and incident metrics. - Added new React components for displaying alert and incident metrics in the dashboard.
This commit is contained in:
@@ -11,6 +11,7 @@ import { PromiseVoidFunction } from "Common/Types/FunctionTypes";
|
||||
import logger from "Common/Server/Utils/Logger";
|
||||
import App from "Common/Server/Utils/StartServer";
|
||||
import Telemetry from "Common/Server/Utils/Telemetry";
|
||||
import Profiling from "Common/Server/Utils/Profiling";
|
||||
import Express, { ExpressApplication } from "Common/Server/Utils/Express";
|
||||
import "ejs";
|
||||
|
||||
@@ -23,6 +24,11 @@ const init: PromiseVoidFunction = async (): Promise<void> => {
|
||||
serviceName: APP_NAME,
|
||||
});
|
||||
|
||||
// Initialize profiling (opt-in via ENABLE_PROFILING env var)
|
||||
Profiling.init({
|
||||
serviceName: APP_NAME,
|
||||
});
|
||||
|
||||
logger.info("AI Agent Service - Starting...");
|
||||
|
||||
// init the app
|
||||
|
||||
@@ -0,0 +1,123 @@
|
||||
import React, {
|
||||
FunctionComponent,
|
||||
ReactElement,
|
||||
useCallback,
|
||||
useState,
|
||||
} from "react";
|
||||
import ObjectID from "Common/Types/ObjectID";
|
||||
import AlertMetricType from "Common/Types/Alerts/AlertMetricType";
|
||||
import AlertMetricTypeUtil from "Common/Utils/Alerts/AlertMetricType";
|
||||
import MetricView from "../Metrics/MetricView";
|
||||
import ProjectUtil from "Common/UI/Utils/Project";
|
||||
import MetricQueryConfigData from "Common/Types/Metrics/MetricQueryConfigData";
|
||||
import MetricViewData from "Common/Types/Metrics/MetricViewData";
|
||||
import InBetween from "Common/Types/BaseDatabase/InBetween";
|
||||
import RangeStartAndEndDateTime, {
|
||||
RangeStartAndEndDateTimeUtil,
|
||||
} from "Common/Types/Time/RangeStartAndEndDateTime";
|
||||
import TimeRange from "Common/Types/Time/TimeRange";
|
||||
import RangeStartAndEndDateView from "Common/UI/Components/Date/RangeStartAndEndDateView";
|
||||
import Card from "Common/UI/Components/Card/Card";
|
||||
|
||||
export interface ComponentProps {
|
||||
monitorId: ObjectID;
|
||||
}
|
||||
|
||||
const MonitorAlertMetrics: FunctionComponent<ComponentProps> = (
|
||||
props: ComponentProps,
|
||||
): ReactElement => {
|
||||
const alertMetricTypes: Array<AlertMetricType> =
|
||||
AlertMetricTypeUtil.getAllAlertMetricTypes();
|
||||
|
||||
const [timeRange, setTimeRange] = useState<RangeStartAndEndDateTime>({
|
||||
range: TimeRange.PAST_ONE_DAY,
|
||||
});
|
||||
|
||||
type GetQueryConfigsFunction = () => Array<MetricQueryConfigData>;
|
||||
|
||||
const getQueryConfigs: GetQueryConfigsFunction =
|
||||
(): Array<MetricQueryConfigData> => {
|
||||
const queries: Array<MetricQueryConfigData> = [];
|
||||
|
||||
for (const metricType of alertMetricTypes) {
|
||||
queries.push({
|
||||
metricAliasData: {
|
||||
metricVariable: metricType,
|
||||
title: AlertMetricTypeUtil.getTitleByAlertMetricType(metricType),
|
||||
description:
|
||||
AlertMetricTypeUtil.getDescriptionByAlertMetricType(metricType),
|
||||
legend: AlertMetricTypeUtil.getLegendByAlertMetricType(metricType),
|
||||
legendUnit:
|
||||
AlertMetricTypeUtil.getLegendUnitByAlertMetricType(metricType),
|
||||
},
|
||||
metricQueryData: {
|
||||
filterData: {
|
||||
metricName: metricType,
|
||||
attributes: {
|
||||
monitorId: props.monitorId.toString(),
|
||||
projectId: ProjectUtil.getCurrentProjectId()?.toString() || "",
|
||||
},
|
||||
aggegationType:
|
||||
AlertMetricTypeUtil.getAggregationTypeByAlertMetricType(
|
||||
metricType,
|
||||
),
|
||||
},
|
||||
groupBy: undefined,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
return queries;
|
||||
};
|
||||
|
||||
const [metricViewData, setMetricViewData] = useState<MetricViewData>({
|
||||
startAndEndDate: RangeStartAndEndDateTimeUtil.getStartAndEndDate({
|
||||
range: TimeRange.PAST_ONE_DAY,
|
||||
}),
|
||||
queryConfigs: getQueryConfigs(),
|
||||
formulaConfigs: [],
|
||||
});
|
||||
|
||||
const handleTimeRangeChange: (
|
||||
newTimeRange: RangeStartAndEndDateTime,
|
||||
) => void = useCallback((newTimeRange: RangeStartAndEndDateTime): void => {
|
||||
setTimeRange(newTimeRange);
|
||||
const dateRange: InBetween<Date> =
|
||||
RangeStartAndEndDateTimeUtil.getStartAndEndDate(newTimeRange);
|
||||
setMetricViewData((prev: MetricViewData) => {
|
||||
return {
|
||||
...prev,
|
||||
startAndEndDate: dateRange,
|
||||
};
|
||||
});
|
||||
}, []);
|
||||
|
||||
return (
|
||||
<Card
|
||||
title="Alert Metrics"
|
||||
description="Alert metrics for this monitor - count, time to acknowledge, time to resolve, and duration."
|
||||
rightElement={
|
||||
<RangeStartAndEndDateView
|
||||
dashboardStartAndEndDate={timeRange}
|
||||
onChange={handleTimeRangeChange}
|
||||
/>
|
||||
}
|
||||
>
|
||||
<MetricView
|
||||
data={metricViewData}
|
||||
hideQueryElements={true}
|
||||
hideStartAndEndDate={true}
|
||||
hideCardInCharts={true}
|
||||
onChange={(data: MetricViewData) => {
|
||||
setMetricViewData({
|
||||
...data,
|
||||
queryConfigs: getQueryConfigs(),
|
||||
formulaConfigs: [],
|
||||
});
|
||||
}}
|
||||
/>
|
||||
</Card>
|
||||
);
|
||||
};
|
||||
|
||||
export default MonitorAlertMetrics;
|
||||
@@ -0,0 +1,129 @@
|
||||
import React, {
|
||||
FunctionComponent,
|
||||
ReactElement,
|
||||
useCallback,
|
||||
useState,
|
||||
} from "react";
|
||||
import ObjectID from "Common/Types/ObjectID";
|
||||
import IncidentMetricType from "Common/Types/Incident/IncidentMetricType";
|
||||
import IncidentMetricTypeUtil from "Common/Utils/Incident/IncidentMetricType";
|
||||
import MetricView from "../Metrics/MetricView";
|
||||
import ProjectUtil from "Common/UI/Utils/Project";
|
||||
import MetricQueryConfigData from "Common/Types/Metrics/MetricQueryConfigData";
|
||||
import MetricViewData from "Common/Types/Metrics/MetricViewData";
|
||||
import InBetween from "Common/Types/BaseDatabase/InBetween";
|
||||
import RangeStartAndEndDateTime, {
|
||||
RangeStartAndEndDateTimeUtil,
|
||||
} from "Common/Types/Time/RangeStartAndEndDateTime";
|
||||
import TimeRange from "Common/Types/Time/TimeRange";
|
||||
import RangeStartAndEndDateView from "Common/UI/Components/Date/RangeStartAndEndDateView";
|
||||
import Card from "Common/UI/Components/Card/Card";
|
||||
|
||||
export interface ComponentProps {
|
||||
monitorId: ObjectID;
|
||||
}
|
||||
|
||||
const MonitorIncidentMetrics: FunctionComponent<ComponentProps> = (
|
||||
props: ComponentProps,
|
||||
): ReactElement => {
|
||||
const incidentMetricTypes: Array<IncidentMetricType> =
|
||||
IncidentMetricTypeUtil.getAllIncidentMetricTypes();
|
||||
|
||||
const [timeRange, setTimeRange] = useState<RangeStartAndEndDateTime>({
|
||||
range: TimeRange.PAST_ONE_DAY,
|
||||
});
|
||||
|
||||
type GetQueryConfigsFunction = () => Array<MetricQueryConfigData>;
|
||||
|
||||
const getQueryConfigs: GetQueryConfigsFunction =
|
||||
(): Array<MetricQueryConfigData> => {
|
||||
const queries: Array<MetricQueryConfigData> = [];
|
||||
|
||||
for (const metricType of incidentMetricTypes) {
|
||||
queries.push({
|
||||
metricAliasData: {
|
||||
metricVariable: metricType,
|
||||
title:
|
||||
IncidentMetricTypeUtil.getTitleByIncidentMetricType(metricType),
|
||||
description:
|
||||
IncidentMetricTypeUtil.getDescriptionByIncidentMetricType(
|
||||
metricType,
|
||||
),
|
||||
legend:
|
||||
IncidentMetricTypeUtil.getLegendByIncidentMetricType(metricType),
|
||||
legendUnit:
|
||||
IncidentMetricTypeUtil.getLegendUnitByIncidentMetricType(
|
||||
metricType,
|
||||
),
|
||||
},
|
||||
metricQueryData: {
|
||||
filterData: {
|
||||
metricName: metricType,
|
||||
attributes: {
|
||||
monitorIds: props.monitorId.toString(),
|
||||
projectId: ProjectUtil.getCurrentProjectId()?.toString() || "",
|
||||
},
|
||||
aggegationType:
|
||||
IncidentMetricTypeUtil.getAggregationTypeByIncidentMetricType(
|
||||
metricType,
|
||||
),
|
||||
},
|
||||
groupBy: undefined,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
return queries;
|
||||
};
|
||||
|
||||
const [metricViewData, setMetricViewData] = useState<MetricViewData>({
|
||||
startAndEndDate: RangeStartAndEndDateTimeUtil.getStartAndEndDate({
|
||||
range: TimeRange.PAST_ONE_DAY,
|
||||
}),
|
||||
queryConfigs: getQueryConfigs(),
|
||||
formulaConfigs: [],
|
||||
});
|
||||
|
||||
const handleTimeRangeChange: (
|
||||
newTimeRange: RangeStartAndEndDateTime,
|
||||
) => void = useCallback((newTimeRange: RangeStartAndEndDateTime): void => {
|
||||
setTimeRange(newTimeRange);
|
||||
const dateRange: InBetween<Date> =
|
||||
RangeStartAndEndDateTimeUtil.getStartAndEndDate(newTimeRange);
|
||||
setMetricViewData((prev: MetricViewData) => {
|
||||
return {
|
||||
...prev,
|
||||
startAndEndDate: dateRange,
|
||||
};
|
||||
});
|
||||
}, []);
|
||||
|
||||
return (
|
||||
<Card
|
||||
title="Incident Metrics"
|
||||
description="Incident metrics for this monitor - count, time to acknowledge, time to resolve, and duration."
|
||||
rightElement={
|
||||
<RangeStartAndEndDateView
|
||||
dashboardStartAndEndDate={timeRange}
|
||||
onChange={handleTimeRangeChange}
|
||||
/>
|
||||
}
|
||||
>
|
||||
<MetricView
|
||||
data={metricViewData}
|
||||
hideQueryElements={true}
|
||||
hideStartAndEndDate={true}
|
||||
hideCardInCharts={true}
|
||||
onChange={(data: MetricViewData) => {
|
||||
setMetricViewData({
|
||||
...data,
|
||||
queryConfigs: getQueryConfigs(),
|
||||
formulaConfigs: [],
|
||||
});
|
||||
}}
|
||||
/>
|
||||
</Card>
|
||||
);
|
||||
};
|
||||
|
||||
export default MonitorIncidentMetrics;
|
||||
@@ -1,235 +1,79 @@
|
||||
import DisabledWarning from "../../../Components/Monitor/DisabledWarning";
|
||||
import IncidentsTable from "../../../Components/Incident/IncidentsTable";
|
||||
import AlertsTable from "../../../Components/Alert/AlertsTable";
|
||||
import MonitorMetricsElement from "../../../Components/Monitor/MonitorMetrics";
|
||||
import MonitorIncidentMetrics from "../../../Components/Monitor/MonitorIncidentMetrics";
|
||||
import MonitorAlertMetrics from "../../../Components/Monitor/MonitorAlertMetrics";
|
||||
import PageComponentProps from "../../PageComponentProps";
|
||||
import ObjectID from "Common/Types/ObjectID";
|
||||
import Navigation from "Common/UI/Utils/Navigation";
|
||||
import React, { Fragment, FunctionComponent, ReactElement, useState } from "react";
|
||||
import React, { Fragment, FunctionComponent, ReactElement, useEffect, useState } from "react";
|
||||
import Tabs from "Common/UI/Components/Tabs/Tabs";
|
||||
import { Tab } from "Common/UI/Components/Tabs/Tab";
|
||||
import Incident from "Common/Models/DatabaseModels/Incident";
|
||||
import Alert from "Common/Models/DatabaseModels/Alert";
|
||||
import Query from "Common/Types/BaseDatabase/Query";
|
||||
import ProjectUtil from "Common/UI/Utils/Project";
|
||||
import Includes from "Common/Types/BaseDatabase/Includes";
|
||||
import MonitorStatusTimeline from "Common/Models/DatabaseModels/MonitorStatusTimeline";
|
||||
import MonitorStatus from "Common/Models/DatabaseModels/MonitorStatus";
|
||||
import ModelTable from "Common/UI/Components/ModelTable/ModelTable";
|
||||
import FieldType from "Common/UI/Components/Types/FieldType";
|
||||
import SortOrder from "Common/Types/BaseDatabase/SortOrder";
|
||||
import BadDataException from "Common/Types/Exception/BadDataException";
|
||||
import Statusbubble from "Common/UI/Components/StatusBubble/StatusBubble";
|
||||
import { Black } from "Common/Types/BrandColors";
|
||||
import OneUptimeDate from "Common/Types/Date";
|
||||
import FormFieldSchemaType from "Common/UI/Components/Forms/Types/FormFieldSchemaType";
|
||||
import MonitorType from "Common/Types/Monitor/MonitorType";
|
||||
import MonitorMetricTypeUtil from "Common/Utils/Monitor/MonitorMetricType";
|
||||
import Monitor from "Common/Models/DatabaseModels/Monitor";
|
||||
import ModelAPI from "Common/UI/Utils/ModelAPI/ModelAPI";
|
||||
import API from "Common/UI/Utils/API/API";
|
||||
import PageLoader from "Common/UI/Components/Loader/PageLoader";
|
||||
import ErrorMessage from "Common/UI/Components/ErrorMessage/ErrorMessage";
|
||||
|
||||
const MonitorMetrics: FunctionComponent<
|
||||
PageComponentProps
|
||||
> = (props: PageComponentProps): ReactElement => {
|
||||
> = (): ReactElement => {
|
||||
const modelId: ObjectID = Navigation.getLastParamAsObjectID(1);
|
||||
|
||||
const [_currentTab, setCurrentTab] = useState<Tab | null>(null);
|
||||
const [monitorType, setMonitorType] = useState<MonitorType | null>(null);
|
||||
const [isLoading, setIsLoading] = useState<boolean>(true);
|
||||
const [error, setError] = useState<string>("");
|
||||
|
||||
const incidentQuery: Query<Incident> = {
|
||||
projectId: ProjectUtil.getCurrentProjectId()!,
|
||||
monitors: new Includes([modelId]),
|
||||
};
|
||||
useEffect(() => {
|
||||
setIsLoading(true);
|
||||
ModelAPI.getItem({
|
||||
modelType: Monitor,
|
||||
id: modelId,
|
||||
select: { monitorType: true },
|
||||
})
|
||||
.then((item: Monitor | null) => {
|
||||
setMonitorType(item?.monitorType || null);
|
||||
setIsLoading(false);
|
||||
})
|
||||
.catch((err: Error) => {
|
||||
setError(API.getFriendlyMessage(err));
|
||||
setIsLoading(false);
|
||||
});
|
||||
}, []);
|
||||
|
||||
const alertQuery: Query<Alert> = {
|
||||
projectId: ProjectUtil.getCurrentProjectId()!,
|
||||
monitor: modelId,
|
||||
};
|
||||
if (isLoading) {
|
||||
return <PageLoader isVisible={true} />;
|
||||
}
|
||||
|
||||
const tabs: Array<Tab> = [
|
||||
{
|
||||
if (error) {
|
||||
return <ErrorMessage message={error} />;
|
||||
}
|
||||
|
||||
const hasMonitorMetrics: boolean =
|
||||
monitorType !== null &&
|
||||
MonitorMetricTypeUtil.getMonitorMetricTypesByMonitorType(monitorType)
|
||||
.length > 0;
|
||||
|
||||
const tabs: Array<Tab> = [];
|
||||
|
||||
if (hasMonitorMetrics) {
|
||||
tabs.push({
|
||||
name: "Monitor Metrics",
|
||||
children: <MonitorMetricsElement monitorId={modelId} />,
|
||||
},
|
||||
{
|
||||
name: "Incidents",
|
||||
children: (
|
||||
<IncidentsTable
|
||||
query={incidentQuery}
|
||||
noItemsMessage="No incidents found for this monitor."
|
||||
title="Monitor Incidents"
|
||||
description="Incidents associated with this monitor."
|
||||
/>
|
||||
),
|
||||
},
|
||||
{
|
||||
name: "Alerts",
|
||||
children: (
|
||||
<AlertsTable
|
||||
query={alertQuery}
|
||||
noItemsMessage="No alerts found for this monitor."
|
||||
title="Monitor Alerts"
|
||||
description="Alerts associated with this monitor."
|
||||
createInitialValues={{
|
||||
monitor: modelId,
|
||||
}}
|
||||
/>
|
||||
),
|
||||
},
|
||||
{
|
||||
name: "Status Timeline",
|
||||
children: (
|
||||
<ModelTable<MonitorStatusTimeline>
|
||||
modelType={MonitorStatusTimeline}
|
||||
id="table-monitor-status-timeline"
|
||||
name="Monitor > Status Timeline"
|
||||
userPreferencesKey="monitor-status-timeline-table"
|
||||
isDeleteable={true}
|
||||
showViewIdButton={true}
|
||||
isCreateable={true}
|
||||
isViewable={false}
|
||||
query={{
|
||||
monitorId: modelId,
|
||||
projectId: ProjectUtil.getCurrentProjectId()!,
|
||||
}}
|
||||
sortBy="startsAt"
|
||||
sortOrder={SortOrder.Descending}
|
||||
onBeforeCreate={(
|
||||
item: MonitorStatusTimeline,
|
||||
): Promise<MonitorStatusTimeline> => {
|
||||
if (!props.currentProject || !props.currentProject._id) {
|
||||
throw new BadDataException("Project ID cannot be null");
|
||||
}
|
||||
item.monitorId = modelId;
|
||||
item.projectId = new ObjectID(props.currentProject._id);
|
||||
return Promise.resolve(item);
|
||||
}}
|
||||
cardProps={{
|
||||
title: "Status Timeline",
|
||||
description: "Here is the status timeline for this monitor",
|
||||
}}
|
||||
noItemsMessage={
|
||||
"No status timeline created for this monitor so far."
|
||||
}
|
||||
formFields={[
|
||||
{
|
||||
field: {
|
||||
monitorStatus: true,
|
||||
},
|
||||
title: "Monitor Status",
|
||||
fieldType: FormFieldSchemaType.Dropdown,
|
||||
required: true,
|
||||
placeholder: "Monitor Status",
|
||||
dropdownModal: {
|
||||
type: MonitorStatus,
|
||||
labelField: "name",
|
||||
valueField: "_id",
|
||||
},
|
||||
},
|
||||
{
|
||||
field: {
|
||||
startsAt: true,
|
||||
},
|
||||
title: "Starts At",
|
||||
fieldType: FormFieldSchemaType.DateTime,
|
||||
required: true,
|
||||
placeholder: "Starts At",
|
||||
getDefaultValue: () => {
|
||||
return OneUptimeDate.getCurrentDate();
|
||||
},
|
||||
},
|
||||
]}
|
||||
showRefreshButton={true}
|
||||
viewPageRoute={Navigation.getCurrentRoute()}
|
||||
filters={[
|
||||
{
|
||||
field: {
|
||||
monitorStatus: {
|
||||
name: true,
|
||||
},
|
||||
},
|
||||
title: "Monitor Status",
|
||||
type: FieldType.Entity,
|
||||
filterEntityType: MonitorStatus,
|
||||
filterQuery: {
|
||||
projectId: ProjectUtil.getCurrentProjectId()!,
|
||||
},
|
||||
filterDropdownField: {
|
||||
label: "name",
|
||||
value: "_id",
|
||||
},
|
||||
},
|
||||
{
|
||||
field: {
|
||||
startsAt: true,
|
||||
},
|
||||
title: "Starts At",
|
||||
type: FieldType.Date,
|
||||
},
|
||||
{
|
||||
field: {
|
||||
endsAt: true,
|
||||
},
|
||||
title: "Ends At",
|
||||
type: FieldType.Date,
|
||||
},
|
||||
]}
|
||||
columns={[
|
||||
{
|
||||
field: {
|
||||
monitorStatus: {
|
||||
name: true,
|
||||
color: true,
|
||||
},
|
||||
},
|
||||
title: "Monitor Status",
|
||||
type: FieldType.Text,
|
||||
getElement: (item: MonitorStatusTimeline): ReactElement => {
|
||||
if (!item["monitorStatus"]) {
|
||||
throw new BadDataException("Monitor Status not found");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
return (
|
||||
<Statusbubble
|
||||
color={item.monitorStatus.color || Black}
|
||||
shouldAnimate={false}
|
||||
text={item.monitorStatus.name || "Unknown"}
|
||||
/>
|
||||
);
|
||||
},
|
||||
},
|
||||
{
|
||||
field: {
|
||||
startsAt: true,
|
||||
},
|
||||
title: "Starts At",
|
||||
type: FieldType.DateTime,
|
||||
},
|
||||
{
|
||||
field: {
|
||||
endsAt: true,
|
||||
},
|
||||
title: "Ends At",
|
||||
type: FieldType.DateTime,
|
||||
noValueMessage: "Currently Active",
|
||||
},
|
||||
{
|
||||
field: {
|
||||
endsAt: true,
|
||||
},
|
||||
title: "Duration",
|
||||
type: FieldType.Text,
|
||||
getElement: (item: MonitorStatusTimeline): ReactElement => {
|
||||
return (
|
||||
<p>
|
||||
{OneUptimeDate.differenceBetweenTwoDatesAsFromattedString(
|
||||
item["startsAt"] as Date,
|
||||
(item["endsAt"] as Date) || OneUptimeDate.getCurrentDate(),
|
||||
)}
|
||||
</p>
|
||||
);
|
||||
},
|
||||
},
|
||||
]}
|
||||
/>
|
||||
),
|
||||
},
|
||||
];
|
||||
tabs.push({
|
||||
name: "Incident Metrics",
|
||||
children: <MonitorIncidentMetrics monitorId={modelId} />,
|
||||
});
|
||||
|
||||
tabs.push({
|
||||
name: "Alert Metrics",
|
||||
children: <MonitorAlertMetrics monitorId={modelId} />,
|
||||
});
|
||||
|
||||
return (
|
||||
<Fragment>
|
||||
|
||||
@@ -46,18 +46,16 @@ const DashboardSideMenu: FunctionComponent<ComponentProps> = (
|
||||
},
|
||||
];
|
||||
|
||||
if (MonitorTypeHelper.doesMonitorTypeHaveGraphs(props.monitorType)) {
|
||||
overviewItems.push({
|
||||
link: {
|
||||
title: "Metrics",
|
||||
to: RouteUtil.populateRouteParams(
|
||||
RouteMap[PageMap.MONITOR_VIEW_METRICS] as Route,
|
||||
{ modelId: props.modelId },
|
||||
),
|
||||
},
|
||||
icon: IconProp.Graph,
|
||||
});
|
||||
}
|
||||
overviewItems.push({
|
||||
link: {
|
||||
title: "Metrics",
|
||||
to: RouteUtil.populateRouteParams(
|
||||
RouteMap[PageMap.MONITOR_VIEW_METRICS] as Route,
|
||||
{ modelId: props.modelId },
|
||||
),
|
||||
},
|
||||
icon: IconProp.Graph,
|
||||
});
|
||||
|
||||
overviewItems.push({
|
||||
link: {
|
||||
|
||||
@@ -16,6 +16,15 @@ import ConfirmModal from "Common/UI/Components/Modal/ConfirmModal";
|
||||
import ModelAPI from "Common/UI/Utils/ModelAPI/ModelAPI";
|
||||
import API from "Common/UI/Utils/API/API";
|
||||
import UUID from "Common/Utils/UUID";
|
||||
import ComponentID from "Common/Types/Workflow/ComponentID";
|
||||
import { JSONObject } from "Common/Types/JSON";
|
||||
import {
|
||||
ComponentType,
|
||||
NodeDataProp,
|
||||
NodeType,
|
||||
} from "Common/Types/Workflow/Component";
|
||||
import { useAsyncEffect } from "use-async-effect";
|
||||
import { Node } from "reactflow";
|
||||
|
||||
const Settings: FunctionComponent<PageComponentProps> = (): ReactElement => {
|
||||
const modelId: ObjectID = Navigation.getLastParamAsObjectID(1);
|
||||
@@ -23,6 +32,44 @@ const Settings: FunctionComponent<PageComponentProps> = (): ReactElement => {
|
||||
useState<boolean>(false);
|
||||
const [refresher, setRefresher] = useState<boolean>(false);
|
||||
const [error, setError] = useState<string>("");
|
||||
const [isWebhookTrigger, setIsWebhookTrigger] = useState<boolean>(false);
|
||||
|
||||
useAsyncEffect(async () => {
|
||||
try {
|
||||
const workflow: Workflow | null = await ModelAPI.getItem({
|
||||
modelType: Workflow,
|
||||
id: modelId,
|
||||
select: {
|
||||
graph: true,
|
||||
},
|
||||
requestOptions: {},
|
||||
});
|
||||
|
||||
if (
|
||||
workflow?.graph &&
|
||||
(workflow.graph as JSONObject)["nodes"]
|
||||
) {
|
||||
const nodes: Array<JSONObject> = (workflow.graph as JSONObject)[
|
||||
"nodes"
|
||||
] as Array<JSONObject>;
|
||||
|
||||
for (const node of nodes) {
|
||||
const nodeData: NodeDataProp = node["data"] as any;
|
||||
|
||||
if (
|
||||
nodeData.componentType === ComponentType.Trigger &&
|
||||
nodeData.nodeType === NodeType.Node &&
|
||||
nodeData.metadataId === ComponentID.Webhook
|
||||
) {
|
||||
setIsWebhookTrigger(true);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (_err) {
|
||||
// ignore - just don't show the webhook section
|
||||
}
|
||||
}, []);
|
||||
|
||||
const resetSecretKey: () => void = (): void => {
|
||||
setShowResetConfirmation(false);
|
||||
@@ -44,46 +91,48 @@ const Settings: FunctionComponent<PageComponentProps> = (): ReactElement => {
|
||||
|
||||
return (
|
||||
<Fragment>
|
||||
<CardModelDetail<Workflow>
|
||||
name="Workflow > Webhook Secret Key"
|
||||
cardProps={{
|
||||
title: "Webhook Secret Key",
|
||||
description:
|
||||
"This secret key is used to trigger this workflow via webhook. Use this key in the webhook URL instead of the workflow ID for security. You can reset this key if it is compromised.",
|
||||
buttons: [
|
||||
{
|
||||
title: "Reset Secret Key",
|
||||
buttonStyle: ButtonStyleType.DANGER_OUTLINE,
|
||||
onClick: () => {
|
||||
setShowResetConfirmation(true);
|
||||
{isWebhookTrigger && (
|
||||
<CardModelDetail<Workflow>
|
||||
name="Workflow > Webhook Secret Key"
|
||||
cardProps={{
|
||||
title: "Webhook Secret Key",
|
||||
description:
|
||||
"This secret key is used to trigger this workflow via webhook. Use this key in the webhook URL instead of the workflow ID for security. You can reset this key if it is compromised.",
|
||||
buttons: [
|
||||
{
|
||||
title: "Reset Secret Key",
|
||||
buttonStyle: ButtonStyleType.DANGER_OUTLINE,
|
||||
onClick: () => {
|
||||
setShowResetConfirmation(true);
|
||||
},
|
||||
icon: IconProp.Refresh,
|
||||
},
|
||||
icon: IconProp.Refresh,
|
||||
},
|
||||
],
|
||||
}}
|
||||
isEditable={false}
|
||||
refresher={refresher}
|
||||
modelDetailProps={{
|
||||
showDetailsInNumberOfColumns: 1,
|
||||
modelType: Workflow,
|
||||
id: "model-detail-workflow-webhook-secret",
|
||||
fields: [
|
||||
{
|
||||
field: {
|
||||
webhookSecretKey: true,
|
||||
],
|
||||
}}
|
||||
isEditable={false}
|
||||
refresher={refresher}
|
||||
modelDetailProps={{
|
||||
showDetailsInNumberOfColumns: 1,
|
||||
modelType: Workflow,
|
||||
id: "model-detail-workflow-webhook-secret",
|
||||
fields: [
|
||||
{
|
||||
field: {
|
||||
webhookSecretKey: true,
|
||||
},
|
||||
fieldType: FieldType.HiddenText,
|
||||
title: "Webhook Secret Key",
|
||||
placeholder:
|
||||
"No secret key generated yet. Save the workflow to generate one.",
|
||||
opts: {
|
||||
isCopyable: true,
|
||||
},
|
||||
},
|
||||
fieldType: FieldType.HiddenText,
|
||||
title: "Webhook Secret Key",
|
||||
placeholder:
|
||||
"No secret key generated yet. Save the workflow to generate one.",
|
||||
opts: {
|
||||
isCopyable: true,
|
||||
},
|
||||
},
|
||||
],
|
||||
modelId: modelId,
|
||||
}}
|
||||
/>
|
||||
],
|
||||
modelId: modelId,
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
|
||||
{showResetConfirmation && (
|
||||
<ConfirmModal
|
||||
|
||||
@@ -15,6 +15,7 @@ import logger from "Common/Server/Utils/Logger";
|
||||
import Realtime from "Common/Server/Utils/Realtime";
|
||||
import App from "Common/Server/Utils/StartServer";
|
||||
import Telemetry from "Common/Server/Utils/Telemetry";
|
||||
import Profiling from "Common/Server/Utils/Profiling";
|
||||
import "ejs";
|
||||
import OpenAPIUtil from "Common/Server/Utils/OpenAPI";
|
||||
|
||||
@@ -27,6 +28,11 @@ const init: PromiseVoidFunction = async (): Promise<void> => {
|
||||
serviceName: APP_NAME,
|
||||
});
|
||||
|
||||
// Initialize profiling (opt-in via ENABLE_PROFILING env var)
|
||||
Profiling.init({
|
||||
serviceName: APP_NAME,
|
||||
});
|
||||
|
||||
const statusCheck: PromiseVoidFunction = async (): Promise<void> => {
|
||||
// Check the status of infrastructure components
|
||||
return await InfrastructureStatus.checkStatusWithRetry({
|
||||
|
||||
@@ -406,6 +406,9 @@ export const IpWhitelist: string = process.env["IP_WHITELIST"] || "";
|
||||
export const DisableTelemetry: boolean =
|
||||
process.env["DISABLE_TELEMETRY"] === "true";
|
||||
|
||||
export const EnableProfiling: boolean =
|
||||
process.env["ENABLE_PROFILING"] === "true";
|
||||
|
||||
export const IsEnterpriseEdition: boolean =
|
||||
process.env["IS_ENTERPRISE_EDITION"] === "true";
|
||||
|
||||
|
||||
@@ -31,6 +31,8 @@ import { IsBillingEnabled } from "../EnvironmentConfig";
|
||||
import logger from "../Utils/Logger";
|
||||
import TelemetryUtil from "../Utils/Telemetry/Telemetry";
|
||||
import MetricService from "./MetricService";
|
||||
import GlobalConfigService from "./GlobalConfigService";
|
||||
import GlobalConfig from "../../Models/DatabaseModels/GlobalConfig";
|
||||
import OneUptimeDate from "../../Types/Date";
|
||||
import Metric, {
|
||||
MetricPointType,
|
||||
@@ -1060,6 +1062,41 @@ ${alertSeverity.name}
|
||||
});
|
||||
}
|
||||
|
||||
private static readonly DEFAULT_METRIC_RETENTION_DAYS: number = 180;
|
||||
|
||||
private async getMetricRetentionDays(): Promise<number> {
|
||||
try {
|
||||
const globalConfig: GlobalConfig | null =
|
||||
await GlobalConfigService.findOneBy({
|
||||
query: {
|
||||
_id: ObjectID.getZeroObjectID().toString(),
|
||||
},
|
||||
props: {
|
||||
isRoot: true,
|
||||
},
|
||||
select: {
|
||||
monitorMetricRetentionInDays: true,
|
||||
},
|
||||
});
|
||||
|
||||
if (
|
||||
globalConfig &&
|
||||
globalConfig.monitorMetricRetentionInDays !== undefined &&
|
||||
globalConfig.monitorMetricRetentionInDays !== null &&
|
||||
globalConfig.monitorMetricRetentionInDays > 0
|
||||
) {
|
||||
return globalConfig.monitorMetricRetentionInDays;
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error(
|
||||
"Error fetching metric retention config, using default:",
|
||||
);
|
||||
logger.error(error);
|
||||
}
|
||||
|
||||
return Service.DEFAULT_METRIC_RETENTION_DAYS;
|
||||
}
|
||||
|
||||
@CaptureSpan()
|
||||
public async refreshAlertMetrics(data: { alertId: ObjectID }): Promise<void> {
|
||||
const alert: Model | null = await this.findOneById({
|
||||
@@ -1130,6 +1167,12 @@ ${alertSeverity.name}
|
||||
const itemsToSave: Array<Metric> = [];
|
||||
const metricTypesMap: Dictionary<MetricType> = {};
|
||||
|
||||
const metricRetentionDays: number = await this.getMetricRetentionDays();
|
||||
const alertMetricRetentionDate: Date = OneUptimeDate.addRemoveDays(
|
||||
OneUptimeDate.getCurrentDate(),
|
||||
metricRetentionDays,
|
||||
);
|
||||
|
||||
// now we need to create new metrics for this alert - TimeToAcknowledge, TimeToResolve, AlertCount, AlertDuration
|
||||
const alertStartsAt: Date =
|
||||
firstAlertStateTimeline?.startsAt ||
|
||||
@@ -1160,6 +1203,7 @@ ${alertSeverity.name}
|
||||
alertCountMetric.time,
|
||||
);
|
||||
alertCountMetric.metricPointType = MetricPointType.Sum;
|
||||
alertCountMetric.retentionDate = alertMetricRetentionDate;
|
||||
|
||||
itemsToSave.push(alertCountMetric);
|
||||
|
||||
@@ -1214,6 +1258,7 @@ ${alertSeverity.name}
|
||||
timeToAcknowledgeMetric.time,
|
||||
);
|
||||
timeToAcknowledgeMetric.metricPointType = MetricPointType.Sum;
|
||||
timeToAcknowledgeMetric.retentionDate = alertMetricRetentionDate;
|
||||
|
||||
itemsToSave.push(timeToAcknowledgeMetric);
|
||||
|
||||
@@ -1270,6 +1315,7 @@ ${alertSeverity.name}
|
||||
timeToResolveMetric.time,
|
||||
);
|
||||
timeToResolveMetric.metricPointType = MetricPointType.Sum;
|
||||
timeToResolveMetric.retentionDate = alertMetricRetentionDate;
|
||||
|
||||
itemsToSave.push(timeToResolveMetric);
|
||||
|
||||
@@ -1319,6 +1365,7 @@ ${alertSeverity.name}
|
||||
alertDurationMetric.time,
|
||||
);
|
||||
alertDurationMetric.metricPointType = MetricPointType.Sum;
|
||||
alertDurationMetric.retentionDate = alertMetricRetentionDate;
|
||||
|
||||
itemsToSave.push(alertDurationMetric);
|
||||
|
||||
|
||||
@@ -36,6 +36,8 @@ import MonitorStatusTimeline from "../../Models/DatabaseModels/MonitorStatusTime
|
||||
import User from "../../Models/DatabaseModels/User";
|
||||
import { IsBillingEnabled } from "../EnvironmentConfig";
|
||||
import MetricService from "./MetricService";
|
||||
import GlobalConfigService from "./GlobalConfigService";
|
||||
import GlobalConfig from "../../Models/DatabaseModels/GlobalConfig";
|
||||
import IncidentMetricType from "../../Types/Incident/IncidentMetricType";
|
||||
import Metric, {
|
||||
MetricPointType,
|
||||
@@ -1396,6 +1398,12 @@ ${incident.remediationNotes || "No remediation notes provided."}
|
||||
postmortemMetric.time,
|
||||
);
|
||||
postmortemMetric.metricPointType = MetricPointType.Sum;
|
||||
const postmortemRetentionDays: number =
|
||||
await this.getMetricRetentionDays();
|
||||
postmortemMetric.retentionDate = OneUptimeDate.addRemoveDays(
|
||||
OneUptimeDate.getCurrentDate(),
|
||||
postmortemRetentionDays,
|
||||
);
|
||||
|
||||
await MetricService.create({
|
||||
data: postmortemMetric,
|
||||
@@ -1583,6 +1591,12 @@ ${incidentSeverity.name}
|
||||
severityChangeMetric.time,
|
||||
);
|
||||
severityChangeMetric.metricPointType = MetricPointType.Sum;
|
||||
const severityRetentionDays: number =
|
||||
await this.getMetricRetentionDays();
|
||||
severityChangeMetric.retentionDate = OneUptimeDate.addRemoveDays(
|
||||
OneUptimeDate.getCurrentDate(),
|
||||
severityRetentionDays,
|
||||
);
|
||||
|
||||
await MetricService.create({
|
||||
data: severityChangeMetric,
|
||||
@@ -2075,6 +2089,41 @@ ${incidentSeverity.name}
|
||||
});
|
||||
}
|
||||
|
||||
private static readonly DEFAULT_METRIC_RETENTION_DAYS: number = 180;
|
||||
|
||||
private async getMetricRetentionDays(): Promise<number> {
|
||||
try {
|
||||
const globalConfig: GlobalConfig | null =
|
||||
await GlobalConfigService.findOneBy({
|
||||
query: {
|
||||
_id: ObjectID.getZeroObjectID().toString(),
|
||||
},
|
||||
props: {
|
||||
isRoot: true,
|
||||
},
|
||||
select: {
|
||||
monitorMetricRetentionInDays: true,
|
||||
},
|
||||
});
|
||||
|
||||
if (
|
||||
globalConfig &&
|
||||
globalConfig.monitorMetricRetentionInDays !== undefined &&
|
||||
globalConfig.monitorMetricRetentionInDays !== null &&
|
||||
globalConfig.monitorMetricRetentionInDays > 0
|
||||
) {
|
||||
return globalConfig.monitorMetricRetentionInDays;
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error(
|
||||
"Error fetching metric retention config, using default:",
|
||||
);
|
||||
logger.error(error);
|
||||
}
|
||||
|
||||
return Service.DEFAULT_METRIC_RETENTION_DAYS;
|
||||
}
|
||||
|
||||
@CaptureSpan()
|
||||
public async refreshIncidentMetrics(data: {
|
||||
incidentId: ObjectID;
|
||||
@@ -2223,6 +2272,12 @@ ${incidentSeverity.name}
|
||||
|
||||
const itemsToSave: Array<Metric> = [];
|
||||
|
||||
const metricRetentionDays: number = await this.getMetricRetentionDays();
|
||||
const incidentMetricRetentionDate: Date = OneUptimeDate.addRemoveDays(
|
||||
OneUptimeDate.getCurrentDate(),
|
||||
metricRetentionDays,
|
||||
);
|
||||
|
||||
// now we need to create new metrics for this incident - TimeToAcknowledge, TimeToResolve, IncidentCount, IncidentDuration
|
||||
|
||||
const incidentStartsAt: Date =
|
||||
@@ -2270,6 +2325,7 @@ ${incidentSeverity.name}
|
||||
incidentCountMetric.time,
|
||||
);
|
||||
incidentCountMetric.metricPointType = MetricPointType.Sum;
|
||||
incidentCountMetric.retentionDate = incidentMetricRetentionDate;
|
||||
|
||||
itemsToSave.push(incidentCountMetric);
|
||||
|
||||
@@ -2321,6 +2377,7 @@ ${incidentSeverity.name}
|
||||
timeToAcknowledgeMetric.time,
|
||||
);
|
||||
timeToAcknowledgeMetric.metricPointType = MetricPointType.Sum;
|
||||
timeToAcknowledgeMetric.retentionDate = incidentMetricRetentionDate;
|
||||
|
||||
itemsToSave.push(timeToAcknowledgeMetric);
|
||||
|
||||
@@ -2374,6 +2431,7 @@ ${incidentSeverity.name}
|
||||
timeToResolveMetric.time,
|
||||
);
|
||||
timeToResolveMetric.metricPointType = MetricPointType.Sum;
|
||||
timeToResolveMetric.retentionDate = incidentMetricRetentionDate;
|
||||
|
||||
itemsToSave.push(timeToResolveMetric);
|
||||
|
||||
@@ -2422,6 +2480,7 @@ ${incidentSeverity.name}
|
||||
incidentDurationMetric.time,
|
||||
);
|
||||
incidentDurationMetric.metricPointType = MetricPointType.Sum;
|
||||
incidentDurationMetric.retentionDate = incidentMetricRetentionDate;
|
||||
|
||||
itemsToSave.push(incidentDurationMetric);
|
||||
|
||||
@@ -2474,6 +2533,7 @@ ${incidentSeverity.name}
|
||||
timeInStateMetric.time,
|
||||
);
|
||||
timeInStateMetric.metricPointType = MetricPointType.Sum;
|
||||
timeInStateMetric.retentionDate = incidentMetricRetentionDate;
|
||||
|
||||
itemsToSave.push(timeInStateMetric);
|
||||
}
|
||||
|
||||
@@ -676,6 +676,7 @@ export class LogAggregationService {
|
||||
bodySearchText?: string | undefined;
|
||||
traceIds?: Array<string> | undefined;
|
||||
spanIds?: Array<string> | undefined;
|
||||
attributes?: Record<string, string> | undefined;
|
||||
}): Promise<Array<JSONObject>> {
|
||||
const maxLimit: number = Math.min(request.limit || 10000, 10000);
|
||||
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import { WorkflowHostname } from "../EnvironmentConfig";
|
||||
import ClusterKeyAuthorization from "../Middleware/ClusterKeyAuthorization";
|
||||
import CreateBy from "../Types/Database/CreateBy";
|
||||
import { OnCreate, OnUpdate } from "../Types/Database/Hooks";
|
||||
import DatabaseService from "./DatabaseService";
|
||||
import EmptyResponseData from "../../Types/API/EmptyResponse";
|
||||
@@ -26,18 +25,29 @@ export class Service extends DatabaseService<Model> {
|
||||
}
|
||||
|
||||
@CaptureSpan()
|
||||
protected override async onBeforeCreate(
|
||||
createBy: CreateBy<Model>,
|
||||
): Promise<OnCreate<Model>> {
|
||||
protected override async onCreateSuccess(
|
||||
_onCreate: OnCreate<Model>,
|
||||
createdItem: Model,
|
||||
): Promise<Model> {
|
||||
// Auto-generate webhook secret key for new workflows.
|
||||
if (!createBy.data.webhookSecretKey) {
|
||||
createBy.data.webhookSecretKey = UUID.generate();
|
||||
if (!createdItem.webhookSecretKey && createdItem._id) {
|
||||
const secretKey: string = UUID.generate();
|
||||
|
||||
await this.updateOneById({
|
||||
id: new ObjectID(createdItem._id),
|
||||
data: {
|
||||
webhookSecretKey: secretKey,
|
||||
} as any,
|
||||
props: {
|
||||
isRoot: true,
|
||||
ignoreHooks: true,
|
||||
},
|
||||
});
|
||||
|
||||
createdItem.webhookSecretKey = secretKey;
|
||||
}
|
||||
|
||||
return {
|
||||
createBy,
|
||||
carryForward: null,
|
||||
};
|
||||
return createdItem;
|
||||
}
|
||||
|
||||
@CaptureSpan()
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import logger from "../Logger";
|
||||
import LogAggregationService from "../../Services/LogAggregationService";
|
||||
import VMUtil from "../VM/VMAPI";
|
||||
import APIRequestCriteria from "./Criteria/APIRequestCriteria";
|
||||
import CustomCodeMonitoringCriteria from "./Criteria/CustomCodeMonitorCriteria";
|
||||
@@ -116,7 +117,7 @@ export default class MonitorCriteriaEvaluator {
|
||||
`;
|
||||
|
||||
const contextBlock: string | null =
|
||||
MonitorCriteriaEvaluator.buildRootCauseContext({
|
||||
await MonitorCriteriaEvaluator.buildRootCauseContext({
|
||||
dataToProcess: input.dataToProcess,
|
||||
monitorStep: input.monitorStep,
|
||||
monitor: input.monitor,
|
||||
@@ -557,14 +558,16 @@ ${contextBlock}
|
||||
return null;
|
||||
}
|
||||
|
||||
private static buildRootCauseContext(input: {
|
||||
private static async buildRootCauseContext(input: {
|
||||
dataToProcess: DataToProcess;
|
||||
monitorStep: MonitorStep;
|
||||
monitor: Monitor;
|
||||
}): string | null {
|
||||
}): Promise<string | null> {
|
||||
// Handle Kubernetes monitors with rich resource context
|
||||
if (input.monitor.monitorType === MonitorType.Kubernetes) {
|
||||
return MonitorCriteriaEvaluator.buildKubernetesRootCauseContext(input);
|
||||
return await MonitorCriteriaEvaluator.buildKubernetesRootCauseContext(
|
||||
input,
|
||||
);
|
||||
}
|
||||
|
||||
const requestDetails: Array<string> = [];
|
||||
@@ -675,11 +678,11 @@ ${contextBlock}
|
||||
return sections.join("\n");
|
||||
}
|
||||
|
||||
private static buildKubernetesRootCauseContext(input: {
|
||||
private static async buildKubernetesRootCauseContext(input: {
|
||||
dataToProcess: DataToProcess;
|
||||
monitorStep: MonitorStep;
|
||||
monitor: Monitor;
|
||||
}): string | null {
|
||||
}): Promise<string | null> {
|
||||
const metricResponse: MetricMonitorResponse =
|
||||
input.dataToProcess as MetricMonitorResponse;
|
||||
|
||||
@@ -730,7 +733,7 @@ ${contextBlock}
|
||||
);
|
||||
|
||||
if (sortedResources.length === 0) {
|
||||
continue;
|
||||
return sections.join("\n");
|
||||
}
|
||||
|
||||
// Show top 10 affected resources
|
||||
@@ -833,6 +836,73 @@ ${contextBlock}
|
||||
if (analysis) {
|
||||
sections.push(`\n\n**Root Cause Analysis**\n${analysis}`);
|
||||
}
|
||||
|
||||
// Fetch recent container logs for the top affected resource during CrashLoopBackOff
|
||||
if (
|
||||
(breakdown.metricName === "k8s.container.restarts" ||
|
||||
breakdown.metricName.includes("restart")) &&
|
||||
input.monitor.projectId
|
||||
) {
|
||||
const topResource: KubernetesAffectedResource = resourcesToShow[0]!;
|
||||
|
||||
try {
|
||||
const logAttributes: Record<string, string> = {};
|
||||
|
||||
if (breakdown.clusterName) {
|
||||
logAttributes["resource.k8s.cluster.name"] =
|
||||
breakdown.clusterName;
|
||||
}
|
||||
|
||||
if (topResource.podName) {
|
||||
logAttributes["resource.k8s.pod.name"] = topResource.podName;
|
||||
}
|
||||
|
||||
if (topResource.containerName) {
|
||||
logAttributes["resource.k8s.container.name"] =
|
||||
topResource.containerName;
|
||||
}
|
||||
|
||||
if (topResource.namespace) {
|
||||
logAttributes["resource.k8s.namespace.name"] =
|
||||
topResource.namespace;
|
||||
}
|
||||
|
||||
const now: Date = OneUptimeDate.getCurrentDate();
|
||||
const fifteenMinutesAgo: Date =
|
||||
OneUptimeDate.addRemoveMinutes(now, -15);
|
||||
|
||||
const logs: Array<JSONObject> =
|
||||
await LogAggregationService.getExportLogs({
|
||||
projectId: input.monitor.projectId,
|
||||
startTime: fifteenMinutesAgo,
|
||||
endTime: now,
|
||||
limit: 50,
|
||||
attributes: logAttributes,
|
||||
});
|
||||
|
||||
if (logs.length > 0) {
|
||||
const logLines: Array<string> = logs.map((log: JSONObject) => {
|
||||
const timestamp: string = log["time"]
|
||||
? String(log["time"])
|
||||
: "";
|
||||
const severity: string = log["severityText"]
|
||||
? String(log["severityText"])
|
||||
: "INFO";
|
||||
const body: string = log["body"] ? String(log["body"]) : "";
|
||||
return `\`${timestamp}\` **${severity}** ${body}`;
|
||||
});
|
||||
|
||||
sections.push(
|
||||
`\n\n**Recent Container Logs** (${topResource.podName || "unknown pod"} / ${topResource.containerName || "unknown container"}, last 15 minutes)\n\n${logLines.join("\n\n")}`,
|
||||
);
|
||||
}
|
||||
} catch (err) {
|
||||
logger.error(
|
||||
"Failed to fetch container logs for root cause context",
|
||||
);
|
||||
logger.error(err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sections.join("\n");
|
||||
|
||||
581
Common/Server/Utils/Profiling.ts
Normal file
581
Common/Server/Utils/Profiling.ts
Normal file
@@ -0,0 +1,581 @@
|
||||
import inspector from "inspector";
|
||||
import http from "http";
|
||||
import https from "https";
|
||||
import zlib from "zlib";
|
||||
import { URL as NodeURL } from "url";
|
||||
import Dictionary from "../../Types/Dictionary";
|
||||
import {
|
||||
AppVersion,
|
||||
Env,
|
||||
DisableTelemetry,
|
||||
EnableProfiling,
|
||||
} from "../EnvironmentConfig";
|
||||
import logger from "./Logger";
|
||||
|
||||
// V8 CPU Profile types from the inspector module
|
||||
interface V8CallFrame {
|
||||
functionName: string;
|
||||
scriptId: string;
|
||||
url: string;
|
||||
lineNumber: number;
|
||||
columnNumber: number;
|
||||
}
|
||||
|
||||
interface V8CpuProfileNode {
|
||||
id: number;
|
||||
callFrame: V8CallFrame;
|
||||
hitCount: number;
|
||||
children?: Array<number>;
|
||||
}
|
||||
|
||||
interface V8CpuProfile {
|
||||
nodes: Array<V8CpuProfileNode>;
|
||||
startTime: number; // microseconds (monotonic clock)
|
||||
endTime: number; // microseconds (monotonic clock)
|
||||
samples: Array<number>; // node IDs
|
||||
timeDeltas: Array<number>; // microseconds between samples
|
||||
}
|
||||
|
||||
export default class Profiling {
|
||||
private static session: inspector.Session | null = null;
|
||||
private static intervalId: ReturnType<typeof setInterval> | null = null;
|
||||
private static serviceName: string = "";
|
||||
private static isCollecting: boolean = false;
|
||||
|
||||
// Profile every 60 seconds, sample for 10 seconds each time
|
||||
private static readonly PROFILING_INTERVAL_MS: number = 60_000;
|
||||
private static readonly PROFILING_DURATION_MS: number = 10_000;
|
||||
|
||||
public static init(data: { serviceName: string }): void {
|
||||
if (!EnableProfiling) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (DisableTelemetry) {
|
||||
return;
|
||||
}
|
||||
|
||||
const endpoint: string | null = this.getOtlpProfilesEndpoint();
|
||||
const headers: Dictionary<string> = this.getHeaders();
|
||||
|
||||
if (!endpoint || Object.keys(headers).length === 0) {
|
||||
logger.warn(
|
||||
"Profiling enabled but OTLP endpoint or headers not configured. Skipping profiling initialization.",
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
this.serviceName = data.serviceName;
|
||||
|
||||
try {
|
||||
this.session = new inspector.Session();
|
||||
this.session.connect();
|
||||
|
||||
this.postToSession("Profiler.enable")
|
||||
.then(() => {
|
||||
logger.info(
|
||||
`CPU profiling initialized for service: ${data.serviceName}`,
|
||||
);
|
||||
this.startProfilingLoop();
|
||||
})
|
||||
.catch((err: unknown) => {
|
||||
logger.error("Failed to enable V8 profiler:");
|
||||
logger.error(err);
|
||||
});
|
||||
} catch (err) {
|
||||
logger.error("Failed to initialize profiling session:");
|
||||
logger.error(err);
|
||||
}
|
||||
|
||||
process.on("SIGTERM", () => {
|
||||
this.stop();
|
||||
});
|
||||
}
|
||||
|
||||
public static stop(): void {
|
||||
if (this.intervalId) {
|
||||
clearInterval(this.intervalId);
|
||||
this.intervalId = null;
|
||||
}
|
||||
|
||||
if (this.session) {
|
||||
try {
|
||||
this.session.post("Profiler.disable");
|
||||
this.session.disconnect();
|
||||
} catch {
|
||||
// Ignore errors during cleanup
|
||||
}
|
||||
this.session = null;
|
||||
}
|
||||
}
|
||||
|
||||
private static getOtlpProfilesEndpoint(): string | null {
|
||||
const base: string | undefined =
|
||||
process.env["OPENTELEMETRY_EXPORTER_OTLP_ENDPOINT"];
|
||||
if (!base) {
|
||||
return null;
|
||||
}
|
||||
return `${base}/v1/profiles`;
|
||||
}
|
||||
|
||||
private static getHeaders(): Dictionary<string> {
|
||||
if (!process.env["OPENTELEMETRY_EXPORTER_OTLP_HEADERS"]) {
|
||||
return {};
|
||||
}
|
||||
|
||||
const headersStrings: Array<string> =
|
||||
process.env["OPENTELEMETRY_EXPORTER_OTLP_HEADERS"].split(";");
|
||||
|
||||
const headers: Dictionary<string> = {};
|
||||
|
||||
for (const headerString of headersStrings) {
|
||||
const parts: Array<string> = headerString.split("=");
|
||||
if (parts.length === 2) {
|
||||
headers[parts[0]!.toString()] = parts[1]!.toString();
|
||||
}
|
||||
}
|
||||
|
||||
return headers;
|
||||
}
|
||||
|
||||
private static startProfilingLoop(): void {
|
||||
// Start the first collection after a short delay
|
||||
setTimeout(() => {
|
||||
this.collectAndSendProfile().catch((err: unknown) => {
|
||||
logger.error("Error in initial profile collection:");
|
||||
logger.error(err);
|
||||
});
|
||||
}, 5000);
|
||||
|
||||
this.intervalId = setInterval(() => {
|
||||
this.collectAndSendProfile().catch((err: unknown) => {
|
||||
logger.error("Error in profile collection:");
|
||||
logger.error(err);
|
||||
});
|
||||
}, this.PROFILING_INTERVAL_MS);
|
||||
}
|
||||
|
||||
private static async collectAndSendProfile(): Promise<void> {
|
||||
if (!this.session || this.isCollecting) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.isCollecting = true;
|
||||
const wallClockStartMs: number = Date.now();
|
||||
|
||||
try {
|
||||
await this.postToSession("Profiler.start");
|
||||
|
||||
await new Promise<void>((resolve: () => void) => {
|
||||
return setTimeout(resolve, this.PROFILING_DURATION_MS);
|
||||
});
|
||||
|
||||
const wallClockEndMs: number = Date.now();
|
||||
const result: unknown = await this.postToSession("Profiler.stop");
|
||||
const profile: V8CpuProfile | undefined = (
|
||||
result as { profile?: V8CpuProfile }
|
||||
)?.profile;
|
||||
|
||||
if (!profile || !profile.samples || profile.samples.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const otlpPayload: object = this.convertV8ProfileToOTLP(
|
||||
profile,
|
||||
wallClockStartMs,
|
||||
wallClockEndMs,
|
||||
);
|
||||
|
||||
await this.sendProfile(otlpPayload);
|
||||
} catch (err) {
|
||||
logger.error("Error collecting/sending profile:");
|
||||
logger.error(err);
|
||||
} finally {
|
||||
this.isCollecting = false;
|
||||
}
|
||||
}
|
||||
|
||||
private static postToSession(
|
||||
method: string,
|
||||
params?: object,
|
||||
): Promise<unknown> {
|
||||
return new Promise<unknown>(
|
||||
(resolve: (value: unknown) => void, reject: (reason: Error) => void) => {
|
||||
if (!this.session) {
|
||||
reject(new Error("Inspector session not available"));
|
||||
return;
|
||||
}
|
||||
|
||||
this.session.post(
|
||||
method,
|
||||
params || {},
|
||||
(err: Error | null, result?: object) => {
|
||||
if (err) {
|
||||
reject(err);
|
||||
} else {
|
||||
resolve(result);
|
||||
}
|
||||
},
|
||||
);
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
private static convertV8ProfileToOTLP(
|
||||
v8Profile: V8CpuProfile,
|
||||
wallClockStartMs: number,
|
||||
wallClockEndMs: number,
|
||||
): object {
|
||||
// Build node lookup and parent maps
|
||||
const nodeMap: Map<number, V8CpuProfileNode> = new Map<
|
||||
number,
|
||||
V8CpuProfileNode
|
||||
>();
|
||||
const parentMap: Map<number, number> = new Map<number, number>();
|
||||
|
||||
for (const node of v8Profile.nodes) {
|
||||
nodeMap.set(node.id, node);
|
||||
if (node.children) {
|
||||
for (const childId of node.children) {
|
||||
parentMap.set(childId, node.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// String table with deduplication
|
||||
const stringTable: Array<string> = [""];
|
||||
const stringIndexMap: Map<string, number> = new Map<string, number>();
|
||||
stringIndexMap.set("", 0);
|
||||
|
||||
const getStringIndex: (s: string) => number = (s: string): number => {
|
||||
let idx: number | undefined = stringIndexMap.get(s);
|
||||
if (idx === undefined) {
|
||||
idx = stringTable.length;
|
||||
stringTable.push(s);
|
||||
stringIndexMap.set(s, idx);
|
||||
}
|
||||
return idx;
|
||||
};
|
||||
|
||||
// Predefined string indices for sample types
|
||||
const cpuTypeIdx: number = getStringIndex("cpu");
|
||||
const nanosecondsIdx: number = getStringIndex("nanoseconds");
|
||||
const samplesTypeIdx: number = getStringIndex("samples");
|
||||
const countIdx: number = getStringIndex("count");
|
||||
|
||||
// Build function and location tables
|
||||
const functionTable: Array<{ name: number; filename: number }> = [];
|
||||
const locationTable: Array<{
|
||||
line: Array<{ functionIndex: number; line: number }>;
|
||||
}> = [];
|
||||
|
||||
const funcIndexMap: Map<string, number> = new Map<string, number>();
|
||||
const locationIndexMap: Map<string, number> = new Map<string, number>();
|
||||
|
||||
const getLocationIndex: (node: V8CpuProfileNode) => number = (
|
||||
node: V8CpuProfileNode,
|
||||
): number => {
|
||||
const locKey: string = `${node.callFrame.functionName}|${node.callFrame.url}|${node.callFrame.lineNumber}`;
|
||||
let locIdx: number | undefined = locationIndexMap.get(locKey);
|
||||
if (locIdx !== undefined) {
|
||||
return locIdx;
|
||||
}
|
||||
|
||||
// Ensure function entry exists
|
||||
const fKey: string = `${node.callFrame.functionName}|${node.callFrame.url}`;
|
||||
let fIdx: number | undefined = funcIndexMap.get(fKey);
|
||||
if (fIdx === undefined) {
|
||||
fIdx = functionTable.length;
|
||||
functionTable.push({
|
||||
name: getStringIndex(node.callFrame.functionName || "(anonymous)"),
|
||||
filename: getStringIndex(node.callFrame.url || ""),
|
||||
});
|
||||
funcIndexMap.set(fKey, fIdx);
|
||||
}
|
||||
|
||||
locIdx = locationTable.length;
|
||||
locationTable.push({
|
||||
line: [
|
||||
{
|
||||
functionIndex: fIdx,
|
||||
line: Math.max(0, node.callFrame.lineNumber + 1), // V8 uses 0-based line numbers
|
||||
},
|
||||
],
|
||||
});
|
||||
locationIndexMap.set(locKey, locIdx);
|
||||
|
||||
return locIdx;
|
||||
};
|
||||
|
||||
// Build stack table from samples
|
||||
const stackTable: Array<{ locationIndices: Array<number> }> = [];
|
||||
const stackKeyMap: Map<string, number> = new Map<string, number>();
|
||||
|
||||
const getStackIndex: (leafNodeId: number) => number = (
|
||||
leafNodeId: number,
|
||||
): number => {
|
||||
const locationIndices: Array<number> = [];
|
||||
let currentId: number | undefined = leafNodeId;
|
||||
|
||||
while (currentId !== undefined) {
|
||||
const node: V8CpuProfileNode | undefined = nodeMap.get(currentId);
|
||||
if (!node) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Skip V8 internal nodes
|
||||
const fnName: string = node.callFrame.functionName;
|
||||
if (
|
||||
fnName !== "(root)" &&
|
||||
fnName !== "(program)" &&
|
||||
fnName !== "(idle)" &&
|
||||
fnName !== "(garbage collector)"
|
||||
) {
|
||||
locationIndices.push(getLocationIndex(node));
|
||||
}
|
||||
|
||||
currentId = parentMap.get(currentId);
|
||||
}
|
||||
|
||||
const key: string = locationIndices.join(",");
|
||||
let stackIdx: number | undefined = stackKeyMap.get(key);
|
||||
if (stackIdx === undefined) {
|
||||
stackIdx = stackTable.length;
|
||||
stackTable.push({ locationIndices });
|
||||
stackKeyMap.set(key, stackIdx);
|
||||
}
|
||||
|
||||
return stackIdx;
|
||||
};
|
||||
|
||||
// Use wall clock for absolute timestamps (V8 uses monotonic clock)
|
||||
const NANOS_PER_MS: bigint = BigInt(1000000);
|
||||
const NANOS_PER_US: bigint = BigInt(1000);
|
||||
const ZERO: bigint = BigInt(0);
|
||||
|
||||
const startTimeNano: bigint = BigInt(wallClockStartMs) * NANOS_PER_MS;
|
||||
const endTimeNano: bigint = BigInt(wallClockEndMs) * NANOS_PER_MS;
|
||||
|
||||
// Build sample entries
|
||||
const samples: Array<{
|
||||
stackIndex: number;
|
||||
value: Array<string>;
|
||||
timestampsUnixNano: Array<string>;
|
||||
}> = [];
|
||||
|
||||
let cumulativeDeltaNano: bigint = ZERO;
|
||||
const totalV8DurationUs: bigint = BigInt(
|
||||
v8Profile.endTime - v8Profile.startTime,
|
||||
);
|
||||
const totalWallDurationNano: bigint = endTimeNano - startTimeNano;
|
||||
|
||||
for (let i: number = 0; i < v8Profile.samples.length; i++) {
|
||||
const nodeId: number = v8Profile.samples[i]!;
|
||||
const node: V8CpuProfileNode | undefined = nodeMap.get(nodeId);
|
||||
|
||||
// Accumulate time delta
|
||||
const deltaUs: bigint = BigInt(v8Profile.timeDeltas[i] || 0);
|
||||
cumulativeDeltaNano = cumulativeDeltaNano + deltaUs * NANOS_PER_US;
|
||||
|
||||
if (!node) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip idle/root/program/gc samples
|
||||
const fnName: string = node.callFrame.functionName;
|
||||
if (
|
||||
fnName === "(idle)" ||
|
||||
fnName === "(root)" ||
|
||||
fnName === "(program)" ||
|
||||
fnName === "(garbage collector)"
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Map V8 monotonic time to wall clock time proportionally
|
||||
const sampleTimeNano: bigint =
|
||||
totalV8DurationUs > ZERO
|
||||
? startTimeNano +
|
||||
(cumulativeDeltaNano * totalWallDurationNano) /
|
||||
(totalV8DurationUs * NANOS_PER_US)
|
||||
: startTimeNano + cumulativeDeltaNano;
|
||||
|
||||
const timeDeltaNano: bigint = deltaUs * NANOS_PER_US;
|
||||
|
||||
const stackIndex: number = getStackIndex(nodeId);
|
||||
|
||||
samples.push({
|
||||
stackIndex,
|
||||
value: [timeDeltaNano.toString(), "1"],
|
||||
timestampsUnixNano: [sampleTimeNano.toString()],
|
||||
});
|
||||
}
|
||||
|
||||
// If no meaningful samples were collected, return an empty payload
|
||||
if (samples.length === 0) {
|
||||
return { resourceProfiles: [] };
|
||||
}
|
||||
|
||||
// Compute average sampling period in nanoseconds
|
||||
const avgPeriodNs: number =
|
||||
v8Profile.samples.length > 0
|
||||
? Math.trunc(
|
||||
((v8Profile.endTime - v8Profile.startTime) * 1000) /
|
||||
v8Profile.samples.length,
|
||||
)
|
||||
: 1_000_000; // default 1ms
|
||||
|
||||
// Generate a random profile ID (16 bytes as base64)
|
||||
const profileIdBytes: Buffer = Buffer.alloc(16);
|
||||
for (let i: number = 0; i < 16; i++) {
|
||||
profileIdBytes[i] = Math.floor(Math.random() * 256);
|
||||
}
|
||||
const profileId: string = profileIdBytes.toString("base64");
|
||||
|
||||
return {
|
||||
resourceProfiles: [
|
||||
{
|
||||
resource: {
|
||||
attributes: [
|
||||
{
|
||||
key: "service.name",
|
||||
value: { stringValue: this.serviceName },
|
||||
},
|
||||
{
|
||||
key: "service.version",
|
||||
value: { stringValue: AppVersion },
|
||||
},
|
||||
{
|
||||
key: "deployment.environment",
|
||||
value: { stringValue: Env },
|
||||
},
|
||||
],
|
||||
},
|
||||
scopeProfiles: [
|
||||
{
|
||||
scope: {
|
||||
name: "oneuptime-node-profiler",
|
||||
version: "1.0.0",
|
||||
},
|
||||
profiles: [
|
||||
{
|
||||
profileId: profileId,
|
||||
startTimeUnixNano: startTimeNano.toString(),
|
||||
endTimeUnixNano: endTimeNano.toString(),
|
||||
attributes: [
|
||||
{
|
||||
key: "profiler.name",
|
||||
value: { stringValue: "v8-cpu-profiler" },
|
||||
},
|
||||
{
|
||||
key: "runtime.name",
|
||||
value: { stringValue: "nodejs" },
|
||||
},
|
||||
{
|
||||
key: "runtime.version",
|
||||
value: { stringValue: process.version },
|
||||
},
|
||||
],
|
||||
profile: {
|
||||
stringTable,
|
||||
sampleType: [
|
||||
{ type: cpuTypeIdx, unit: nanosecondsIdx },
|
||||
{ type: samplesTypeIdx, unit: countIdx },
|
||||
],
|
||||
sample: samples,
|
||||
locationTable,
|
||||
functionTable,
|
||||
stackTable,
|
||||
linkTable: [],
|
||||
attributeTable: [],
|
||||
periodType: { type: cpuTypeIdx, unit: nanosecondsIdx },
|
||||
period: avgPeriodNs.toString(),
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
private static async sendProfile(payload: object): Promise<void> {
|
||||
const endpoint: string | null = this.getOtlpProfilesEndpoint();
|
||||
if (!endpoint) {
|
||||
return;
|
||||
}
|
||||
|
||||
const resourceProfiles: Array<unknown> = (
|
||||
payload as { resourceProfiles: Array<unknown> }
|
||||
).resourceProfiles;
|
||||
if (!resourceProfiles || resourceProfiles.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const headers: Dictionary<string> = this.getHeaders();
|
||||
const jsonData: string = JSON.stringify(payload);
|
||||
|
||||
const compressed: Buffer = await new Promise<Buffer>(
|
||||
(resolve: (value: Buffer) => void, reject: (reason: Error) => void) => {
|
||||
zlib.gzip(jsonData, (err: Error | null, result: Buffer) => {
|
||||
if (err) {
|
||||
reject(err);
|
||||
} else {
|
||||
resolve(result);
|
||||
}
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
const url: NodeURL = new NodeURL(endpoint);
|
||||
const isHttps: boolean = url.protocol === "https:";
|
||||
const httpModule: typeof http | typeof https = isHttps ? https : http;
|
||||
|
||||
return new Promise<void>((resolve: () => void) => {
|
||||
const req: http.ClientRequest = httpModule.request(
|
||||
{
|
||||
hostname: url.hostname,
|
||||
port: url.port || (isHttps ? 443 : 80),
|
||||
path: url.pathname,
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
"Content-Encoding": "gzip",
|
||||
...headers,
|
||||
},
|
||||
},
|
||||
(res: http.IncomingMessage) => {
|
||||
let data: string = "";
|
||||
res.on("data", (chunk: Buffer) => {
|
||||
data += chunk.toString();
|
||||
});
|
||||
res.on("end", () => {
|
||||
if (
|
||||
res.statusCode &&
|
||||
res.statusCode >= 200 &&
|
||||
res.statusCode < 300
|
||||
) {
|
||||
logger.debug(
|
||||
`Profile sent successfully for service: ${this.serviceName}`,
|
||||
);
|
||||
} else {
|
||||
logger.warn(
|
||||
`Profile export failed with status ${res.statusCode}: ${data}`,
|
||||
);
|
||||
}
|
||||
resolve();
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
req.on("error", (err: Error) => {
|
||||
logger.warn(`Profile export error: ${err.message}`);
|
||||
resolve(); // Don't throw - profiling failures should not crash the service
|
||||
});
|
||||
|
||||
req.write(compressed);
|
||||
req.end();
|
||||
});
|
||||
}
|
||||
}
|
||||
15
Common/Types/Exception/ExceptionMetricType.ts
Normal file
15
Common/Types/Exception/ExceptionMetricType.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
enum ExceptionMetricType {
|
||||
ExceptionCount = "oneuptime.exception.count",
|
||||
ExceptionRate = "oneuptime.exception.rate",
|
||||
ExceptionCountByType = "oneuptime.exception.count.by.type",
|
||||
ExceptionCountByService = "oneuptime.exception.count.by.service",
|
||||
UnresolvedExceptionCount = "oneuptime.exception.unresolved.count",
|
||||
ResolvedExceptionCount = "oneuptime.exception.resolved.count",
|
||||
MutedExceptionCount = "oneuptime.exception.muted.count",
|
||||
ExceptionFirstSeenTime = "oneuptime.exception.first.seen.time",
|
||||
ExceptionLastSeenTime = "oneuptime.exception.last.seen.time",
|
||||
ExceptionOccurrenceCount = "oneuptime.exception.occurrence.count",
|
||||
ExceptionAffectedServiceCount = "oneuptime.exception.affected.service.count",
|
||||
}
|
||||
|
||||
export default ExceptionMetricType;
|
||||
28
Common/Types/Metrics/MetricDashboardMetricType.ts
Normal file
28
Common/Types/Metrics/MetricDashboardMetricType.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
enum MetricDashboardMetricType {
|
||||
// HTTP metrics
|
||||
HttpRequestDuration = "http.server.request.duration",
|
||||
HttpRequestCount = "http.server.request.count",
|
||||
HttpRequestErrorRate = "http.server.request.error.rate",
|
||||
HttpResponseSize = "http.server.response.body.size",
|
||||
HttpRequestSize = "http.server.request.body.size",
|
||||
HttpActiveRequests = "http.server.active_requests",
|
||||
|
||||
// System metrics
|
||||
SystemCpuUtilization = "system.cpu.utilization",
|
||||
SystemMemoryUsage = "system.memory.usage",
|
||||
SystemDiskIo = "system.disk.io",
|
||||
SystemNetworkIo = "system.network.io",
|
||||
|
||||
// Runtime metrics
|
||||
ProcessCpuUtilization = "process.cpu.utilization",
|
||||
ProcessMemoryUsage = "process.runtime.jvm.memory.usage",
|
||||
GcDuration = "process.runtime.jvm.gc.duration",
|
||||
ThreadCount = "process.runtime.jvm.threads.count",
|
||||
|
||||
// Custom application metrics
|
||||
CustomCounter = "custom.counter",
|
||||
CustomGauge = "custom.gauge",
|
||||
CustomHistogram = "custom.histogram",
|
||||
}
|
||||
|
||||
export default MetricDashboardMetricType;
|
||||
16
Common/Types/Profile/ProfileMetricType.ts
Normal file
16
Common/Types/Profile/ProfileMetricType.ts
Normal file
@@ -0,0 +1,16 @@
|
||||
enum ProfileMetricType {
|
||||
CpuProfileDuration = "oneuptime.profile.cpu.duration",
|
||||
CpuProfileSampleCount = "oneuptime.profile.cpu.sample.count",
|
||||
WallClockDuration = "oneuptime.profile.wall.duration",
|
||||
MemoryAllocationSize = "oneuptime.profile.memory.allocation.size",
|
||||
MemoryAllocationCount = "oneuptime.profile.memory.allocation.count",
|
||||
HeapUsage = "oneuptime.profile.heap.usage",
|
||||
GoroutineCount = "oneuptime.profile.goroutine.count",
|
||||
ThreadCount = "oneuptime.profile.thread.count",
|
||||
ProfileSampleRate = "oneuptime.profile.sample.rate",
|
||||
ProfileCount = "oneuptime.profile.count",
|
||||
TopFunctionCpuTime = "oneuptime.profile.top.function.cpu.time",
|
||||
TopFunctionAllocations = "oneuptime.profile.top.function.allocations",
|
||||
}
|
||||
|
||||
export default ProfileMetricType;
|
||||
17
Common/Types/Span/SpanMetricType.ts
Normal file
17
Common/Types/Span/SpanMetricType.ts
Normal file
@@ -0,0 +1,17 @@
|
||||
enum SpanMetricType {
|
||||
SpanCount = "oneuptime.span.count",
|
||||
SpanDuration = "oneuptime.span.duration",
|
||||
SpanErrorCount = "oneuptime.span.error.count",
|
||||
SpanErrorRate = "oneuptime.span.error.rate",
|
||||
SpanRequestRate = "oneuptime.span.request.rate",
|
||||
SpanP50Duration = "oneuptime.span.duration.p50",
|
||||
SpanP90Duration = "oneuptime.span.duration.p90",
|
||||
SpanP95Duration = "oneuptime.span.duration.p95",
|
||||
SpanP99Duration = "oneuptime.span.duration.p99",
|
||||
SpanStatusOk = "oneuptime.span.status.ok",
|
||||
SpanStatusError = "oneuptime.span.status.error",
|
||||
SpanStatusUnset = "oneuptime.span.status.unset",
|
||||
SpanThroughput = "oneuptime.span.throughput",
|
||||
}
|
||||
|
||||
export default SpanMetricType;
|
||||
100
Common/Utils/Alerts/AlertMetricType.ts
Normal file
100
Common/Utils/Alerts/AlertMetricType.ts
Normal file
@@ -0,0 +1,100 @@
|
||||
import AggregationType from "../../Types/BaseDatabase/AggregationType";
|
||||
import AlertMetricType from "../../Types/Alerts/AlertMetricType";
|
||||
|
||||
class AlertMetricTypeUtil {
|
||||
public static getAggregationTypeByAlertMetricType(
|
||||
metricType: AlertMetricType,
|
||||
): AggregationType {
|
||||
switch (metricType) {
|
||||
case AlertMetricType.AlertCount:
|
||||
return AggregationType.Sum;
|
||||
case AlertMetricType.TimeToAcknowledge:
|
||||
return AggregationType.Avg;
|
||||
case AlertMetricType.TimeToResolve:
|
||||
return AggregationType.Avg;
|
||||
case AlertMetricType.AlertDuration:
|
||||
return AggregationType.Avg;
|
||||
default:
|
||||
throw new Error("Invalid AlertMetricType value");
|
||||
}
|
||||
}
|
||||
|
||||
public static getAllAlertMetricTypes(): Array<AlertMetricType> {
|
||||
return [
|
||||
AlertMetricType.AlertCount,
|
||||
AlertMetricType.TimeToAcknowledge,
|
||||
AlertMetricType.TimeToResolve,
|
||||
AlertMetricType.AlertDuration,
|
||||
];
|
||||
}
|
||||
|
||||
public static getTitleByAlertMetricType(
|
||||
metricType: AlertMetricType,
|
||||
): string {
|
||||
switch (metricType) {
|
||||
case AlertMetricType.AlertCount:
|
||||
return "Alert Count";
|
||||
case AlertMetricType.TimeToAcknowledge:
|
||||
return "Time to Acknowledge";
|
||||
case AlertMetricType.TimeToResolve:
|
||||
return "Time to Resolve";
|
||||
case AlertMetricType.AlertDuration:
|
||||
return "Alert Duration";
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
public static getDescriptionByAlertMetricType(
|
||||
metricType: AlertMetricType,
|
||||
): string {
|
||||
switch (metricType) {
|
||||
case AlertMetricType.AlertCount:
|
||||
return "The number of alerts created for this monitor over time.";
|
||||
case AlertMetricType.TimeToAcknowledge:
|
||||
return "The average time taken to acknowledge alerts for this monitor.";
|
||||
case AlertMetricType.TimeToResolve:
|
||||
return "The average time taken to resolve alerts for this monitor.";
|
||||
case AlertMetricType.AlertDuration:
|
||||
return "The average duration of alerts for this monitor.";
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
public static getLegendByAlertMetricType(
|
||||
metricType: AlertMetricType,
|
||||
): string {
|
||||
switch (metricType) {
|
||||
case AlertMetricType.AlertCount:
|
||||
return "Alerts";
|
||||
case AlertMetricType.TimeToAcknowledge:
|
||||
return "Time to Acknowledge";
|
||||
case AlertMetricType.TimeToResolve:
|
||||
return "Time to Resolve";
|
||||
case AlertMetricType.AlertDuration:
|
||||
return "Duration";
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
public static getLegendUnitByAlertMetricType(
|
||||
metricType: AlertMetricType,
|
||||
): string {
|
||||
switch (metricType) {
|
||||
case AlertMetricType.AlertCount:
|
||||
return "";
|
||||
case AlertMetricType.TimeToAcknowledge:
|
||||
return "s";
|
||||
case AlertMetricType.TimeToResolve:
|
||||
return "s";
|
||||
case AlertMetricType.AlertDuration:
|
||||
return "s";
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export default AlertMetricTypeUtil;
|
||||
Common/Utils/Incident/IncidentMetricType.ts — new file (130 lines)
@@ -0,0 +1,130 @@
|
||||
import AggregationType from "../../Types/BaseDatabase/AggregationType";
|
||||
import IncidentMetricType from "../../Types/Incident/IncidentMetricType";
|
||||
|
||||
class IncidentMetricTypeUtil {
|
||||
public static getAggregationTypeByIncidentMetricType(
|
||||
metricType: IncidentMetricType,
|
||||
): AggregationType {
|
||||
switch (metricType) {
|
||||
case IncidentMetricType.IncidentCount:
|
||||
return AggregationType.Sum;
|
||||
case IncidentMetricType.TimeToAcknowledge:
|
||||
return AggregationType.Avg;
|
||||
case IncidentMetricType.TimeToResolve:
|
||||
return AggregationType.Avg;
|
||||
case IncidentMetricType.IncidentDuration:
|
||||
return AggregationType.Avg;
|
||||
case IncidentMetricType.TimeInState:
|
||||
return AggregationType.Avg;
|
||||
case IncidentMetricType.SeverityChange:
|
||||
return AggregationType.Sum;
|
||||
case IncidentMetricType.PostmortemCompletionTime:
|
||||
return AggregationType.Avg;
|
||||
default:
|
||||
throw new Error("Invalid IncidentMetricType value");
|
||||
}
|
||||
}
|
||||
|
||||
public static getAllIncidentMetricTypes(): Array<IncidentMetricType> {
|
||||
return [
|
||||
IncidentMetricType.IncidentCount,
|
||||
IncidentMetricType.TimeToAcknowledge,
|
||||
IncidentMetricType.TimeToResolve,
|
||||
IncidentMetricType.IncidentDuration,
|
||||
];
|
||||
}
|
||||
|
||||
public static getTitleByIncidentMetricType(
|
||||
metricType: IncidentMetricType,
|
||||
): string {
|
||||
switch (metricType) {
|
||||
case IncidentMetricType.IncidentCount:
|
||||
return "Incident Count";
|
||||
case IncidentMetricType.TimeToAcknowledge:
|
||||
return "Time to Acknowledge";
|
||||
case IncidentMetricType.TimeToResolve:
|
||||
return "Time to Resolve";
|
||||
case IncidentMetricType.IncidentDuration:
|
||||
return "Incident Duration";
|
||||
case IncidentMetricType.TimeInState:
|
||||
return "Time in State";
|
||||
case IncidentMetricType.SeverityChange:
|
||||
return "Severity Changes";
|
||||
case IncidentMetricType.PostmortemCompletionTime:
|
||||
return "Postmortem Completion Time";
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
public static getDescriptionByIncidentMetricType(
|
||||
metricType: IncidentMetricType,
|
||||
): string {
|
||||
switch (metricType) {
|
||||
case IncidentMetricType.IncidentCount:
|
||||
return "The number of incidents created for this monitor over time.";
|
||||
case IncidentMetricType.TimeToAcknowledge:
|
||||
return "The average time taken to acknowledge incidents for this monitor.";
|
||||
case IncidentMetricType.TimeToResolve:
|
||||
return "The average time taken to resolve incidents for this monitor.";
|
||||
case IncidentMetricType.IncidentDuration:
|
||||
return "The average duration of incidents for this monitor.";
|
||||
case IncidentMetricType.TimeInState:
|
||||
return "The average time incidents spend in each state for this monitor.";
|
||||
case IncidentMetricType.SeverityChange:
|
||||
return "The number of severity changes for incidents related to this monitor.";
|
||||
case IncidentMetricType.PostmortemCompletionTime:
|
||||
return "The average time taken to complete postmortems for incidents related to this monitor.";
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
public static getLegendByIncidentMetricType(
|
||||
metricType: IncidentMetricType,
|
||||
): string {
|
||||
switch (metricType) {
|
||||
case IncidentMetricType.IncidentCount:
|
||||
return "Incidents";
|
||||
case IncidentMetricType.TimeToAcknowledge:
|
||||
return "Time to Acknowledge";
|
||||
case IncidentMetricType.TimeToResolve:
|
||||
return "Time to Resolve";
|
||||
case IncidentMetricType.IncidentDuration:
|
||||
return "Duration";
|
||||
case IncidentMetricType.TimeInState:
|
||||
return "Time in State";
|
||||
case IncidentMetricType.SeverityChange:
|
||||
return "Severity Changes";
|
||||
case IncidentMetricType.PostmortemCompletionTime:
|
||||
return "Postmortem Time";
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
public static getLegendUnitByIncidentMetricType(
|
||||
metricType: IncidentMetricType,
|
||||
): string {
|
||||
switch (metricType) {
|
||||
case IncidentMetricType.IncidentCount:
|
||||
return "";
|
||||
case IncidentMetricType.TimeToAcknowledge:
|
||||
return "s";
|
||||
case IncidentMetricType.TimeToResolve:
|
||||
return "s";
|
||||
case IncidentMetricType.IncidentDuration:
|
||||
return "s";
|
||||
case IncidentMetricType.TimeInState:
|
||||
return "s";
|
||||
case IncidentMetricType.SeverityChange:
|
||||
return "";
|
||||
case IncidentMetricType.PostmortemCompletionTime:
|
||||
return "s";
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export default IncidentMetricTypeUtil;
|
||||
@@ -4,6 +4,7 @@ import InfrastructureStatus from "Common/Server/Infrastructure/Status";
|
||||
import logger from "Common/Server/Utils/Logger";
|
||||
import App from "Common/Server/Utils/StartServer";
|
||||
import Telemetry from "Common/Server/Utils/Telemetry";
|
||||
import Profiling from "Common/Server/Utils/Profiling";
|
||||
import "ejs";
|
||||
|
||||
const APP_NAME: string = "home";
|
||||
@@ -15,6 +16,11 @@ const init: PromiseVoidFunction = async (): Promise<void> => {
|
||||
serviceName: APP_NAME,
|
||||
});
|
||||
|
||||
// Initialize profiling (opt-in via ENABLE_PROFILING env var)
|
||||
Profiling.init({
|
||||
serviceName: APP_NAME,
|
||||
});
|
||||
|
||||
const statusCheck: PromiseVoidFunction = async (): Promise<void> => {
|
||||
// Check the status of infrastructure components
|
||||
return await InfrastructureStatus.checkStatusWithRetry({
|
||||
|
||||
@@ -9,6 +9,7 @@ import Express, { ExpressApplication } from "Common/Server/Utils/Express";
|
||||
import logger from "Common/Server/Utils/Logger";
|
||||
import App from "Common/Server/Utils/StartServer";
|
||||
import Telemetry from "Common/Server/Utils/Telemetry";
|
||||
import Profiling from "Common/Server/Utils/Profiling";
|
||||
import { PromiseVoidFunction } from "Common/Types/FunctionTypes";
|
||||
import "ejs";
|
||||
|
||||
@@ -75,6 +76,11 @@ const init: PromiseVoidFunction = async (): Promise<void> => {
|
||||
serviceName: APP_NAME,
|
||||
});
|
||||
|
||||
// Initialize profiling (opt-in via ENABLE_PROFILING env var)
|
||||
Profiling.init({
|
||||
serviceName: APP_NAME,
|
||||
});
|
||||
|
||||
// Initialize the app with service name and status checks
|
||||
await App.init({
|
||||
appName: APP_NAME,
|
||||
|
||||
@@ -16,6 +16,7 @@ import { PromiseVoidFunction } from "Common/Types/FunctionTypes";
|
||||
import logger from "Common/Server/Utils/Logger";
|
||||
import App from "Common/Server/Utils/StartServer";
|
||||
import Telemetry from "Common/Server/Utils/Telemetry";
|
||||
import Profiling from "Common/Server/Utils/Profiling";
|
||||
import Express, { ExpressApplication } from "Common/Server/Utils/Express";
|
||||
import "ejs";
|
||||
|
||||
@@ -47,6 +48,11 @@ const init: PromiseVoidFunction = async (): Promise<void> => {
|
||||
serviceName: APP_NAME,
|
||||
});
|
||||
|
||||
// Initialize profiling (opt-in via ENABLE_PROFILING env var)
|
||||
Profiling.init({
|
||||
serviceName: APP_NAME,
|
||||
});
|
||||
|
||||
logger.info(
|
||||
`Probe Service - Monitoring workers: ${PROBE_MONITORING_WORKERS}, Monitor fetch limit: ${PROBE_MONITOR_FETCH_LIMIT}, Script timeout: ${PROBE_SYNTHETIC_MONITOR_SCRIPT_TIMEOUT_IN_MS}ms / ${PROBE_CUSTOM_CODE_MONITOR_SCRIPT_TIMEOUT_IN_MS}ms, Retry limit: ${PROBE_MONITOR_RETRY_LIMIT}`,
|
||||
);
|
||||
|
||||
@@ -21,6 +21,7 @@ import logger from "Common/Server/Utils/Logger";
|
||||
import Realtime from "Common/Server/Utils/Realtime";
|
||||
import App from "Common/Server/Utils/StartServer";
|
||||
import Telemetry from "Common/Server/Utils/Telemetry";
|
||||
import Profiling from "Common/Server/Utils/Profiling";
|
||||
import "./Jobs/TelemetryIngest/ProcessTelemetry";
|
||||
import { TELEMETRY_CONCURRENCY } from "./Config";
|
||||
import type { StatusAPIOptions } from "Common/Server/API/StatusAPI";
|
||||
@@ -79,6 +80,11 @@ const init: PromiseVoidFunction = async (): Promise<void> => {
|
||||
serviceName: APP_NAME,
|
||||
});
|
||||
|
||||
// Initialize profiling (opt-in via ENABLE_PROFILING env var)
|
||||
Profiling.init({
|
||||
serviceName: APP_NAME,
|
||||
});
|
||||
|
||||
logger.info(
|
||||
`Telemetry Service - Queue concurrency: ${TELEMETRY_CONCURRENCY}`,
|
||||
);
|
||||
|
||||
@@ -5,6 +5,7 @@ import Express, { ExpressApplication } from "Common/Server/Utils/Express";
|
||||
import logger from "Common/Server/Utils/Logger";
|
||||
import App from "Common/Server/Utils/StartServer";
|
||||
import Telemetry from "Common/Server/Utils/Telemetry";
|
||||
import Profiling from "Common/Server/Utils/Profiling";
|
||||
import "ejs";
|
||||
|
||||
const app: ExpressApplication = Express.getExpressApp();
|
||||
@@ -21,6 +22,11 @@ const init: PromiseVoidFunction = async (): Promise<void> => {
|
||||
serviceName: APP_NAME,
|
||||
});
|
||||
|
||||
// Initialize profiling (opt-in via ENABLE_PROFILING env var)
|
||||
Profiling.init({
|
||||
serviceName: APP_NAME,
|
||||
});
|
||||
|
||||
// init the app
|
||||
await App.init({
|
||||
appName: APP_NAME,
|
||||
|
||||
@@ -5,6 +5,7 @@ import InfrastructureStatus from "Common/Server/Infrastructure/Status";
|
||||
import logger from "Common/Server/Utils/Logger";
|
||||
import App from "Common/Server/Utils/StartServer";
|
||||
import Telemetry from "Common/Server/Utils/Telemetry";
|
||||
import Profiling from "Common/Server/Utils/Profiling";
|
||||
import Realtime from "Common/Server/Utils/Realtime";
|
||||
import PostgresAppInstance from "Common/Server/Infrastructure/PostgresDatabase";
|
||||
import Redis from "Common/Server/Infrastructure/Redis";
|
||||
@@ -23,6 +24,11 @@ const init: PromiseVoidFunction = async (): Promise<void> => {
|
||||
serviceName: APP_NAME,
|
||||
});
|
||||
|
||||
// Initialize profiling (opt-in via ENABLE_PROFILING env var)
|
||||
Profiling.init({
|
||||
serviceName: APP_NAME,
|
||||
});
|
||||
|
||||
logger.debug("Telemetry initialized");
|
||||
|
||||
logger.info(`Worker Service - Queue concurrency: ${WORKER_CONCURRENCY}`);
|
||||
|
||||
@@ -305,6 +305,14 @@ DISABLE_TELEMETRY_FOR_WORKER=true
|
||||
|
||||
DISABLE_TELEMETRY_FOR_AI_AGENT=true
|
||||
|
||||
# By default profiling is disabled for all services. Set to true to enable CPU profiling for a service.
|
||||
ENABLE_PROFILING_FOR_APP=false
|
||||
ENABLE_PROFILING_FOR_TELEMETRY=false
|
||||
ENABLE_PROFILING_FOR_TEST_SERVER=false
|
||||
ENABLE_PROFILING_FOR_PROBE=false
|
||||
ENABLE_PROFILING_FOR_WORKER=false
|
||||
ENABLE_PROFILING_FOR_AI_AGENT=false
|
||||
|
||||
|
||||
# Connect OneUptime with Slack App
|
||||
SLACK_APP_CLIENT_ID=
|
||||
|
||||
@@ -223,6 +223,7 @@ services:
|
||||
<<: *common-runtime-variables
|
||||
PORT: ${TEST_SERVER_PORT}
|
||||
DISABLE_TELEMETRY: ${DISABLE_TELEMETRY_FOR_TEST_SERVER}
|
||||
ENABLE_PROFILING: ${ENABLE_PROFILING_FOR_TEST_SERVER}
|
||||
logging:
|
||||
driver: "local"
|
||||
options:
|
||||
@@ -242,6 +243,7 @@ services:
|
||||
SMS_HIGH_RISK_COST_IN_CENTS: ${SMS_HIGH_RISK_COST_IN_CENTS}
|
||||
CALL_HIGH_RISK_COST_IN_CENTS_PER_MINUTE: ${CALL_HIGH_RISK_COST_IN_CENTS_PER_MINUTE}
|
||||
DISABLE_TELEMETRY: ${DISABLE_TELEMETRY_FOR_APP}
|
||||
ENABLE_PROFILING: ${ENABLE_PROFILING_FOR_APP}
|
||||
logging:
|
||||
driver: "local"
|
||||
options:
|
||||
@@ -267,6 +269,7 @@ services:
|
||||
<<: *common-runtime-variables
|
||||
PORT: ${WORKER_PORT}
|
||||
DISABLE_TELEMETRY: ${DISABLE_TELEMETRY_FOR_WORKER}
|
||||
ENABLE_PROFILING: ${ENABLE_PROFILING_FOR_WORKER}
|
||||
WORKER_CONCURRENCY: ${WORKER_CONCURRENCY}
|
||||
logging:
|
||||
driver: "local"
|
||||
@@ -290,6 +293,7 @@ services:
|
||||
NODE_ENV: ${ENVIRONMENT}
|
||||
LOG_LEVEL: ${LOG_LEVEL}
|
||||
DISABLE_TELEMETRY: ${DISABLE_TELEMETRY_FOR_PROBE}
|
||||
ENABLE_PROFILING: ${ENABLE_PROFILING_FOR_PROBE}
|
||||
OPENTELEMETRY_EXPORTER_OTLP_ENDPOINT: ${OPENTELEMETRY_EXPORTER_OTLP_ENDPOINT}
|
||||
OPENTELEMETRY_EXPORTER_OTLP_HEADERS: ${OPENTELEMETRY_EXPORTER_OTLP_HEADERS}
|
||||
logging:
|
||||
@@ -314,6 +318,7 @@ services:
|
||||
NODE_ENV: ${ENVIRONMENT}
|
||||
LOG_LEVEL: ${LOG_LEVEL}
|
||||
DISABLE_TELEMETRY: ${DISABLE_TELEMETRY_FOR_PROBE}
|
||||
ENABLE_PROFILING: ${ENABLE_PROFILING_FOR_PROBE}
|
||||
OPENTELEMETRY_EXPORTER_OTLP_ENDPOINT: ${OPENTELEMETRY_EXPORTER_OTLP_ENDPOINT}
|
||||
OPENTELEMETRY_EXPORTER_OTLP_HEADERS: ${OPENTELEMETRY_EXPORTER_OTLP_HEADERS}
|
||||
logging:
|
||||
@@ -329,6 +334,7 @@ services:
|
||||
AI_AGENT_KEY: ${AI_AGENT_KEY}
|
||||
ONEUPTIME_URL: ${AI_AGENT_ONEUPTIME_URL}
|
||||
DISABLE_TELEMETRY: ${DISABLE_TELEMETRY_FOR_AI_AGENT}
|
||||
ENABLE_PROFILING: ${ENABLE_PROFILING_FOR_AI_AGENT}
|
||||
PORT: ${AI_AGENT_PORT}
|
||||
logging:
|
||||
driver: "local"
|
||||
@@ -361,6 +367,7 @@ services:
|
||||
<<: *common-runtime-variables
|
||||
PORT: ${TELEMETRY_PORT}
|
||||
DISABLE_TELEMETRY: ${DISABLE_TELEMETRY_FOR_TELEMETRY}
|
||||
ENABLE_PROFILING: ${ENABLE_PROFILING_FOR_TELEMETRY}
|
||||
# Max concurrent telemetry jobs the worker will process
|
||||
TELEMETRY_CONCURRENCY: ${TELEMETRY_CONCURRENCY}
|
||||
REGISTER_PROBE_KEY: ${REGISTER_PROBE_KEY}
|
||||
|
||||
Reference in New Issue
Block a user