Compare commits

...

19 Commits

Author SHA1 Message Date
Simon Larsen
ba49aaf0c3 fix: Skip probe offline email notifications when billing is enabled 2025-08-02 12:36:50 +01:00
Simon Larsen
6ea5ad7fe8 fix: Update nextPingAt calculation to use a 2-minute offset for improved timing accuracy 2025-08-02 11:42:01 +01:00
Simon Larsen
962866d109 fix: Improve queue size extraction and handling in metrics endpoint 2025-08-01 20:58:58 +01:00
Simon Larsen
115216561c feat: Add ports configuration for OneUptime probe service 2025-08-01 20:36:30 +01:00
Simon Larsen
f709c90cc4 fix: Update probe port handling in KEDA ScaledObjects for improved configuration 2025-08-01 20:21:41 +01:00
Simon Larsen
d7f01b0189 fix: Update default port value in probe template for better configuration handling 2025-08-01 20:19:31 +01:00
Simon Larsen
c3eaa8995c fix ports 2025-08-01 20:19:10 +01:00
Simon Larsen
53b482b9f3 refactor: Update Helm templates to use new port structure in values.yaml 2025-08-01 20:13:30 +01:00
Simon Larsen
d52670f39c refactor: Update Helm templates to use new port structure in values.yaml 2025-08-01 18:22:05 +01:00
Simon Larsen
fdc1332b9e Merge branch 'master' of github.com:OneUptime/oneuptime 2025-08-01 16:17:09 +01:00
Simon Larsen
a937416663 fix: Update autoscaler condition to prevent conflicts with KEDA configuration 2025-08-01 16:17:05 +01:00
Nawaz Dhandala
546d41da81 fix: Clean up formatting and ensure consistent return structure in metrics endpoints 2025-08-01 16:13:05 +01:00
Simon Larsen
c4c6793b29 feat: Implement KEDA autoscaling configuration for probes and add metrics endpoints 2025-08-01 15:38:04 +01:00
Simon Larsen
c894b112e6 fix: Await monitorResource call to ensure proper error handling in incoming request processing 2025-08-01 14:34:17 +01:00
Simon Larsen
304baf1bb4 fix: Await monitorResource call to ensure proper error handling in probe response processing 2025-08-01 14:33:17 +01:00
Simon Larsen
9adea6b1ba feat: Remove Helm annotations for post-install and post-upgrade hooks from templates 2025-08-01 14:01:04 +01:00
Simon Larsen
5498521e02 feat: Add Helm annotations for post-install and post-upgrade hooks 2025-08-01 13:47:52 +01:00
Simon Larsen
9e97c6ddbc feat: Update autoscaler conditions for fluent-ingest, incoming-request-ingest, probe-ingest, and server-monitor-ingest templates 2025-08-01 13:23:39 +01:00
Nawaz Dhandala
63272e09f8 refactor: Simplify function parameter formatting and improve readability in various files 2025-08-01 10:45:55 +01:00
37 changed files with 438 additions and 220 deletions

View File

@@ -52,7 +52,9 @@ router.post(
Response.sendEmptySuccessResponse(req, res);
// Add to queue for asynchronous processing
await FluentIngestQueueService.addFluentIngestJob(req as TelemetryRequest);
await FluentIngestQueueService.addFluentIngestJob(
req as TelemetryRequest,
);
return;
} catch (err) {

View File

@@ -34,9 +34,7 @@ QueueWorker.getWorker(
requestHeaders: jobData.requestHeaders,
});
logger.debug(
`Successfully processed fluent ingestion job: ${job.name}`,
);
logger.debug(`Successfully processed fluent ingestion job: ${job.name}`);
} catch (error) {
logger.error(`Error processing fluent ingestion job:`);
logger.error(error);
@@ -55,8 +53,9 @@ async function processFluentIngestFromQueue(
| Array<JSONObject | string>
| JSONObject;
let oneuptimeServiceName: string | string[] | undefined =
data.requestHeaders["x-oneuptime-service-name"] as string | string[] | undefined;
let oneuptimeServiceName: string | string[] | undefined = data.requestHeaders[
"x-oneuptime-service-name"
] as string | string[] | undefined;
if (!oneuptimeServiceName) {
oneuptimeServiceName = "Unknown Service";
@@ -124,7 +123,9 @@ async function processFluentIngestFromQueue(
OTelIngestService.recordDataIngestedUsgaeBilling({
services: {
[oneuptimeServiceName as string]: {
dataIngestedInGB: JSONFunctions.getSizeOfJSONinGB(data.requestBody as JSONObject),
dataIngestedInGB: JSONFunctions.getSizeOfJSONinGB(
data.requestBody as JSONObject,
),
dataRententionInDays: telemetryService.dataRententionInDays,
serviceId: telemetryService.serviceId,
serviceName: oneuptimeServiceName as string,

View File

@@ -12,9 +12,7 @@ export interface FluentIngestJobData {
}
export default class FluentIngestQueueService {
public static async addFluentIngestJob(
req: TelemetryRequest,
): Promise<void> {
public static async addFluentIngestJob(req: TelemetryRequest): Promise<void> {
try {
const jobData: FluentIngestJobData = {
projectId: req.projectId.toString(),

View File

@@ -98,39 +98,39 @@ Usage:
value: {{ $.Release.Name }}-docs.{{ $.Release.Namespace }}.svc.{{ $.Values.global.clusterDomain }}
- name: APP_PORT
value: {{ $.Values.port.app | squote }}
value: {{ $.Values.app.ports.http | squote }}
- name: PROBE_INGEST_PORT
value: {{ $.Values.port.probeIngest | squote }}
value: {{ $.Values.probeIngest.ports.http | squote }}
- name: SERVER_MONITOR_INGEST_PORT
value: {{ $.Values.port.serverMonitorIngest | squote }}
value: {{ $.Values.serverMonitorIngest.ports.http | squote }}
- name: OPEN_TELEMETRY_INGEST_PORT
value: {{ $.Values.port.openTelemetryIngest | squote }}
value: {{ $.Values.openTelemetryIngest.ports.http | squote }}
- name: INCOMING_REQUEST_INGEST_PORT
value: {{ $.Values.port.incomingRequestIngest | squote }}
value: {{ $.Values.incomingRequestIngest.ports.http | squote }}
- name: FLUENT_INGEST_PORT
value: {{ $.Values.port.fluentIngest | squote }}
value: {{ $.Values.fluentIngest.ports.http | squote }}
- name: TEST_SERVER_PORT
value: {{ $.Values.port.testServer | squote }}
value: {{ $.Values.testServer.ports.http | squote }}
- name: ACCOUNTS_PORT
value: {{ $.Values.port.accounts | squote }}
value: {{ $.Values.accounts.ports.http | squote }}
- name: ISOLATED_VM_PORT
value: {{ $.Values.port.isolatedVM | squote }}
value: {{ $.Values.isolatedVM.ports.http | squote }}
- name: HOME_PORT
value: {{ $.Values.port.home | squote }}
value: {{ $.Values.home.ports.http | squote }}
- name: WORKER_PORT
value: {{ $.Values.port.worker | squote }}
value: {{ $.Values.worker.ports.http | squote }}
- name: WORKFLOW_PORT
value: {{ $.Values.port.workflow | squote }}
value: {{ $.Values.workflow.ports.http | squote }}
- name: STATUS_PAGE_PORT
value: {{ $.Values.port.statusPage | squote }}
value: {{ $.Values.statusPage.ports.http | squote }}
- name: DASHBOARD_PORT
value: {{ $.Values.port.dashboard | squote }}
value: {{ $.Values.dashboard.ports.http | squote }}
- name: ADMIN_DASHBOARD_PORT
value: {{ $.Values.port.adminDashboard | squote }}
value: {{ $.Values.adminDashboard.ports.http | squote }}
- name: API_REFERENCE_PORT
value: {{ $.Values.port.apiReference | squote }}
value: {{ $.Values.apiReference.ports.http | squote }}
- name: DOCS_PORT
value: {{ $.Values.port.docs | squote }}
value: {{ $.Values.docs.ports.http | squote }}
{{- end }}

View File

@@ -1,12 +1,12 @@
# OneUptime accounts Deployment
{{- $accountsEnv := dict "PORT" $.Values.port.accounts "DISABLE_TELEMETRY" $.Values.accounts.disableTelemetryCollection -}}
{{- $accountsPorts := dict "port" $.Values.port.accounts -}}
{{- $accountsEnv := dict "PORT" $.Values.accounts.ports.http "DISABLE_TELEMETRY" $.Values.accounts.disableTelemetryCollection -}}
{{- $accountsPorts := $.Values.accounts.ports -}}
{{- $accountsDeploymentArgs :=dict "IsUI" true "ServiceName" "accounts" "Ports" $accountsPorts "Release" $.Release "Values" $.Values "Env" $accountsEnv "Resources" $.Values.accounts.resources "DisableAutoscaler" $.Values.accounts.disableAutoscaler "ReplicaCount" $.Values.accounts.replicaCount -}}
{{- include "oneuptime.deployment" $accountsDeploymentArgs }}
---
# OneUptime accounts Service
{{- $accountsPorts := dict "port" $.Values.port.accounts -}}
{{- $accountsPorts := $.Values.accounts.ports -}}
{{- $accountsServiceArgs := dict "ServiceName" "accounts" "Ports" $accountsPorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $accountsServiceArgs }}
---

View File

@@ -1,12 +1,12 @@
# OneUptime adminDashboard Deployment
{{- $adminDashboardEnv := dict "PORT" $.Values.port.adminDashboard "DISABLE_TELEMETRY" $.Values.adminDashboard.disableTelemetryCollection -}}
{{- $adminDashboardPorts := dict "port" $.Values.port.adminDashboard -}}
{{- $adminDashboardDeploymentArgs :=dict "IsUI" true "ServiceName" "admin-dashboard" "Ports" $adminDashboardPorts "Release" $.Release "Values" $.Values "Env" $adminDashboardEnv "Resources" $.Values.adminDashboard.resources "DisableAutoscaler" $.Values.adminDashboard.disableAutoscaler "ReplicaCount" $.Values.adminDashboard.replicaCount -}}
# OneUptime admin-dashboard Deployment
{{- $adminDashboardEnv := dict "PORT" $.Values.adminDashboard.ports.http "DISABLE_TELEMETRY" $.Values.adminDashboard.disableTelemetryCollection -}}
{{- $adminDashboardPorts := $.Values.adminDashboard.ports -}}
{{- $adminDashboardDeploymentArgs :=dict "IsUI" true "ServiceName" "admin-dashboard" "Ports" $adminDashboardPorts "Release" $.Release "Values" $.Values "Env" $adminDashboardEnv "Resources" $.Values.adminDashboard.resources "DisableAutoscaler" $.Values.adminDashboard.disableAutoscaler "ReplicaCount" $.Values.adminDashboard.replicaCount -}}
{{- include "oneuptime.deployment" $adminDashboardDeploymentArgs }}
---
# OneUptime adminDashboard Service
{{- $adminDashboardPorts := dict "port" $.Values.port.adminDashboard -}}
# OneUptime admin-dashboard Service
{{- $adminDashboardPorts := $.Values.adminDashboard.ports -}}
{{- $adminDashboardServiceArgs := dict "ServiceName" "admin-dashboard" "Ports" $adminDashboardPorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $adminDashboardServiceArgs }}
---

View File

@@ -52,7 +52,7 @@ spec:
startupProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.apiReference }}
port: {{ $.Values.apiReference.ports.http }}
periodSeconds: {{ $.Values.startupProbe.periodSeconds }}
failureThreshold: {{ $.Values.startupProbe.failureThreshold }}
{{- end }}
@@ -61,7 +61,7 @@ spec:
livenessProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.apiReference }}
port: {{ $.Values.apiReference.ports.http }}
periodSeconds: {{ $.Values.livenessProbe.periodSeconds }}
timeoutSeconds: {{ $.Values.livenessProbe.timeoutSeconds }}
initialDelaySeconds: {{ $.Values.livenessProbe.initialDelaySeconds }}
@@ -71,7 +71,7 @@ spec:
readinessProbe:
httpGet:
path: /status/ready
port: {{ $.Values.port.apiReference }}
port: {{ $.Values.apiReference.ports.http }}
periodSeconds: {{ $.Values.readinessProbe.periodSeconds }}
initialDelaySeconds: {{ $.Values.readinessProbe.initialDelaySeconds }}
timeoutSeconds: {{ $.Values.readinessProbe.timeoutSeconds }}
@@ -85,11 +85,11 @@ spec:
{{- include "oneuptime.env.commonServer" . | nindent 12 }}
{{- include "oneuptime.env.oneuptimeSecret" . | nindent 12 }}
- name: PORT
value: {{ $.Values.port.apiReference | quote }}
value: {{ $.Values.apiReference.ports.http | quote }}
- name: DISABLE_TELEMETRY
value: {{ $.Values.apiReference.disableTelemetryCollection | quote }}
ports:
- containerPort: {{ $.Values.port.apiReference }}
- containerPort: {{ $.Values.apiReference.ports.http }}
protocol: TCP
name: http
{{- if $.Values.apiReference.resources }}
@@ -101,7 +101,8 @@ spec:
---
# OneUptime app Service
{{- $apiReferencePorts := dict "port" $.Values.port.apiReference -}}
# OneUptime apiReference Service
{{- $apiReferencePorts := dict "port" $.Values.apiReference.ports.http -}}
{{- $apiReferenceServiceArgs := dict "ServiceName" "api-reference" "Ports" $apiReferencePorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $apiReferenceServiceArgs }}
---

View File

@@ -53,7 +53,7 @@ spec:
startupProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.app }}
port: {{ $.Values.app.ports.http }}
periodSeconds: {{ $.Values.startupProbe.periodSeconds }}
failureThreshold: {{ $.Values.startupProbe.failureThreshold }}
{{- end }}
@@ -62,7 +62,7 @@ spec:
livenessProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.app }}
port: {{ $.Values.app.ports.http }}
periodSeconds: {{ $.Values.livenessProbe.periodSeconds }}
timeoutSeconds: {{ $.Values.livenessProbe.timeoutSeconds }}
initialDelaySeconds: {{ $.Values.livenessProbe.initialDelaySeconds }}
@@ -72,7 +72,7 @@ spec:
readinessProbe:
httpGet:
path: /status/ready
port: {{ $.Values.port.app }}
port: {{ $.Values.app.ports.http }}
periodSeconds: {{ $.Values.readinessProbe.periodSeconds }}
initialDelaySeconds: {{ $.Values.readinessProbe.initialDelaySeconds }}
timeoutSeconds: {{ $.Values.readinessProbe.timeoutSeconds }}
@@ -86,7 +86,7 @@ spec:
{{- include "oneuptime.env.commonServer" . | nindent 12 }}
{{- include "oneuptime.env.oneuptimeSecret" . | nindent 12 }}
- name: PORT
value: {{ $.Values.port.app | quote }}
value: {{ $.Values.app.ports.http | quote }}
- name: SMS_HIGH_RISK_COST_IN_CENTS
value: {{ $.Values.billing.smsHighRiskValueInCents | quote }}
- name: CALL_HIGH_RISK_COST_IN_CENTS_PER_MINUTE
@@ -99,7 +99,7 @@ spec:
value: {{ $.Values.app.disableTelemetryCollection | quote }}
ports:
- containerPort: {{ $.Values.port.app }}
- containerPort: {{ $.Values.app.ports.http }}
protocol: TCP
name: http
{{- if $.Values.app.resources }}
@@ -111,7 +111,7 @@ spec:
---
# OneUptime app Service
{{- $appPorts := dict "port" $.Values.port.app -}}
{{- $appPorts := dict "port" $.Values.app.ports.http -}}
{{- $appServiceArgs := dict "ServiceName" "app" "Ports" $appPorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $appServiceArgs }}
---

View File

@@ -1,12 +1,12 @@
# OneUptime dashboard Deployment
{{- $dashboardPorts := dict "port" $.Values.port.dashboard -}}
{{- $dashboardEnv := dict "PORT" $.Values.port.dashboard "DISABLE_TELEMETRY" $.Values.dashboard.disableTelemetryCollection -}}
{{- $dashboardPorts := $.Values.dashboard.ports -}}
{{- $dashboardEnv := dict "PORT" $.Values.dashboard.ports.http "DISABLE_TELEMETRY" $.Values.dashboard.disableTelemetryCollection -}}
{{- $dashboardDeploymentArgs :=dict "IsUI" true "ServiceName" "dashboard" "Ports" $dashboardPorts "Release" $.Release "Values" $.Values "Env" $dashboardEnv "Resources" $.Values.dashboard.resources "DisableAutoscaler" $.Values.dashboard.disableAutoscaler "ReplicaCount" $.Values.dashboard.replicaCount -}}
{{- include "oneuptime.deployment" $dashboardDeploymentArgs }}
---
# OneUptime dashboard Service
{{- $dashboardPorts := dict "port" $.Values.port.dashboard -}}
{{- $dashboardPorts := $.Values.dashboard.ports -}}
{{- $dashboardServiceArgs := dict "ServiceName" "dashboard" "Ports" $dashboardPorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $dashboardServiceArgs }}
---

View File

@@ -52,7 +52,7 @@ spec:
startupProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.docs }}
port: {{ $.Values.docs.ports.http }}
periodSeconds: {{ $.Values.startupProbe.periodSeconds }}
failureThreshold: {{ $.Values.startupProbe.failureThreshold }}
{{- end }}
@@ -61,7 +61,7 @@ spec:
livenessProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.docs }}
port: {{ $.Values.docs.ports.http }}
periodSeconds: {{ $.Values.livenessProbe.periodSeconds }}
timeoutSeconds: {{ $.Values.livenessProbe.timeoutSeconds }}
initialDelaySeconds: {{ $.Values.livenessProbe.initialDelaySeconds }}
@@ -71,7 +71,7 @@ spec:
readinessProbe:
httpGet:
path: /status/ready
port: {{ $.Values.port.docs }}
port: {{ $.Values.docs.ports.http }}
periodSeconds: {{ $.Values.readinessProbe.periodSeconds }}
initialDelaySeconds: {{ $.Values.readinessProbe.initialDelaySeconds }}
timeoutSeconds: {{ $.Values.readinessProbe.timeoutSeconds }}
@@ -85,11 +85,11 @@ spec:
{{- include "oneuptime.env.commonServer" . | nindent 12 }}
{{- include "oneuptime.env.oneuptimeSecret" . | nindent 12 }}
- name: PORT
value: {{ $.Values.port.docs | quote }}
value: {{ $.Values.docs.ports.http | quote }}
- name: DISABLE_TELEMETRY
value: {{ $.Values.docs.disableTelemetryCollection | quote }}
ports:
- containerPort: {{ $.Values.port.docs }}
- containerPort: {{ $.Values.docs.ports.http }}
protocol: TCP
name: http
{{- if $.Values.docs.resources }}
@@ -101,7 +101,8 @@ spec:
---
# OneUptime app Service
{{- $docsPorts := dict "port" $.Values.port.docs -}}
# OneUptime docs Service
{{- $docsPorts := dict "port" $.Values.docs.ports.http -}}
{{- $docsServiceArgs := dict "ServiceName" "docs" "Ports" $docsPorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $docsServiceArgs }}
---

View File

@@ -57,7 +57,7 @@ spec:
startupProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.fluentIngest }}
port: {{ $.Values.fluentIngest.ports.http }}
periodSeconds: {{ $.Values.startupProbe.periodSeconds }}
failureThreshold: {{ $.Values.startupProbe.failureThreshold }}
{{- end }}
@@ -66,7 +66,7 @@ spec:
livenessProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.fluentIngest }}
port: {{ $.Values.fluentIngest.ports.http }}
periodSeconds: {{ $.Values.livenessProbe.periodSeconds }}
timeoutSeconds: {{ $.Values.livenessProbe.timeoutSeconds }}
initialDelaySeconds: {{ $.Values.livenessProbe.initialDelaySeconds }}
@@ -76,7 +76,7 @@ spec:
readinessProbe:
httpGet:
path: /status/ready
port: {{ $.Values.port.fluentIngest }}
port: {{ $.Values.fluentIngest.ports.http }}
periodSeconds: {{ $.Values.readinessProbe.periodSeconds }}
initialDelaySeconds: {{ $.Values.readinessProbe.initialDelaySeconds }}
timeoutSeconds: {{ $.Values.readinessProbe.timeoutSeconds }}
@@ -90,11 +90,11 @@ spec:
{{- include "oneuptime.env.commonServer" . | nindent 12 }}
{{- include "oneuptime.env.oneuptimeSecret" . | nindent 12 }}
- name: PORT
value: {{ $.Values.port.fluentIngest | quote }}
value: {{ $.Values.fluentIngest.ports.http | quote }}
- name: DISABLE_TELEMETRY
value: {{ $.Values.fluentIngest.disableTelemetryCollection | quote }}
ports:
- containerPort: {{ $.Values.port.fluentIngest }}
- containerPort: {{ $.Values.fluentIngest.ports.http }}
protocol: TCP
name: http
{{- if $.Values.fluentIngest.resources }}
@@ -106,13 +106,13 @@ spec:
---
# OneUptime fluent-ingest Service
{{- $fluentIngestPorts := dict "port" $.Values.port.fluentIngest -}}
{{- $fluentIngestPorts := dict "port" $.Values.fluentIngest.ports.http -}}
{{- $fluentIngestServiceArgs := dict "ServiceName" "fluent-ingest" "Ports" $fluentIngestPorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $fluentIngestServiceArgs }}
---
# OneUptime fluent-ingest autoscaler
{{- if not $.Values.fluentIngest.disableAutoscaler }}
{{- if and (not $.Values.fluentIngest.disableAutoscaler) (not (and $.Values.keda.enabled $.Values.fluentIngest.keda.enabled)) }}
{{- $fluentIngestAutoScalerArgs := dict "ServiceName" "fluent-ingest" "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.autoscaler" $fluentIngestAutoScalerArgs }}
{{- end }}

View File

@@ -52,7 +52,7 @@ spec:
startupProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.home }}
port: {{ $.Values.home.ports.http }}
periodSeconds: {{ $.Values.startupProbe.periodSeconds }}
failureThreshold: {{ $.Values.startupProbe.failureThreshold }}
{{- end }}
@@ -61,7 +61,7 @@ spec:
livenessProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.home }}
port: {{ $.Values.home.ports.http }}
periodSeconds: {{ $.Values.livenessProbe.periodSeconds }}
timeoutSeconds: {{ $.Values.livenessProbe.timeoutSeconds }}
initialDelaySeconds: {{ $.Values.livenessProbe.initialDelaySeconds }}
@@ -71,7 +71,7 @@ spec:
readinessProbe:
httpGet:
path: /status/ready
port: {{ $.Values.port.home }}
port: {{ $.Values.home.ports.http }}
periodSeconds: {{ $.Values.readinessProbe.periodSeconds }}
initialDelaySeconds: {{ $.Values.readinessProbe.initialDelaySeconds }}
timeoutSeconds: {{ $.Values.readinessProbe.timeoutSeconds }}
@@ -85,11 +85,11 @@ spec:
{{- include "oneuptime.env.commonServer" . | nindent 12 }}
{{- include "oneuptime.env.oneuptimeSecret" . | nindent 12 }}
- name: PORT
value: {{ $.Values.port.home | quote }}
value: {{ $.Values.home.ports.http | quote }}
- name: DISABLE_TELEMETRY
value: {{ $.Values.home.disableTelemetryCollection | quote }}
ports:
- containerPort: {{ $.Values.port.home }}
- containerPort: {{ $.Values.home.ports.http }}
protocol: TCP
name: http
{{- if $.Values.home.resources }}
@@ -101,7 +101,7 @@ spec:
---
# OneUptime app Service
{{- $homePorts := dict "port" $.Values.port.home -}}
{{- $homePorts := $.Values.home.ports -}}
{{- $homeServiceArgs := dict "ServiceName" "home" "Ports" $homePorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $homeServiceArgs }}
---

View File

@@ -57,7 +57,7 @@ spec:
startupProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.incomingRequestIngest }}
port: {{ $.Values.incomingRequestIngest.ports.http }}
periodSeconds: {{ $.Values.startupProbe.periodSeconds }}
failureThreshold: {{ $.Values.startupProbe.failureThreshold }}
{{- end }}
@@ -66,7 +66,7 @@ spec:
livenessProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.incomingRequestIngest }}
port: {{ $.Values.incomingRequestIngest.ports.http }}
periodSeconds: {{ $.Values.livenessProbe.periodSeconds }}
timeoutSeconds: {{ $.Values.livenessProbe.timeoutSeconds }}
initialDelaySeconds: {{ $.Values.livenessProbe.initialDelaySeconds }}
@@ -76,7 +76,7 @@ spec:
readinessProbe:
httpGet:
path: /status/ready
port: {{ $.Values.port.incomingRequestIngest }}
port: {{ $.Values.incomingRequestIngest.ports.http }}
periodSeconds: {{ $.Values.readinessProbe.periodSeconds }}
initialDelaySeconds: {{ $.Values.readinessProbe.initialDelaySeconds }}
timeoutSeconds: {{ $.Values.readinessProbe.timeoutSeconds }}
@@ -90,11 +90,11 @@ spec:
{{- include "oneuptime.env.commonServer" . | nindent 12 }}
{{- include "oneuptime.env.oneuptimeSecret" . | nindent 12 }}
- name: PORT
value: {{ $.Values.port.incomingRequestIngest | quote }}
value: {{ $.Values.incomingRequestIngest.ports.http | quote }}
- name: DISABLE_TELEMETRY
value: {{ $.Values.incomingRequestIngest.disableTelemetryCollection | quote }}
ports:
- containerPort: {{ $.Values.port.incomingRequestIngest }}
- containerPort: {{ $.Values.incomingRequestIngest.ports.http }}
protocol: TCP
name: http
{{- if $.Values.incomingRequestIngest.resources }}
@@ -106,13 +106,13 @@ spec:
---
# OneUptime incoming-request-ingest Service
{{- $incomingRequestIngestPorts := dict "port" $.Values.port.incomingRequestIngest -}}
{{- $incomingRequestIngestPorts := dict "port" $.Values.incomingRequestIngest.ports.http -}}
{{- $incomingRequestIngestServiceArgs := dict "ServiceName" "incoming-request-ingest" "Ports" $incomingRequestIngestPorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $incomingRequestIngestServiceArgs }}
---
# OneUptime incoming-request-ingest autoscaler
{{- if not $.Values.incomingRequestIngest.disableAutoscaler }}
{{- if and (not $.Values.incomingRequestIngest.disableAutoscaler) (not (and $.Values.keda.enabled $.Values.incomingRequestIngest.keda.enabled)) }}
{{- $incomingRequestIngestAutoScalerArgs := dict "ServiceName" "incoming-request-ingest" "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.autoscaler" $incomingRequestIngestAutoScalerArgs }}
{{- end }}

View File

@@ -56,12 +56,12 @@ spec:
{{- include "oneuptime.env.common" . | nindent 12 }}
{{- include "oneuptime.env.oneuptimeSecret" . | nindent 12 }}
- name: PORT
value: {{ $.Values.port.isolatedVM | quote }}
value: {{ $.Values.isolatedVM.ports.http | quote }}
- name: DISABLE_TELEMETRY
value: {{ $.Values.isolatedVM.disableTelemetryCollection | quote }}
ports:
- containerPort: {{ $.Values.port.isolatedVM }}
- containerPort: {{ $.Values.isolatedVM.ports.http }}
protocol: TCP
name: http
{{- if $.Values.isolatedVM.resources }}
@@ -73,7 +73,7 @@ spec:
---
# OneUptime isolatedVM Service
{{- $isolatedVMPorts := dict "port" $.Values.port.isolatedVM -}}
{{- $isolatedVMPorts := $.Values.isolatedVM.ports -}}
{{- $isolatedVMServiceArgs := dict "ServiceName" "isolated-vm" "Ports" $isolatedVMPorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $isolatedVMServiceArgs }}
---

View File

@@ -4,35 +4,49 @@ KEDA ScaledObjects for various services
{{/* OpenTelemetry Ingest KEDA ScaledObject */}}
{{- if and .Values.keda.enabled .Values.openTelemetryIngest.keda.enabled (not .Values.openTelemetryIngest.disableAutoscaler) }}
{{- $metricsConfig := dict "enabled" .Values.openTelemetryIngest.keda.enabled "minReplicas" .Values.openTelemetryIngest.keda.minReplicas "maxReplicas" .Values.openTelemetryIngest.keda.maxReplicas "pollingInterval" .Values.openTelemetryIngest.keda.pollingInterval "cooldownPeriod" .Values.openTelemetryIngest.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_telemetry_queue_size" "threshold" .Values.openTelemetryIngest.keda.queueSizeThreshold "port" .Values.port.openTelemetryIngest)) }}
{{- $metricsConfig := dict "enabled" .Values.openTelemetryIngest.keda.enabled "minReplicas" .Values.openTelemetryIngest.keda.minReplicas "maxReplicas" .Values.openTelemetryIngest.keda.maxReplicas "pollingInterval" .Values.openTelemetryIngest.keda.pollingInterval "cooldownPeriod" .Values.openTelemetryIngest.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_telemetry_queue_size" "threshold" .Values.openTelemetryIngest.keda.queueSizeThreshold "port" .Values.openTelemetryIngest.ports.http)) }}
{{- $openTelemetryIngestKedaArgs := dict "ServiceName" "open-telemetry-ingest" "Release" .Release "Values" .Values "MetricsConfig" $metricsConfig "DisableAutoscaler" .Values.openTelemetryIngest.disableAutoscaler }}
{{- include "oneuptime.kedaScaledObject" $openTelemetryIngestKedaArgs }}
{{- end }}
{{/* Fluent Ingest KEDA ScaledObject */}}
{{- if and .Values.keda.enabled .Values.fluentIngest.keda.enabled (not .Values.fluentIngest.disableAutoscaler) }}
{{- $metricsConfig := dict "enabled" .Values.fluentIngest.keda.enabled "minReplicas" .Values.fluentIngest.keda.minReplicas "maxReplicas" .Values.fluentIngest.keda.maxReplicas "pollingInterval" .Values.fluentIngest.keda.pollingInterval "cooldownPeriod" .Values.fluentIngest.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_fluent_ingest_queue_size" "threshold" .Values.fluentIngest.keda.queueSizeThreshold "port" .Values.port.fluentIngest)) }}
{{- $metricsConfig := dict "enabled" .Values.fluentIngest.keda.enabled "minReplicas" .Values.fluentIngest.keda.minReplicas "maxReplicas" .Values.fluentIngest.keda.maxReplicas "pollingInterval" .Values.fluentIngest.keda.pollingInterval "cooldownPeriod" .Values.fluentIngest.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_fluent_ingest_queue_size" "threshold" .Values.fluentIngest.keda.queueSizeThreshold "port" .Values.fluentIngest.ports.http)) }}
{{- $fluentIngestKedaArgs := dict "ServiceName" "fluent-ingest" "Release" .Release "Values" .Values "MetricsConfig" $metricsConfig "DisableAutoscaler" .Values.fluentIngest.disableAutoscaler }}
{{- include "oneuptime.kedaScaledObject" $fluentIngestKedaArgs }}
{{- end }}
{{/* Incoming Request Ingest KEDA ScaledObject */}}
{{- if and .Values.keda.enabled .Values.incomingRequestIngest.keda.enabled (not .Values.incomingRequestIngest.disableAutoscaler) }}
{{- $metricsConfig := dict "enabled" .Values.incomingRequestIngest.keda.enabled "minReplicas" .Values.incomingRequestIngest.keda.minReplicas "maxReplicas" .Values.incomingRequestIngest.keda.maxReplicas "pollingInterval" .Values.incomingRequestIngest.keda.pollingInterval "cooldownPeriod" .Values.incomingRequestIngest.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_incoming_request_ingest_queue_size" "threshold" .Values.incomingRequestIngest.keda.queueSizeThreshold "port" .Values.port.incomingRequestIngest)) }}
{{- $metricsConfig := dict "enabled" .Values.incomingRequestIngest.keda.enabled "minReplicas" .Values.incomingRequestIngest.keda.minReplicas "maxReplicas" .Values.incomingRequestIngest.keda.maxReplicas "pollingInterval" .Values.incomingRequestIngest.keda.pollingInterval "cooldownPeriod" .Values.incomingRequestIngest.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_incoming_request_ingest_queue_size" "threshold" .Values.incomingRequestIngest.keda.queueSizeThreshold "port" .Values.incomingRequestIngest.ports.http)) }}
{{- $incomingRequestIngestKedaArgs := dict "ServiceName" "incoming-request-ingest" "Release" .Release "Values" .Values "MetricsConfig" $metricsConfig "DisableAutoscaler" .Values.incomingRequestIngest.disableAutoscaler }}
{{- include "oneuptime.kedaScaledObject" $incomingRequestIngestKedaArgs }}
{{- end }}
{{/* Server Monitor Ingest KEDA ScaledObject */}}
{{- if and .Values.keda.enabled .Values.serverMonitorIngest.keda.enabled (not .Values.serverMonitorIngest.disableAutoscaler) }}
{{- $metricsConfig := dict "enabled" .Values.serverMonitorIngest.keda.enabled "minReplicas" .Values.serverMonitorIngest.keda.minReplicas "maxReplicas" .Values.serverMonitorIngest.keda.maxReplicas "pollingInterval" .Values.serverMonitorIngest.keda.pollingInterval "cooldownPeriod" .Values.serverMonitorIngest.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_server_monitor_ingest_queue_size" "threshold" .Values.serverMonitorIngest.keda.queueSizeThreshold "port" .Values.port.serverMonitorIngest)) }}
{{- $metricsConfig := dict "enabled" .Values.serverMonitorIngest.keda.enabled "minReplicas" .Values.serverMonitorIngest.keda.minReplicas "maxReplicas" .Values.serverMonitorIngest.keda.maxReplicas "pollingInterval" .Values.serverMonitorIngest.keda.pollingInterval "cooldownPeriod" .Values.serverMonitorIngest.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_server_monitor_ingest_queue_size" "threshold" .Values.serverMonitorIngest.keda.queueSizeThreshold "port" .Values.serverMonitorIngest.ports.http)) }}
{{- $serverMonitorIngestKedaArgs := dict "ServiceName" "server-monitor-ingest" "Release" .Release "Values" .Values "MetricsConfig" $metricsConfig "DisableAutoscaler" .Values.serverMonitorIngest.disableAutoscaler }}
{{- include "oneuptime.kedaScaledObject" $serverMonitorIngestKedaArgs }}
{{- end }}
{{/* Probe Ingest KEDA ScaledObject */}}
{{- if and .Values.keda.enabled .Values.probeIngest.keda.enabled (not .Values.probeIngest.disableAutoscaler) }}
{{- $metricsConfig := dict "enabled" .Values.probeIngest.keda.enabled "minReplicas" .Values.probeIngest.keda.minReplicas "maxReplicas" .Values.probeIngest.keda.maxReplicas "pollingInterval" .Values.probeIngest.keda.pollingInterval "cooldownPeriod" .Values.probeIngest.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_probe_ingest_queue_size" "threshold" .Values.probeIngest.keda.queueSizeThreshold "port" .Values.port.probeIngest)) }}
{{- $metricsConfig := dict "enabled" .Values.probeIngest.keda.enabled "minReplicas" .Values.probeIngest.keda.minReplicas "maxReplicas" .Values.probeIngest.keda.maxReplicas "pollingInterval" .Values.probeIngest.keda.pollingInterval "cooldownPeriod" .Values.probeIngest.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_probe_ingest_queue_size" "threshold" .Values.probeIngest.keda.queueSizeThreshold "port" .Values.probeIngest.ports.http)) }}
{{- $probeIngestKedaArgs := dict "ServiceName" "probe-ingest" "Release" .Release "Values" .Values "MetricsConfig" $metricsConfig "DisableAutoscaler" .Values.probeIngest.disableAutoscaler }}
{{- include "oneuptime.kedaScaledObject" $probeIngestKedaArgs }}
{{- end }}
{{/* Probe KEDA ScaledObjects - one for each probe configuration */}}
{{- range $key, $val := $.Values.probes }}
{{- if and $.Values.keda.enabled $val.keda.enabled (not $val.disableAutoscaler) }}
{{- $serviceName := printf "probe-%s" $key }}
{{- $probePort := 3874 }}
{{- if and $val.ports $val.ports.http }}
{{- $probePort = $val.ports.http }}
{{- end }}
{{- $metricsConfig := dict "enabled" $val.keda.enabled "minReplicas" $val.keda.minReplicas "maxReplicas" $val.keda.maxReplicas "pollingInterval" $val.keda.pollingInterval "cooldownPeriod" $val.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_probe_queue_size" "threshold" $val.keda.queueSizeThreshold "port" $probePort)) }}
{{- $probeKedaArgs := dict "ServiceName" $serviceName "Release" $.Release "Values" $.Values "MetricsConfig" $metricsConfig "DisableAutoscaler" $val.disableAutoscaler }}
{{- include "oneuptime.kedaScaledObject" $probeKedaArgs }}
{{- end }}
{{- end }}

View File

@@ -112,7 +112,7 @@ spec:
- name: NGINX_LISTEN_OPTIONS
value: {{ $.Values.nginx.listenOptions | quote }}
- name: ONEUPTIME_HTTP_PORT
value: {{ $.Values.port.nginxHttp | quote }}
value: {{ $.Values.nginx.ports.http | quote }}
- name: PORT
value: "7851" # Port for the nodejs server for live and ready status
- name: DISABLE_TELEMETRY
@@ -158,10 +158,10 @@ spec:
{{- end }}
{{- end }}
ports:
- port: {{ $.Values.port.nginxHttp }}
- port: {{ $.Values.nginx.ports.http }}
targetPort: 7849
name: oneuptime-http
- port: {{ $.Values.port.statusPageHttpsPort }}
- port: {{ $.Values.nginx.ports.https }}
targetPort: 7850
name: statuspage-ssl
selector:

View File

@@ -57,7 +57,7 @@ spec:
startupProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.openTelemetryIngest }}
port: {{ $.Values.openTelemetryIngest.ports.http }}
periodSeconds: {{ $.Values.startupProbe.periodSeconds }}
failureThreshold: {{ $.Values.startupProbe.failureThreshold }}
{{- end }}
@@ -66,7 +66,7 @@ spec:
livenessProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.openTelemetryIngest }}
port: {{ $.Values.openTelemetryIngest.ports.http }}
periodSeconds: {{ $.Values.livenessProbe.periodSeconds }}
timeoutSeconds: {{ $.Values.livenessProbe.timeoutSeconds }}
initialDelaySeconds: {{ $.Values.livenessProbe.initialDelaySeconds }}
@@ -76,7 +76,7 @@ spec:
readinessProbe:
httpGet:
path: /status/ready
port: {{ $.Values.port.openTelemetryIngest }}
port: {{ $.Values.openTelemetryIngest.ports.http }}
periodSeconds: {{ $.Values.readinessProbe.periodSeconds }}
initialDelaySeconds: {{ $.Values.readinessProbe.initialDelaySeconds }}
timeoutSeconds: {{ $.Values.readinessProbe.timeoutSeconds }}
@@ -90,11 +90,11 @@ spec:
{{- include "oneuptime.env.commonServer" . | nindent 12 }}
{{- include "oneuptime.env.oneuptimeSecret" . | nindent 12 }}
- name: PORT
value: {{ $.Values.port.openTelemetryIngest | quote }}
value: {{ $.Values.openTelemetryIngest.ports.http | quote }}
- name: DISABLE_TELEMETRY
value: {{ $.Values.openTelemetryIngest.disableTelemetryCollection | quote }}
ports:
- containerPort: {{ $.Values.port.openTelemetryIngest }}
- containerPort: {{ $.Values.openTelemetryIngest.ports.http }}
protocol: TCP
name: http
{{- if $.Values.openTelemetryIngest.resources }}
@@ -106,7 +106,7 @@ spec:
---
# OneUptime open-telemetry-ingest Service
{{- $openTelemetryIngestPorts := dict "port" $.Values.port.openTelemetryIngest -}}
{{- $openTelemetryIngestPorts := dict "port" $.Values.openTelemetryIngest.ports.http -}}
{{- $openTelemetryIngestServiceArgs := dict "ServiceName" "open-telemetry-ingest" "Ports" $openTelemetryIngestPorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $openTelemetryIngestServiceArgs }}
---

View File

@@ -91,7 +91,7 @@ spec:
{{- include "oneuptime.env.commonServer" . | nindent 12 }}
{{- include "oneuptime.env.oneuptimeSecret" . | nindent 12 }}
- name: PORT
value: {{ $.Values.port.otelCollectorGrpc | quote }}
value: {{ $.Values.openTelemetryCollector.ports.grpc | quote }}
- name: OPENTELEMETRY_COLLECTOR_SENDING_QUEUE_ENABLED
value: {{ $.Values.openTelemetryCollector.sendingQueue.enabled | quote }}
- name: OPENTELEMETRY_COLLECTOR_SENDING_QUEUE_NUM_CONSUMERS
@@ -101,10 +101,10 @@ spec:
- name: DISABLE_TELEMETRY
value: {{ $.Values.openTelemetryCollector.disableTelemetryCollection | quote }}
ports:
- containerPort: {{ $.Values.port.otelCollectorHttp }}
- containerPort: {{ $.Values.openTelemetryCollector.ports.http }}
protocol: TCP
name: http
- containerPort: {{ $.Values.port.otelCollectorGrpc }}
- containerPort: {{ $.Values.openTelemetryCollector.ports.grpc }}
protocol: TCP
name: grpc
{{- if $.Values.openTelemetryCollector.resources }}
@@ -115,7 +115,7 @@ spec:
---
# OneUptime otel-collector Service
{{- $otelCollectorPorts := dict "grpc" $.Values.port.otelCollectorGrpc "http" $.Values.port.otelCollectorHttp -}}
{{- $otelCollectorPorts := dict "grpc" $.Values.openTelemetryCollector.ports.grpc "http" $.Values.openTelemetryCollector.ports.http -}}
{{- $identityServiceArgs := dict "ServiceName" "otel-collector" "Ports" $otelCollectorPorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $identityServiceArgs }}
---

View File

@@ -57,7 +57,7 @@ spec:
startupProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.probeIngest }}
port: {{ $.Values.probeIngest.ports.http }}
periodSeconds: {{ $.Values.startupProbe.periodSeconds }}
failureThreshold: {{ $.Values.startupProbe.failureThreshold }}
{{- end }}
@@ -66,7 +66,7 @@ spec:
livenessProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.probeIngest }}
port: {{ $.Values.probeIngest.ports.http }}
periodSeconds: {{ $.Values.livenessProbe.periodSeconds }}
timeoutSeconds: {{ $.Values.livenessProbe.timeoutSeconds }}
initialDelaySeconds: {{ $.Values.livenessProbe.initialDelaySeconds }}
@@ -76,7 +76,7 @@ spec:
readinessProbe:
httpGet:
path: /status/ready
port: {{ $.Values.port.probeIngest }}
port: {{ $.Values.probeIngest.ports.http }}
periodSeconds: {{ $.Values.readinessProbe.periodSeconds }}
initialDelaySeconds: {{ $.Values.readinessProbe.initialDelaySeconds }}
timeoutSeconds: {{ $.Values.readinessProbe.timeoutSeconds }}
@@ -90,11 +90,11 @@ spec:
{{- include "oneuptime.env.commonServer" . | nindent 12 }}
{{- include "oneuptime.env.oneuptimeSecret" . | nindent 12 }}
- name: PORT
value: {{ $.Values.port.probeIngest | quote }}
value: {{ $.Values.probeIngest.ports.http | quote }}
- name: DISABLE_TELEMETRY
value: {{ $.Values.probeIngest.disableTelemetryCollection | quote }}
ports:
- containerPort: {{ $.Values.port.probeIngest }}
- containerPort: {{ $.Values.probeIngest.ports.http }}
protocol: TCP
name: http
{{- if $.Values.probeIngest.resources }}
@@ -106,13 +106,13 @@ spec:
---
# OneUptime probe-ingest Service
{{- $probeIngestPorts := dict "port" $.Values.port.probeIngest -}}
{{- $probeIngestPorts := dict "port" $.Values.probeIngest.ports.http -}}
{{- $probeIngestServiceArgs := dict "ServiceName" "probe-ingest" "Ports" $probeIngestPorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $probeIngestServiceArgs }}
---
# OneUptime probe-ingest autoscaler
{{- if not $.Values.probeIngest.disableAutoscaler }}
{{- if and (not $.Values.probeIngest.disableAutoscaler) (not (and $.Values.keda.enabled $.Values.probeIngest.keda.enabled)) }}
{{- $probeIngestAutoScalerArgs := dict "ServiceName" "probe-ingest" "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.autoscaler" $probeIngestAutoScalerArgs }}
{{- end }}

View File

@@ -59,13 +59,17 @@ spec:
- name: LOG_LEVEL
value: {{ $.Values.logLevel }}
- name: PORT
value: {{ $.Values.port.probe | squote }}
{{- if and $val.ports $val.ports.http }}
value: {{ $val.ports.http | squote }}
{{- else }}
value: "3874"
{{- end }}
- name: OPENTELEMETRY_EXPORTER_OTLP_HEADERS
value: {{ $.Values.openTelemetryExporter.headers }}
- name: OPENTELEMETRY_EXPORTER_OTLP_ENDPOINT
value: {{ $.Values.openTelemetryExporter.endpoint }}
- name: ONEUPTIME_URL
value: http://{{ $.Release.Name }}-probe-ingest.{{ $.Release.Namespace }}.svc.{{ $.Values.global.clusterDomain }}:{{ $.Values.port.probeIngest }}
value: http://{{ $.Release.Name }}-probe-ingest.{{ $.Release.Namespace }}.svc.{{ $.Values.global.clusterDomain }}:{{ $.Values.probeIngest.ports.http }}
- name: PROBE_NAME
value: {{ $val.name }}
- name: PROBE_DESCRIPTION
@@ -100,6 +104,10 @@ spec:
value: {{ $val.disableTelemetryCollection | quote }}
{{- end }}
{{- include "oneuptime.env.oneuptimeSecret" $ | nindent 12 }}
ports:
- containerPort: {{ if and $val.ports $val.ports.http }}{{ $val.ports.http }}{{ else }}3874{{ end }}
protocol: TCP
name: http
{{- if $val.resources }}
resources:
{{- toYaml $val.resources | nindent 12 }}
@@ -110,12 +118,22 @@ spec:
restartPolicy: {{ $.Values.image.restartPolicy }}
---
{{- if not $val.disableAutoscaler }}
# OneUptime probe Service
{{- $probePort := 3874 }}
{{- if and $val.ports $val.ports.http }}
{{- $probePort = $val.ports.http }}
{{- end }}
{{- $probePorts := dict "port" $probePort -}}
{{- $probeServiceArgs := dict "ServiceName" (printf "probe-%s" $key) "Ports" $probePorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $probeServiceArgs }}
---
{{- if and (not $val.disableAutoscaler) (not (and $.Values.keda.enabled $val.keda.enabled)) }}
# OneUptime probe autoscaler
{{- $probeAutoScalerArgs := dict "ServiceName" (printf "probe-%s" $key) "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.autoscaler" $probeAutoScalerArgs }}
{{- end }}
---
{{- end }}
{{- end }}

View File

@@ -57,7 +57,7 @@ spec:
startupProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.serverMonitorIngest }}
port: {{ $.Values.serverMonitorIngest.ports.http }}
periodSeconds: {{ $.Values.startupProbe.periodSeconds }}
failureThreshold: {{ $.Values.startupProbe.failureThreshold }}
{{- end }}
@@ -66,7 +66,7 @@ spec:
livenessProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.serverMonitorIngest }}
port: {{ $.Values.serverMonitorIngest.ports.http }}
periodSeconds: {{ $.Values.livenessProbe.periodSeconds }}
timeoutSeconds: {{ $.Values.livenessProbe.timeoutSeconds }}
initialDelaySeconds: {{ $.Values.livenessProbe.initialDelaySeconds }}
@@ -76,7 +76,7 @@ spec:
readinessProbe:
httpGet:
path: /status/ready
port: {{ $.Values.port.serverMonitorIngest }}
port: {{ $.Values.serverMonitorIngest.ports.http }}
periodSeconds: {{ $.Values.readinessProbe.periodSeconds }}
initialDelaySeconds: {{ $.Values.readinessProbe.initialDelaySeconds }}
timeoutSeconds: {{ $.Values.readinessProbe.timeoutSeconds }}
@@ -90,11 +90,11 @@ spec:
{{- include "oneuptime.env.commonServer" . | nindent 12 }}
{{- include "oneuptime.env.oneuptimeSecret" . | nindent 12 }}
- name: PORT
value: {{ $.Values.port.serverMonitorIngest | quote }}
value: {{ $.Values.serverMonitorIngest.ports.http | quote }}
- name: DISABLE_TELEMETRY
value: {{ $.Values.serverMonitorIngest.disableTelemetryCollection | quote }}
ports:
- containerPort: {{ $.Values.port.serverMonitorIngest }}
- containerPort: {{ $.Values.serverMonitorIngest.ports.http }}
protocol: TCP
name: http
{{- if $.Values.serverMonitorIngest.resources }}
@@ -106,13 +106,13 @@ spec:
---
# OneUptime server-monitor-ingest Service
{{- $serverMonitorIngestPorts := dict "port" $.Values.port.serverMonitorIngest -}}
{{- $serverMonitorIngestPorts := dict "port" $.Values.serverMonitorIngest.ports.http -}}
{{- $serverMonitorIngestServiceArgs := dict "ServiceName" "server-monitor-ingest" "Ports" $serverMonitorIngestPorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $serverMonitorIngestServiceArgs }}
---
# OneUptime server-monitor-ingest autoscaler
{{- if not $.Values.serverMonitorIngest.disableAutoscaler }}
{{- if and (not $.Values.serverMonitorIngest.disableAutoscaler) (not (and $.Values.keda.enabled $.Values.serverMonitorIngest.keda.enabled)) }}
{{- $serverMonitorIngestAutoScalerArgs := dict "ServiceName" "server-monitor-ingest" "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.autoscaler" $serverMonitorIngestAutoScalerArgs }}
{{- end }}

View File

@@ -1,12 +1,12 @@
# OneUptime statusPage Deployment
{{- $statusPagePorts := dict "port" $.Values.port.statusPage -}}
{{- $statusPageEnv := dict "PORT" $.Values.port.statusPage "DISABLE_TELEMETRY" $.Values.statusPage.disableTelemetryCollection -}}
{{- $statusPagePorts := dict "port" $.Values.statusPage.ports.http -}}
{{- $statusPageEnv := dict "PORT" $.Values.statusPage.ports.http "DISABLE_TELEMETRY" $.Values.statusPage.disableTelemetryCollection -}}
{{- $statusPageDeploymentArgs :=dict "IsUI" true "ServiceName" "status-page" "Ports" $statusPagePorts "Release" $.Release "Values" $.Values "Env" $statusPageEnv "Resources" $.Values.statusPage.resources "DisableAutoscaler" $.Values.statusPage.disableAutoscaler "ReplicaCount" $.Values.statusPage.replicaCount -}}
{{- include "oneuptime.deployment" $statusPageDeploymentArgs }}
---
# OneUptime statusPage Service
{{- $statusPagePorts := dict "port" $.Values.port.statusPage -}}
{{- $statusPagePorts := dict "port" $.Values.statusPage.ports.http -}}
{{- $statusPageServiceArgs := dict "ServiceName" "status-page" "Ports" $statusPagePorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $statusPageServiceArgs }}
---

View File

@@ -1,15 +1,15 @@
{{- if $.Values.testServer.enabled }}
# OneUptime testServer Deployment
{{- $testServerPorts := dict "port" $.Values.port.testServer -}}
{{- $testServerEnv := dict "PORT" $.Values.port.testServer "DISABLE_TELEMETRY" $.Values.testServer.disableTelemetryCollection -}}
# OneUptime test-server Deployment
{{- $testServerPorts := $.Values.testServer.ports -}}
{{- $testServerEnv := dict "PORT" $.Values.testServer.ports.http "DISABLE_TELEMETRY" $.Values.testServer.disableTelemetryCollection -}}
{{- $testServerDeploymentArgs :=dict "IsUI" true "ServiceName" "test-server" "Ports" $testServerPorts "Release" $.Release "Values" $.Values "Env" $testServerEnv "Resources" $.Values.testServer.resources "DisableAutoscaler" $.Values.testServer.disableAutoscaler "ReplicaCount" $.Values.testServer.replicaCount -}}
{{- include "oneuptime.deployment" $testServerDeploymentArgs }}
---
# OneUptime testServer Service
{{- $testServerPorts := dict "port" $.Values.port.testServer -}}
{{- $testServerServiceArgs := dict "ServiceName" "test-server" "Ports" $testServerPorts "Release" $.Release "Values" $.Values -}}
# OneUptime test-server Service
{{- $testServerPorts := $.Values.testServer.ports -}}
{{- $testServerServiceArgs := dict "ServiceName" "test-server" "Ports" $testServerPorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $testServerServiceArgs }}
---

View File

@@ -52,7 +52,7 @@ spec:
startupProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.worker }}
port: {{ $.Values.worker.ports.http }}
periodSeconds: {{ $.Values.startupProbe.periodSeconds }}
failureThreshold: {{ $.Values.startupProbe.failureThreshold }}
{{- end }}
@@ -61,7 +61,7 @@ spec:
livenessProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.worker }}
port: {{ $.Values.worker.ports.http }}
periodSeconds: {{ $.Values.livenessProbe.periodSeconds }}
timeoutSeconds: {{ $.Values.livenessProbe.timeoutSeconds }}
initialDelaySeconds: {{ $.Values.livenessProbe.initialDelaySeconds }}
@@ -71,7 +71,7 @@ spec:
readinessProbe:
httpGet:
path: /status/ready
port: {{ $.Values.port.worker }}
port: {{ $.Values.worker.ports.http }}
periodSeconds: {{ $.Values.readinessProbe.periodSeconds }}
initialDelaySeconds: {{ $.Values.readinessProbe.initialDelaySeconds }}
timeoutSeconds: {{ $.Values.readinessProbe.timeoutSeconds }}
@@ -85,11 +85,11 @@ spec:
{{- include "oneuptime.env.commonServer" . | nindent 12 }}
{{- include "oneuptime.env.oneuptimeSecret" . | nindent 12 }}
- name: PORT
value: {{ $.Values.port.worker | quote }}
value: {{ $.Values.worker.ports.http | quote }}
- name: DISABLE_TELEMETRY
value: {{ $.Values.worker.disableTelemetryCollection | quote }}
ports:
- containerPort: {{ $.Values.port.worker }}
- containerPort: {{ $.Values.worker.ports.http }}
protocol: TCP
name: http
{{- if $.Values.worker.resources }}
@@ -101,7 +101,7 @@ spec:
---
# OneUptime app Service
{{- $workerPorts := dict "port" $.Values.port.worker -}}
{{- $workerPorts := $.Values.worker.ports -}}
{{- $workerServiceArgs := dict "ServiceName" "worker" "Ports" $workerPorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $workerServiceArgs }}
---

View File

@@ -52,7 +52,7 @@ spec:
startupProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.workflow }}
port: {{ $.Values.workflow.ports.http }}
periodSeconds: {{ $.Values.startupProbe.periodSeconds }}
failureThreshold: {{ $.Values.startupProbe.failureThreshold }}
{{- end }}
@@ -61,7 +61,7 @@ spec:
livenessProbe:
httpGet:
path: /status/live
port: {{ $.Values.port.workflow }}
port: {{ $.Values.workflow.ports.http }}
periodSeconds: {{ $.Values.livenessProbe.periodSeconds }}
timeoutSeconds: {{ $.Values.livenessProbe.timeoutSeconds }}
initialDelaySeconds: {{ $.Values.livenessProbe.initialDelaySeconds }}
@@ -71,7 +71,7 @@ spec:
readinessProbe:
httpGet:
path: /status/ready
port: {{ $.Values.port.workflow }}
port: {{ $.Values.workflow.ports.http }}
periodSeconds: {{ $.Values.readinessProbe.periodSeconds }}
initialDelaySeconds: {{ $.Values.readinessProbe.initialDelaySeconds }}
timeoutSeconds: {{ $.Values.readinessProbe.timeoutSeconds }}
@@ -85,11 +85,11 @@ spec:
{{- include "oneuptime.env.commonServer" . | nindent 12 }}
{{- include "oneuptime.env.oneuptimeSecret" . | nindent 12 }}
- name: PORT
value: {{ $.Values.port.workflow | quote }}
value: {{ $.Values.workflow.ports.http | quote }}
- name: DISABLE_TELEMETRY
value: {{ $.Values.workflow.disableTelemetryCollection | quote }}
ports:
- containerPort: {{ $.Values.port.workflow }}
- containerPort: {{ $.Values.workflow.ports.http }}
protocol: TCP
name: http
{{- if $.Values.workflow.resources }}
@@ -102,7 +102,7 @@ spec:
---
# OneUptime app Service
{{- $workflowPorts := dict "port" $.Values.port.workflow -}}
{{- $workflowPorts := $.Values.workflow.ports -}}
{{- $workflowServiceArgs := dict "ServiceName" "workflow" "Ports" $workflowPorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $workflowServiceArgs }}
---

View File

@@ -43,6 +43,9 @@ nginx:
disableAutoscaler: false
listenAddress: ""
listenOptions: ""
ports:
http: 80
https: 443
service:
loadBalancerIP:
type: LoadBalancer
@@ -184,6 +187,8 @@ statusPage:
cnameRecord:
disableTelemetryCollection: false
disableAutoscaler: false
ports:
http: 3105
probes:
one:
@@ -198,6 +203,19 @@ probes:
customCodeMonitorScriptTimeoutInMs: 60000
disableTelemetryCollection: false
disableAutoscaler: false
ports:
http: 3874
# KEDA autoscaling configuration based on monitor queue metrics
keda:
enabled: false
minReplicas: 1
maxReplicas: 100
# Scale up when queue size exceeds this threshold per probe
queueSizeThreshold: 10
# Polling interval for metrics (in seconds)
pollingInterval: 30
# Cooldown period after scaling (in seconds)
cooldownPeriod: 300
# resources:
# additionalContainers:
# two:
@@ -207,44 +225,34 @@ probes:
# monitorFetchLimit: 10
# key:
# replicaCount: 1
# ports:
# http: 3874
# syntheticMonitorScriptTimeoutInMs: 60000
# customCodeMonitorScriptTimeoutInMs: 60000
# disableTelemetryCollection: false
# disableAutoscaler: false
# resources:
# additionalContainers:
# KEDA autoscaling configuration based on monitor queue metrics
# keda:
# enabled: false
# minReplicas: 1
# maxReplicas: 100
# # Scale up when queue size exceeds this threshold per probe
# queueSizeThreshold: 10
# # Polling interval for metrics (in seconds)
# pollingInterval: 30
# # Cooldown period after scaling (in seconds)
# cooldownPeriod: 300
port:
app: 3002
probeIngest: 3400
serverMonitorIngest: 3404
openTelemetryIngest: 3403
fluentIngest: 3401
incomingRequestIngest: 3402
testServer: 3800
accounts: 3003
statusPage: 3105
dashboard: 3009
adminDashboard: 3158
# This is where oneuptime server is hosted on.
nginxHttp: 80
# If you are connecting Status Pages to custom domains, then this will be the port where the status page will be hosted on.
statusPageHttpsPort: 443
otelCollectorGrpc: 4317
otelCollectorHttp: 4318
isolatedVM: 4572
home: 1444
worker: 1445
workflow: 3099
apiReference: 1446
docs: 1447
testServer:
replicaCount: 1
enabled: false
disableTelemetryCollection: false
disableAutoscaler: false
ports:
http: 3800
openTelemetryExporter:
@@ -425,6 +433,9 @@ openTelemetryCollector:
replicaCount: 1
disableTelemetryCollection: false
disableAutoscaler: false
ports:
grpc: 4317
http: 4318
sendingQueue:
enabled: true
size: 1000
@@ -434,30 +445,40 @@ accounts:
replicaCount: 1
disableTelemetryCollection: false
disableAutoscaler: false
ports:
http: 3003
resources:
home:
replicaCount: 1
disableTelemetryCollection: false
disableAutoscaler: false
ports:
http: 1444
resources:
dashboard:
replicaCount: 1
disableTelemetryCollection: false
disableAutoscaler: false
ports:
http: 3009
resources:
adminDashboard:
replicaCount: 1
disableTelemetryCollection: false
disableAutoscaler: false
ports:
http: 3158
resources:
worker:
replicaCount: 1
disableTelemetryCollection: false
disableAutoscaler: false
ports:
http: 1445
resources:
workflow:
@@ -465,30 +486,40 @@ workflow:
disableTelemetryCollection: false
disableAutoscaler: false
workflowTimeoutInMs: 5000
ports:
http: 3099
resources:
apiReference:
replicaCount: 1
disableTelemetryCollection: false
disableAutoscaler: false
ports:
http: 1446
resources:
docs:
replicaCount: 1
disableTelemetryCollection: false
disableAutoscaler: false
ports:
http: 1447
resources:
app:
replicaCount: 1
disableTelemetryCollection: false
disableAutoscaler: false
ports:
http: 3002
resources:
probeIngest:
replicaCount: 1
disableTelemetryCollection: false
disableAutoscaler: false
ports:
http: 3400
resources:
# KEDA autoscaling configuration based on queue metrics
keda:
@@ -506,6 +537,8 @@ openTelemetryIngest:
replicaCount: 1
disableTelemetryCollection: false
disableAutoscaler: false
ports:
http: 3403
resources:
# KEDA autoscaling configuration based on queue metrics
keda:
@@ -523,6 +556,8 @@ fluentIngest:
replicaCount: 1
disableTelemetryCollection: false
disableAutoscaler: false
ports:
http: 3401
resources:
# KEDA autoscaling configuration based on queue metrics
keda:
@@ -540,6 +575,8 @@ incomingRequestIngest:
replicaCount: 1
disableTelemetryCollection: false
disableAutoscaler: false
ports:
http: 3402
resources:
# KEDA autoscaling configuration based on queue metrics
keda:
@@ -557,12 +594,16 @@ isolatedVM:
replicaCount: 1
disableTelemetryCollection: false
disableAutoscaler: false
ports:
http: 4572
resources:
serverMonitorIngest:
replicaCount: 1
disableTelemetryCollection: false
disableAutoscaler: false
ports:
http: 3404
resources:
# KEDA autoscaling configuration based on queue metrics
keda:

View File

@@ -105,7 +105,8 @@ router.get(
next: NextFunction,
): Promise<void> => {
try {
const size: number = await IncomingRequestIngestQueueService.getQueueSize();
const size: number =
await IncomingRequestIngestQueueService.getQueueSize();
return Response.sendJsonObjectResponse(req, res, { size });
} catch (err) {
return next(err);

View File

@@ -22,7 +22,8 @@ router.get(
next: NextFunction,
): Promise<void> => {
try {
const queueSize: number = await IncomingRequestIngestQueueService.getQueueSize();
const queueSize: number =
await IncomingRequestIngestQueueService.getQueueSize();
res.setHeader("Content-Type", "application/json");
res.status(200).json({

View File

@@ -98,12 +98,7 @@ async function processIncomingRequestFromQueue(
};
// process probe response here.
MonitorResourceUtil.monitorResource(incomingRequest).catch((err: Error) => {
// do nothing.
// we don't want to throw error here.
// we just want to log the error.
logger.error(err);
});
await MonitorResourceUtil.monitorResource(incomingRequest);
}
logger.debug("Incoming request ingest worker initialized");

View File

@@ -13,14 +13,12 @@ export interface IncomingRequestIngestJobData {
}
export default class IncomingRequestIngestQueueService {
public static async addIncomingRequestIngestJob(
data: {
secretKey: string;
requestHeaders: Dictionary<string>;
requestBody: string | JSONObject;
requestMethod: string;
},
): Promise<void> {
public static async addIncomingRequestIngestJob(data: {
secretKey: string;
requestHeaders: Dictionary<string>;
requestBody: string | JSONObject;
requestMethod: string;
}): Promise<void> {
try {
const jobData: IncomingRequestIngestJobData = {
secretKey: data.secretKey,

84
Probe/API/Metrics.ts Normal file
View File

@@ -0,0 +1,84 @@
import Express, {
  ExpressRequest,
  ExpressResponse,
  ExpressRouter,
  NextFunction,
} from "Common/Server/Utils/Express";
import Response from "Common/Server/Utils/Response";
import { PROBE_INGEST_URL } from "../Config";
import HTTPErrorResponse from "Common/Types/API/HTTPErrorResponse";
import HTTPMethod from "Common/Types/API/HTTPMethod";
import HTTPResponse from "Common/Types/API/HTTPResponse";
import URL from "Common/Types/API/URL";
import { JSONObject } from "Common/Types/JSON";
import API from "Common/Utils/API";
import logger from "Common/Server/Utils/Logger";
import ProbeAPIRequest from "../Utils/ProbeAPIRequest";

const router: ExpressRouter = Express.getRouter();

// Metrics endpoint for KEDA autoscaling.
// Proxies the pending-monitor count for this probe from the ProbeIngest API
// and returns it as { queueSize: number } so a KEDA metrics-api trigger can
// consume it.
router.get(
  "/queue-size",
  async (
    req: ExpressRequest,
    res: ExpressResponse,
    next: NextFunction,
  ): Promise<void> => {
    try {
      // Get the pending monitor count for this specific probe from ProbeIngest API.
      const queueSizeUrl: URL = URL.fromString(
        PROBE_INGEST_URL.toString(),
      ).addRoute("/metrics/queue-size");

      logger.debug("Fetching queue size from ProbeIngest API");

      // Use probe authentication (probe key and probe ID).
      const requestBody: JSONObject = ProbeAPIRequest.getDefaultRequestBody();

      const result: HTTPResponse<JSONObject> | HTTPErrorResponse =
        await API.fetch<JSONObject>(
          HTTPMethod.POST,
          queueSizeUrl,
          requestBody,
          {},
        );

      if (result instanceof HTTPErrorResponse) {
        logger.error("Error fetching queue size from ProbeIngest API");
        logger.error(result);
        throw result;
      }

      logger.debug("Queue size fetched successfully from ProbeIngest API");
      logger.debug(result.data);

      // Extract queueSize from the response. Narrow from `unknown` instead of
      // asserting `as number`: the original assertion made the string-handling
      // branch type-impossible even though the upstream service may return a
      // numeric string. Handle number, numeric string, and garbage explicitly.
      const rawQueueSize: unknown = result.data["queueSize"];
      let queueSize: number = 0;

      if (typeof rawQueueSize === "number" && Number.isFinite(rawQueueSize)) {
        queueSize = rawQueueSize;
      } else if (typeof rawQueueSize === "string") {
        const parsedQueueSize: number = parseInt(rawQueueSize, 10);
        if (!isNaN(parsedQueueSize)) {
          queueSize = parsedQueueSize;
        } else {
          logger.warn("Queue size is not a valid number, defaulting to 0");
        }
      } else if (rawQueueSize !== undefined && rawQueueSize !== null) {
        // Unexpected shape (object, boolean, NaN…) — fall back to 0 but log it.
        logger.warn("Queue size is not a valid number, defaulting to 0");
      }

      logger.debug(`Queue size fetched: ${queueSize}`);

      return Response.sendJsonObjectResponse(req, res, {
        queueSize: queueSize,
      });
    } catch (err) {
      logger.error("Error in metrics queue-size endpoint");
      logger.error(err);
      return next(err);
    }
  },
);

export default router;

View File

@@ -3,10 +3,12 @@ import AliveJob from "./Jobs/Alive";
import FetchMonitorList from "./Jobs/Monitor/FetchList";
import FetchMonitorTestList from "./Jobs/Monitor/FetchMonitorTest";
import Register from "./Services/Register";
import MetricsAPI from "./API/Metrics";
import { PromiseVoidFunction } from "Common/Types/FunctionTypes";
import logger from "Common/Server/Utils/Logger";
import App from "Common/Server/Utils/StartServer";
import Telemetry from "Common/Server/Utils/Telemetry";
import Express, { ExpressApplication } from "Common/Server/Utils/Express";
import "ejs";
const APP_NAME: string = "probe";
@@ -29,6 +31,10 @@ const init: PromiseVoidFunction = async (): Promise<void> => {
},
});
// Add metrics API routes
const app: ExpressApplication = Express.getExpressApp();
app.use("/metrics", MetricsAPI);
// add default routes
await App.addDefaultRoutes();

View File

@@ -24,6 +24,12 @@ import Probe from "Common/Models/DatabaseModels/Probe";
import User from "Common/Models/DatabaseModels/User";
import ProbeIngestQueueService from "../Services/Queue/ProbeIngestQueueService";
import ClusterKeyAuthorization from "Common/Server/Middleware/ClusterKeyAuthorization";
import PositiveNumber from "Common/Types/PositiveNumber";
import MonitorProbeService from "Common/Server/Services/MonitorProbeService";
import QueryHelper from "Common/Server/Types/Database/QueryHelper";
import OneUptimeDate from "Common/Types/Date";
import MonitorService from "Common/Server/Services/MonitorService";
import { IsBillingEnabled } from "Common/Server/EnvironmentConfig";
const router: ExpressRouter = Express.getRouter();
@@ -199,28 +205,33 @@ router.post(
}
// now send an email to all the emailsToNotify
for (const email of emailsToNotify) {
MailService.sendMail(
{
toEmail: email,
templateType: EmailTemplateType.ProbeOffline,
subject: "ACTION REQUIRED: Probe Offline Notification",
vars: {
probeName: probe.name || "",
probeDescription: probe.description || "",
projectId: probe.projectId?.toString() || "",
probeId: probe.id?.toString() || "",
hostname: statusReport["hostname"]?.toString() || "",
emailReason: emailReason,
issue: issue,
// Skip sending email if billing is enabled
if (!IsBillingEnabled) {
for (const email of emailsToNotify) {
MailService.sendMail(
{
toEmail: email,
templateType: EmailTemplateType.ProbeOffline,
subject: "ACTION REQUIRED: Probe Offline Notification",
vars: {
probeName: probe.name || "",
probeDescription: probe.description || "",
projectId: probe.projectId?.toString() || "",
probeId: probe.id?.toString() || "",
hostname: statusReport["hostname"]?.toString() || "",
emailReason: emailReason,
issue: issue,
},
},
},
{
projectId: probe.projectId,
},
).catch((err: Error) => {
logger.error(err);
});
{
projectId: probe.projectId,
},
).catch((err: Error) => {
logger.error(err);
});
}
} else {
logger.debug("Billing is enabled, skipping probe offline email notification");
}
}
@@ -363,6 +374,60 @@ router.get(
},
);
// Queue size endpoint for Keda autoscaling (returns pending monitors count for specific probe)
router.post(
"/metrics/queue-size",
ProbeAuthorization.isAuthorizedServiceMiddleware,
async (
req: ExpressRequest,
res: ExpressResponse,
next: NextFunction,
): Promise<void> => {
try {
// This endpoint returns the number of monitors pending for the specific probe
// to be used by Keda for autoscaling probe replicas
// Get the probe ID from the authenticated request
const data: JSONObject = req.body;
const probeId: ObjectID = new ObjectID(data["probeId"] as string);
if (!probeId) {
return Response.sendErrorResponse(
req,
res,
new BadDataException("Probe ID not found"),
);
}
// Get pending monitor count for this specific probe
const pendingCount: PositiveNumber = await MonitorProbeService.countBy({
query: {
probeId: probeId,
isEnabled: true,
nextPingAt: QueryHelper.lessThanEqualToOrNull(
OneUptimeDate.getSomeMinutesAgo(2)
),
monitor: {
...MonitorService.getEnabledMonitorQuery(),
},
project: {
...ProjectService.getActiveProjectStatusQuery(),
},
},
props: {
isRoot: true,
},
});
return Response.sendJsonObjectResponse(req, res, {
queueSize: pendingCount.toNumber(),
});
} catch (err) {
return next(err);
}
},
);
// Queue failed jobs endpoint
router.get(
"/probe/queue/failed",

View File

@@ -22,9 +22,7 @@ QueueWorker.getWorker(
await processProbeFromQueue(jobData);
logger.debug(
`Successfully processed probe ingestion job: ${job.name}`,
);
logger.debug(`Successfully processed probe ingestion job: ${job.name}`);
} catch (error) {
logger.error(`Error processing probe ingestion job:`);
logger.error(error);
@@ -50,10 +48,7 @@ async function processProbeFromQueue(
if (jobData.jobType === "probe-response") {
// Handle regular probe response
MonitorResourceUtil.monitorResource(probeResponse).catch((err: Error) => {
logger.error("Error in monitor resource");
logger.error(err);
});
await MonitorResourceUtil.monitorResource(probeResponse);
} else if (jobData.jobType === "monitor-test" && jobData.testId) {
// Handle monitor test response
const testId: ObjectID = new ObjectID(jobData.testId);

View File

@@ -11,13 +11,11 @@ export interface ProbeIngestJobData {
}
export default class ProbeIngestQueueService {
public static async addProbeIngestJob(
data: {
probeMonitorResponse: JSONObject;
jobType: "probe-response" | "monitor-test";
testId?: string;
},
): Promise<void> {
public static async addProbeIngestJob(data: {
probeMonitorResponse: JSONObject;
jobType: "probe-response" | "monitor-test";
testId?: string;
}): Promise<void> {
try {
const jobData: ProbeIngestJobData = {
probeMonitorResponse: data.probeMonitorResponse,

View File

@@ -22,7 +22,8 @@ router.get(
next: NextFunction,
): Promise<void> => {
try {
const queueSize: number = await ServerMonitorIngestQueueService.getQueueSize();
const queueSize: number =
await ServerMonitorIngestQueueService.getQueueSize();
res.setHeader("Content-Type", "application/json");
res.status(200).json({

View File

@@ -10,12 +10,10 @@ export interface ServerMonitorIngestJobData {
}
export default class ServerMonitorIngestQueueService {
public static async addServerMonitorIngestJob(
data: {
secretKey: string;
serverMonitorResponse: JSONObject;
},
): Promise<void> {
public static async addServerMonitorIngestJob(data: {
secretKey: string;
serverMonitorResponse: JSONObject;
}): Promise<void> {
try {
const jobData: ServerMonitorIngestJobData = {
secretKey: data.secretKey,