From eb4010dfa523ed781a87d85ee8d777d210ce99c1 Mon Sep 17 00:00:00 2001
From: Nawaz Dhandala <hello@nawazdhandala.com>
Date: Thu, 2 Apr 2026 14:23:39 +0100
Subject: [PATCH] feat: add CPU and memory utilization metrics for KEDA
 autoscaling

---
 HelmChart/Public/oneuptime/templates/_helpers.tpl    | 12 ++++++++++++
 .../oneuptime/templates/keda-scaledobjects.yaml      |  2 +-
 HelmChart/Public/oneuptime/values.schema.json        |  6 ++++++
 HelmChart/Public/oneuptime/values.yaml               |  4 ++++
 4 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/HelmChart/Public/oneuptime/templates/_helpers.tpl b/HelmChart/Public/oneuptime/templates/_helpers.tpl
index 23b1f7e2c0..335a0bdfab 100644
--- a/HelmChart/Public/oneuptime/templates/_helpers.tpl
+++ b/HelmChart/Public/oneuptime/templates/_helpers.tpl
@@ -838,6 +838,18 @@ spec:
       # authenticationRef:
       #   name: {{ printf "%s-%s-trigger-auth" $.Release.Name $.ServiceName }}
     {{- end }}
+    {{- if gt (int (default 0 .MetricsConfig.targetCPUUtilizationPercentage)) 0 }}{{/* CPU trigger: rendered only when the target is above 0 */}}
+    - type: cpu
+      metricType: Utilization
+      metadata:
+        value: {{ quote .MetricsConfig.targetCPUUtilizationPercentage }}
+    {{- end }}
+    {{- if gt (int (default 0 .MetricsConfig.targetMemoryUtilizationPercentage)) 0 }}{{/* Memory trigger: rendered only when the target is above 0 */}}
+    - type: memory
+      metricType: Utilization
+      metadata:
+        value: {{ quote .MetricsConfig.targetMemoryUtilizationPercentage }}
+    {{- end }}
 ---
 apiVersion: keda.sh/v1alpha1
 kind: TriggerAuthentication
diff --git a/HelmChart/Public/oneuptime/templates/keda-scaledobjects.yaml b/HelmChart/Public/oneuptime/templates/keda-scaledobjects.yaml
index bc316913d0..25233c72ab 100644
--- a/HelmChart/Public/oneuptime/templates/keda-scaledobjects.yaml
+++ b/HelmChart/Public/oneuptime/templates/keda-scaledobjects.yaml
@@ -19,7 +19,7 @@ KEDA ScaledObjects for various services
 
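-{{/* App KEDA ScaledObject - scales based on worker and telemetry queue sizes */}}
+{{/* App KEDA ScaledObject - scales based on worker and telemetry queue sizes and, optionally, CPU and memory utilization targets */}}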
 {{- if and .Values.keda.enabled .Values.app.enabled .Values.app.keda.enabled (not .Values.app.disableAutoscaler) (not .Values.deployment.disableDeployments) }}
-{{- $metricsConfig := dict "enabled" .Values.app.keda.enabled "minReplicas" .Values.app.keda.minReplicas "maxReplicas" .Values.app.keda.maxReplicas "pollingInterval" .Values.app.keda.pollingInterval "cooldownPeriod" .Values.app.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_app_worker_queue_size" "threshold" .Values.app.keda.workerQueueSizeThreshold "port" .Values.app.ports.http "urlPath" "/worker/metrics/queue-size") (dict "query" "oneuptime_app_telemetry_queue_size" "threshold" .Values.app.keda.telemetryQueueSizeThreshold "port" .Values.app.ports.http "urlPath" "/telemetry/metrics/queue-size")) }}
+{{- $metricsConfig := dict "enabled" .Values.app.keda.enabled "minReplicas" .Values.app.keda.minReplicas "maxReplicas" .Values.app.keda.maxReplicas "pollingInterval" .Values.app.keda.pollingInterval "cooldownPeriod" .Values.app.keda.cooldownPeriod "targetCPUUtilizationPercentage" .Values.app.keda.targetCPUUtilizationPercentage "targetMemoryUtilizationPercentage" .Values.app.keda.targetMemoryUtilizationPercentage "triggers" (list (dict "query" "oneuptime_app_worker_queue_size" "threshold" .Values.app.keda.workerQueueSizeThreshold "port" .Values.app.ports.http "urlPath" "/worker/metrics/queue-size") (dict "query" "oneuptime_app_telemetry_queue_size" "threshold" .Values.app.keda.telemetryQueueSizeThreshold "port" .Values.app.ports.http "urlPath" "/telemetry/metrics/queue-size")) }}
 {{- $appKedaArgs := dict "ServiceName" "app" "Release" .Release "Values" .Values "MetricsConfig" $metricsConfig "DisableAutoscaler" .Values.app.disableAutoscaler }}
 {{- include "oneuptime.kedaScaledObject" $appKedaArgs }}
 {{- end }}
diff --git a/HelmChart/Public/oneuptime/values.schema.json b/HelmChart/Public/oneuptime/values.schema.json
index 2f1b38edcd..f1c9ef752f 100644
--- a/HelmChart/Public/oneuptime/values.schema.json
+++ b/HelmChart/Public/oneuptime/values.schema.json
@@ -1813,6 +1813,12 @@
                         "telemetryQueueSizeThreshold": {
                             "type": "integer"
                         },
+                        "targetCPUUtilizationPercentage": {
+                            "type": "integer"
+                        },
+                        "targetMemoryUtilizationPercentage": {
+                            "type": "integer"
+                        },
                         "pollingInterval": {
                             "type": "integer"
                         },
diff --git a/HelmChart/Public/oneuptime/values.yaml b/HelmChart/Public/oneuptime/values.yaml
index c1ff147aa8..df7027a8e9 100644
--- a/HelmChart/Public/oneuptime/values.yaml
+++ b/HelmChart/Public/oneuptime/values.yaml
@@ -671,6 +671,10 @@ app:
     workerQueueSizeThreshold: 10
     # Scale up when telemetry queue size exceeds this threshold
     telemetryQueueSizeThreshold: 10
+    # Scale up when average CPU utilization across pods, as a percentage of the CPU request, exceeds this value (0 to disable)
+    targetCPUUtilizationPercentage: 80
+    # Scale up when average memory utilization across pods, as a percentage of the memory request, exceeds this value (0 to disable)
+    targetMemoryUtilizationPercentage: 80
     # Polling interval for metrics (in seconds)
     pollingInterval: 30
     # Cooldown period after scaling (in seconds)