From eb4010dfa523ed781a87d85ee8d777d210ce99c1 Mon Sep 17 00:00:00 2001 From: Nawaz Dhandala Date: Thu, 2 Apr 2026 14:23:39 +0100 Subject: [PATCH] feat: add CPU and memory utilization metrics for KEDA autoscaling --- HelmChart/Public/oneuptime/templates/_helpers.tpl | 12 ++++++++++++ .../oneuptime/templates/keda-scaledobjects.yaml | 2 +- HelmChart/Public/oneuptime/values.schema.json | 6 ++++++ HelmChart/Public/oneuptime/values.yaml | 4 ++++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/HelmChart/Public/oneuptime/templates/_helpers.tpl b/HelmChart/Public/oneuptime/templates/_helpers.tpl index 23b1f7e2c0..335a0bdfab 100644 --- a/HelmChart/Public/oneuptime/templates/_helpers.tpl +++ b/HelmChart/Public/oneuptime/templates/_helpers.tpl @@ -838,6 +838,18 @@ spec: # authenticationRef: # name: {{ printf "%s-%s-trigger-auth" $.Release.Name $.ServiceName }} {{- end }} + {{- if and .MetricsConfig.targetCPUUtilizationPercentage (gt (int .MetricsConfig.targetCPUUtilizationPercentage) 0) }} + - type: cpu + metricType: Utilization + metadata: + value: {{ .MetricsConfig.targetCPUUtilizationPercentage | quote }} + {{- end }} + {{- if and .MetricsConfig.targetMemoryUtilizationPercentage (gt (int .MetricsConfig.targetMemoryUtilizationPercentage) 0) }} + - type: memory + metricType: Utilization + metadata: + value: {{ .MetricsConfig.targetMemoryUtilizationPercentage | quote }} + {{- end }} --- apiVersion: keda.sh/v1alpha1 kind: TriggerAuthentication diff --git a/HelmChart/Public/oneuptime/templates/keda-scaledobjects.yaml b/HelmChart/Public/oneuptime/templates/keda-scaledobjects.yaml index bc316913d0..25233c72ab 100644 --- a/HelmChart/Public/oneuptime/templates/keda-scaledobjects.yaml +++ b/HelmChart/Public/oneuptime/templates/keda-scaledobjects.yaml @@ -19,7 +19,7 @@ KEDA ScaledObjects for various services {{/* App KEDA ScaledObject - scales based on worker and telemetry queue sizes */}} {{- if and .Values.keda.enabled .Values.app.enabled .Values.app.keda.enabled (not .Values.app.disableAutoscaler) (not .Values.deployment.disableDeployments) }} -{{- $metricsConfig := dict "enabled" .Values.app.keda.enabled "minReplicas" .Values.app.keda.minReplicas "maxReplicas" .Values.app.keda.maxReplicas "pollingInterval" .Values.app.keda.pollingInterval "cooldownPeriod" .Values.app.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_app_worker_queue_size" "threshold" .Values.app.keda.workerQueueSizeThreshold "port" .Values.app.ports.http "urlPath" "/worker/metrics/queue-size") (dict "query" "oneuptime_app_telemetry_queue_size" "threshold" .Values.app.keda.telemetryQueueSizeThreshold "port" .Values.app.ports.http "urlPath" "/telemetry/metrics/queue-size")) }} +{{- $metricsConfig := dict "enabled" .Values.app.keda.enabled "minReplicas" .Values.app.keda.minReplicas "maxReplicas" .Values.app.keda.maxReplicas "pollingInterval" .Values.app.keda.pollingInterval "cooldownPeriod" .Values.app.keda.cooldownPeriod "targetCPUUtilizationPercentage" .Values.app.keda.targetCPUUtilizationPercentage "targetMemoryUtilizationPercentage" .Values.app.keda.targetMemoryUtilizationPercentage "triggers" (list (dict "query" "oneuptime_app_worker_queue_size" "threshold" .Values.app.keda.workerQueueSizeThreshold "port" .Values.app.ports.http "urlPath" "/worker/metrics/queue-size") (dict "query" "oneuptime_app_telemetry_queue_size" "threshold" .Values.app.keda.telemetryQueueSizeThreshold "port" .Values.app.ports.http "urlPath" "/telemetry/metrics/queue-size")) }} {{- $appKedaArgs := dict "ServiceName" "app" "Release" .Release "Values" .Values "MetricsConfig" $metricsConfig "DisableAutoscaler" .Values.app.disableAutoscaler }} {{- include "oneuptime.kedaScaledObject" $appKedaArgs }} {{- end }} diff --git a/HelmChart/Public/oneuptime/values.schema.json b/HelmChart/Public/oneuptime/values.schema.json index 2f1b38edcd..f1c9ef752f 100644 --- a/HelmChart/Public/oneuptime/values.schema.json +++ b/HelmChart/Public/oneuptime/values.schema.json @@ -1813,6 +1813,12 @@ "telemetryQueueSizeThreshold": { "type": "integer" }, + "targetCPUUtilizationPercentage": { + "type": "integer" + }, + "targetMemoryUtilizationPercentage": { + "type": "integer" + }, "pollingInterval": { "type": "integer" }, diff --git a/HelmChart/Public/oneuptime/values.yaml b/HelmChart/Public/oneuptime/values.yaml index c1ff147aa8..df7027a8e9 100644 --- a/HelmChart/Public/oneuptime/values.yaml +++ b/HelmChart/Public/oneuptime/values.yaml @@ -671,6 +671,10 @@ app: workerQueueSizeThreshold: 10 # Scale up when telemetry queue size exceeds this threshold telemetryQueueSizeThreshold: 10 + # Scale up when average CPU utilization exceeds this percentage (0 to disable) + targetCPUUtilizationPercentage: 80 + # Scale up when average memory utilization exceeds this percentage (0 to disable) + targetMemoryUtilizationPercentage: 80 # Polling interval for metrics (in seconds) pollingInterval: 30 # Cooldown period after scaling (in seconds)