fix: enforce resource request requirements for CPU and memory utilization thresholds in app configuration

2026-04-05 16:22:06 +02:00 · 2026-04-03 14:37:12 +01:00
parent 191569eb3d
commit 0502eb5ebe
2 changed files with 15 additions and 5 deletions
--- a/HelmChart/Public/oneuptime/templates/keda-scaledobjects.yaml
+++ b/HelmChart/Public/oneuptime/templates/keda-scaledobjects.yaml
@@ -19,6 +19,14 @@ KEDA ScaledObjects for various services

 {{/* App KEDA ScaledObject - scales based on combined queue size (worker + workflow + telemetry) */}}
 {{- if and .Values.keda.enabled .Values.app.enabled .Values.app.keda.enabled (not .Values.app.disableAutoscaler) (not .Values.deployment.disableDeployments) }}
+{{- $appResources := .Values.app.resources | default dict }}
+{{- $appRequests := get $appResources "requests" | default dict }}
+{{- if and (gt (int (.Values.app.keda.targetCPUUtilizationPercentage | default 0)) 0) (not (get $appRequests "cpu")) }}
+{{- fail "app.keda.targetCPUUtilizationPercentage requires app.resources.requests.cpu to be set" }}
+{{- end }}
+{{- if and (gt (int (.Values.app.keda.targetMemoryUtilizationPercentage | default 0)) 0) (not (get $appRequests "memory")) }}
+{{- fail "app.keda.targetMemoryUtilizationPercentage requires app.resources.requests.memory to be set" }}
+{{- end }}
 {{- $metricsConfig := dict "enabled" .Values.app.keda.enabled "minReplicas" .Values.app.keda.minReplicas "maxReplicas" .Values.app.keda.maxReplicas "pollingInterval" .Values.app.keda.pollingInterval "cooldownPeriod" .Values.app.keda.cooldownPeriod "targetCPUUtilizationPercentage" .Values.app.keda.targetCPUUtilizationPercentage "targetMemoryUtilizationPercentage" .Values.app.keda.targetMemoryUtilizationPercentage "triggers" (list (dict "query" "oneuptime_app_queue_size" "threshold" .Values.app.keda.queueSizeThreshold "port" .Values.app.ports.http)) }}
 {{- $appKedaArgs := dict "ServiceName" "app" "Release" .Release "Values" .Values "MetricsConfig" $metricsConfig "DisableAutoscaler" .Values.app.disableAutoscaler }}
 {{- include "oneuptime.kedaScaledObject" $appKedaArgs }}
@@ -29,4 +37,4 @@ KEDA ScaledObjects for various services
 {{- $metricsConfig := dict "enabled" .Values.aiAgent.keda.enabled "minReplicas" .Values.aiAgent.keda.minReplicas "maxReplicas" .Values.aiAgent.keda.maxReplicas "pollingInterval" .Values.aiAgent.keda.pollingInterval "cooldownPeriod" .Values.aiAgent.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_ai_agent_queue_size" "threshold" .Values.aiAgent.keda.queueSizeThreshold "port" .Values.aiAgent.ports.http)) }}
 {{- $aiAgentKedaArgs := dict "ServiceName" "ai-agent" "Release" .Release "Values" .Values "MetricsConfig" $metricsConfig "DisableAutoscaler" .Values.aiAgent.disableAutoscaler }}
 {{- include "oneuptime.kedaScaledObject" $aiAgentKedaArgs }}
-{{- end }}
+{{- end }}
--- a/HelmChart/Public/oneuptime/values.yaml
+++ b/HelmChart/Public/oneuptime/values.yaml
@@ -669,10 +669,12 @@ app:
    maxReplicas: 100
    # Scale up when combined queue size (worker + workflow + telemetry) exceeds this threshold
    queueSizeThreshold: 10
-    # Scale up when average CPU utilization exceeds this percentage (0 to disable)
-    targetCPUUtilizationPercentage: 80
-    # Scale up when average memory utilization exceeds this percentage (0 to disable)
-    targetMemoryUtilizationPercentage: 80
+    # Scale up when average CPU utilization exceeds this percentage (0 to disable).
+    # If greater than 0, app.resources.requests.cpu must also be set, or rendering the app KEDA ScaledObject fails.
+    targetCPUUtilizationPercentage: 0
+    # Scale up when average memory utilization exceeds this percentage (0 to disable).
+    # If greater than 0, app.resources.requests.memory must also be set, or rendering the app KEDA ScaledObject fails.
+    targetMemoryUtilizationPercentage: 0
    # Polling interval for metrics (in seconds)
    pollingInterval: 30
    # Cooldown period after scaling (in seconds)