mirror of
https://github.com/OneUptime/oneuptime.git
synced 2026-04-06 00:32:12 +02:00
feat: add KEDA autoscaling configuration for worker and telemetry queue metrics
This commit is contained in:
@@ -832,7 +832,7 @@ spec:
|
||||
- type: metrics-api
|
||||
metadata:
|
||||
targetValue: {{ .threshold | quote }}
|
||||
url: http://{{ printf "%s-%s" $.Release.Name $.ServiceName }}:{{ .port }}/metrics/queue-size
|
||||
url: http://{{ printf "%s-%s" $.Release.Name $.ServiceName }}:{{ .port }}{{ if .urlPath }}{{ .urlPath }}{{ else }}/metrics/queue-size{{ end }}
|
||||
valueLocation: 'queueSize'
|
||||
method: 'GET'
|
||||
# authenticationRef:
|
||||
|
||||
@@ -17,6 +17,7 @@ spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: {{ printf "%s-%s" $.Release.Name "app" }}
|
||||
{{- if not (and $.Values.keda.enabled $.Values.app.keda.enabled) }}
|
||||
{{- if and $.Values.app (hasKey $.Values.app "replicaCount") (ne $.Values.app.replicaCount nil) }}
|
||||
replicas: {{ $.Values.app.replicaCount }}
|
||||
{{- else }}
|
||||
@@ -24,6 +25,7 @@ spec:
|
||||
replicas: {{ $.Values.deployment.replicaCount }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
strategy: {{- toYaml $.Values.deployment.updateStrategy | nindent 4 }}
|
||||
template:
|
||||
metadata:
|
||||
@@ -136,8 +138,8 @@ spec:
|
||||
|
||||
---
|
||||
|
||||
# OneUptime app autoscaler
|
||||
{{- if not $.Values.app.disableAutoscaler }}
|
||||
# OneUptime app autoscaler (skip HPA when KEDA is managing scaling)
|
||||
{{- if and (not $.Values.app.disableAutoscaler) (not (and $.Values.keda.enabled $.Values.app.keda.enabled)) }}
|
||||
{{- $appAutoScalerArgs := dict "ServiceName" "app" "Release" $.Release "Values" $.Values -}}
|
||||
{{- include "oneuptime.autoscaler" $appAutoScalerArgs }}
|
||||
{{- end }}
|
||||
|
||||
@@ -17,6 +17,13 @@ KEDA ScaledObjects for various services
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/* App KEDA ScaledObject - scales based on worker and telemetry queue sizes */}}
|
||||
{{- if and .Values.keda.enabled .Values.app.enabled .Values.app.keda.enabled (not .Values.app.disableAutoscaler) (not .Values.deployment.disableDeployments) }}
|
||||
{{- $metricsConfig := dict "enabled" .Values.app.keda.enabled "minReplicas" .Values.app.keda.minReplicas "maxReplicas" .Values.app.keda.maxReplicas "pollingInterval" .Values.app.keda.pollingInterval "cooldownPeriod" .Values.app.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_app_worker_queue_size" "threshold" .Values.app.keda.workerQueueSizeThreshold "port" .Values.app.ports.http "urlPath" "/worker/metrics/queue-size") (dict "query" "oneuptime_app_telemetry_queue_size" "threshold" .Values.app.keda.telemetryQueueSizeThreshold "port" .Values.app.ports.http "urlPath" "/telemetry/metrics/queue-size")) }}
|
||||
{{- $appKedaArgs := dict "ServiceName" "app" "Release" .Release "Values" .Values "MetricsConfig" $metricsConfig "DisableAutoscaler" .Values.app.disableAutoscaler }}
|
||||
{{- include "oneuptime.kedaScaledObject" $appKedaArgs }}
|
||||
{{- end }}
|
||||
|
||||
{{/* AI Agent KEDA ScaledObject */}}
|
||||
{{- if and .Values.keda.enabled .Values.aiAgent.enabled .Values.aiAgent.keda.enabled (not .Values.aiAgent.disableAutoscaler) (not .Values.deployment.disableDeployments) }}
|
||||
{{- $metricsConfig := dict "enabled" .Values.aiAgent.keda.enabled "minReplicas" .Values.aiAgent.keda.minReplicas "maxReplicas" .Values.aiAgent.keda.maxReplicas "pollingInterval" .Values.aiAgent.keda.pollingInterval "cooldownPeriod" .Values.aiAgent.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_ai_agent_queue_size" "threshold" .Values.aiAgent.keda.queueSizeThreshold "port" .Values.aiAgent.ports.http)) }}
|
||||
|
||||
@@ -1794,6 +1794,33 @@
|
||||
},
|
||||
"enableProfiling": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"keda": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"minReplicas": {
|
||||
"type": "integer"
|
||||
},
|
||||
"maxReplicas": {
|
||||
"type": "integer"
|
||||
},
|
||||
"workerQueueSizeThreshold": {
|
||||
"type": "integer"
|
||||
},
|
||||
"telemetryQueueSizeThreshold": {
|
||||
"type": "integer"
|
||||
},
|
||||
"pollingInterval": {
|
||||
"type": "integer"
|
||||
},
|
||||
"cooldownPeriod": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
|
||||
@@ -662,6 +662,19 @@ app:
|
||||
nodeSelector: {}
|
||||
podSecurityContext: {}
|
||||
containerSecurityContext: {}
|
||||
# KEDA autoscaling configuration based on worker and telemetry queue metrics
|
||||
keda:
|
||||
enabled: false
|
||||
minReplicas: 1
|
||||
maxReplicas: 100
|
||||
# Scale up when worker queue size exceeds this threshold
|
||||
workerQueueSizeThreshold: 10
|
||||
# Scale up when telemetry queue size exceeds this threshold
|
||||
telemetryQueueSizeThreshold: 10
|
||||
# Polling interval for metrics (in seconds)
|
||||
pollingInterval: 30
|
||||
# Cooldown period after scaling (in seconds)
|
||||
cooldownPeriod: 300
|
||||
|
||||
# AI Agent Configuration
|
||||
# Deploy this to run an AI Agent within your Kubernetes cluster
|
||||
|
||||
Reference in New Issue
Block a user