mirror of
https://github.com/OneUptime/oneuptime.git
synced 2026-04-06 00:32:12 +02:00
Add utility classes for telemetry: Monitor, StackTrace, and Syslog parsing
- Implemented MonitorUtil for managing monitor secrets and populating them in monitor steps and tests.
- Created StackTraceParser to parse and structure stack traces from various programming languages.
- Developed SyslogParser to handle and parse syslog messages in both RFC 5424 and RFC 3164 formats.
This commit is contained in:
@@ -117,13 +117,13 @@ Usage:
|
||||
- name: SERVER_APP_HOSTNAME
|
||||
value: {{ $.Release.Name }}-app.{{ $.Release.Namespace }}.svc.{{ $.Values.global.clusterDomain }}
|
||||
- name: TELEMETRY_HOSTNAME
|
||||
value: {{ $.Release.Name }}-telemetry.{{ $.Release.Namespace }}.svc.{{ $.Values.global.clusterDomain }}
|
||||
value: {{ $.Release.Name }}-app.{{ $.Release.Namespace }}.svc.{{ $.Values.global.clusterDomain }}
|
||||
- name: SERVER_TELEMETRY_HOSTNAME
|
||||
value: {{ $.Release.Name }}-telemetry.{{ $.Release.Namespace }}.svc.{{ $.Values.global.clusterDomain }}
|
||||
value: {{ $.Release.Name }}-app.{{ $.Release.Namespace }}.svc.{{ $.Values.global.clusterDomain }}
|
||||
- name: APP_PORT
|
||||
value: {{ $.Values.app.ports.http | squote }}
|
||||
- name: TELEMETRY_PORT
|
||||
value: {{ $.Values.telemetry.ports.http | squote }}
|
||||
value: {{ $.Values.app.ports.http | squote }}
|
||||
- name: HOME_PORT
|
||||
value: {{ $.Values.home.ports.http | squote }}
|
||||
- name: WORKER_CONCURRENCY
|
||||
|
||||
@@ -118,11 +118,16 @@ spec:
|
||||
value: {{ $.Values.app.disableTelemetryCollection | quote }}
|
||||
- name: ENABLE_PROFILING
|
||||
value: {{ $.Values.app.enableProfiling | quote }}
|
||||
|
||||
- name: TELEMETRY_CONCURRENCY
|
||||
value: {{ $.Values.app.telemetryConcurrency | default 100 | squote }}
|
||||
{{- include "oneuptime.env.registerProbeKey" (dict "Values" $.Values "Release" $.Release) | nindent 12 }}
|
||||
ports:
|
||||
- containerPort: {{ $.Values.app.ports.http }}
|
||||
protocol: TCP
|
||||
name: http
|
||||
- containerPort: {{ $.Values.app.ports.grpc | default 4317 }}
|
||||
protocol: TCP
|
||||
name: grpc
|
||||
{{- if $.Values.app.resources }}
|
||||
resources:
|
||||
{{- toYaml $.Values.app.resources | nindent 12 }}
|
||||
@@ -141,7 +146,7 @@ spec:
|
||||
|
||||
{{- if and $.Values.app.enabled (not $.Values.deployment.disableDeployments) }}
|
||||
# OneUptime app Service
|
||||
{{- $appPorts := dict "port" $.Values.app.ports.http -}}
|
||||
{{- $appPorts := dict "http" $.Values.app.ports.http "grpc" ($.Values.app.ports.grpc | default 4317) -}}
|
||||
{{- $appServiceArgs := dict "ServiceName" "app" "Ports" $appPorts "Release" $.Release "Values" $.Values -}}
|
||||
{{- include "oneuptime.service" $appServiceArgs }}
|
||||
---
|
||||
|
||||
@@ -2,13 +2,6 @@
|
||||
KEDA ScaledObjects for various services
|
||||
*/}}
|
||||
|
||||
{{/* Telemetry KEDA ScaledObject */}}
|
||||
{{- if and .Values.keda.enabled .Values.telemetry.enabled .Values.telemetry.keda.enabled (not .Values.telemetry.disableAutoscaler) (not .Values.deployment.disableDeployments) }}
|
||||
{{- $metricsConfig := dict "enabled" .Values.telemetry.keda.enabled "minReplicas" .Values.telemetry.keda.minReplicas "maxReplicas" .Values.telemetry.keda.maxReplicas "pollingInterval" .Values.telemetry.keda.pollingInterval "cooldownPeriod" .Values.telemetry.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_telemetry_queue_size" "threshold" .Values.telemetry.keda.queueSizeThreshold "port" .Values.telemetry.ports.http)) }}
|
||||
{{- $telemetryKedaArgs := dict "ServiceName" "telemetry" "Release" .Release "Values" .Values "MetricsConfig" $metricsConfig "DisableAutoscaler" .Values.telemetry.disableAutoscaler }}
|
||||
{{- include "oneuptime.kedaScaledObject" $telemetryKedaArgs }}
|
||||
{{- end }}
|
||||
|
||||
{{/* Probe KEDA ScaledObjects - one for each probe configuration */}}
|
||||
{{- range $key, $val := $.Values.probes }}
|
||||
{{- $probeEnabled := or (not (hasKey $val "enabled")) $val.enabled }}
|
||||
|
||||
@@ -1 +1 @@
|
||||
{{- /* OTel Collector has been removed. Telemetry ingestion (gRPC + HTTP) is now handled directly by the telemetry service. */ -}}
|
||||
{{- /* OTel Collector has been removed. Telemetry ingestion (gRPC + HTTP) is now handled directly by the app service. */ -}}
|
||||
|
||||
@@ -86,7 +86,7 @@ spec:
|
||||
- name: OPENTELEMETRY_EXPORTER_OTLP_ENDPOINT
|
||||
value: {{ $.Values.openTelemetryExporter.endpoint }}
|
||||
- name: ONEUPTIME_URL
|
||||
value: http://{{ $.Release.Name }}-telemetry.{{ $.Release.Namespace }}.svc.{{ $.Values.global.clusterDomain }}:{{ $.Values.telemetry.ports.http }}
|
||||
value: http://{{ $.Release.Name }}-app.{{ $.Release.Namespace }}.svc.{{ $.Values.global.clusterDomain }}:{{ $.Values.app.ports.http }}
|
||||
- name: PROBE_NAME
|
||||
value: {{ $val.name }}
|
||||
- name: PROBE_DESCRIPTION
|
||||
|
||||
@@ -1,148 +0,0 @@
|
||||
{{- if and $.Values.telemetry.enabled (not $.Values.deployment.disableDeployments) }}
|
||||
# OneUptime telemetry Deployment
|
||||
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ printf "%s-%s" $.Release.Name "telemetry" }}
|
||||
namespace: {{ $.Release.Namespace }}
|
||||
labels:
|
||||
app: {{ printf "%s-%s" $.Release.Name "telemetry" }}
|
||||
app.kubernetes.io/part-of: oneuptime
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
appname: oneuptime
|
||||
{{- if $.Values.deployment.includeTimestampLabel }}
|
||||
date: "{{ now | unixEpoch }}"
|
||||
{{- end }}
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: {{ printf "%s-%s" $.Release.Name "telemetry" }}
|
||||
{{- if $.Values.telemetry.replicaCount }}
|
||||
replicas: {{ $.Values.telemetry.replicaCount }}
|
||||
{{- else }}
|
||||
{{- if or (not $.Values.autoscaling.enabled) ($.Values.telemetry.disableAutoscaler) }}
|
||||
replicas: {{ $.Values.deployment.replicaCount }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
strategy: {{- toYaml $.Values.deployment.updateStrategy | nindent 4 }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: {{ printf "%s-%s" $.Release.Name "telemetry" }}
|
||||
{{- if $.Values.deployment.includeTimestampLabel }}
|
||||
date: "{{ now | unixEpoch }}"
|
||||
{{- end }}
|
||||
appname: oneuptime
|
||||
spec:
|
||||
volumes:
|
||||
- name: greenlockrc
|
||||
emptyDir:
|
||||
sizeLimit: "1Gi"
|
||||
{{- if $.Values.telemetry.podSecurityContext }}
|
||||
securityContext:
|
||||
{{- toYaml $.Values.telemetry.podSecurityContext | nindent 8 }}
|
||||
{{- else if $.Values.podSecurityContext }}
|
||||
securityContext:
|
||||
{{- toYaml $.Values.podSecurityContext | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- if $.Values.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml $.Values.imagePullSecrets | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- if $.Values.affinity }}
|
||||
affinity: {{- $.Values.affinity | toYaml | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- if $.Values.tolerations }}
|
||||
tolerations: {{- $.Values.tolerations | toYaml | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- if $.Values.telemetry.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml $.Values.telemetry.nodeSelector | nindent 8 }}
|
||||
{{- else if $.Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml $.Values.nodeSelector | nindent 8 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- image: {{ include "oneuptime.image" (dict "Values" $.Values "ServiceName" "telemetry") }}
|
||||
name: {{ printf "%s-%s" $.Release.Name "telemetry" }}
|
||||
{{- if $.Values.startupProbe.enabled }}
|
||||
# Startup probe
|
||||
startupProbe:
|
||||
httpGet:
|
||||
path: /status/live
|
||||
port: {{ $.Values.telemetry.ports.http }}
|
||||
periodSeconds: {{ $.Values.startupProbe.periodSeconds }}
|
||||
failureThreshold: {{ $.Values.startupProbe.failureThreshold }}
|
||||
{{- end }}
|
||||
{{- if $.Values.livenessProbe.enabled }}
|
||||
# Liveness probe
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /status/live
|
||||
port: {{ $.Values.telemetry.ports.http }}
|
||||
periodSeconds: {{ $.Values.livenessProbe.periodSeconds }}
|
||||
timeoutSeconds: {{ $.Values.livenessProbe.timeoutSeconds }}
|
||||
initialDelaySeconds: {{ $.Values.livenessProbe.initialDelaySeconds }}
|
||||
{{- end }}
|
||||
{{- if $.Values.readinessProbe.enabled }}
|
||||
          # Readiness Probe
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /status/ready
|
||||
port: {{ $.Values.telemetry.ports.http }}
|
||||
periodSeconds: {{ $.Values.readinessProbe.periodSeconds }}
|
||||
initialDelaySeconds: {{ $.Values.readinessProbe.initialDelaySeconds }}
|
||||
timeoutSeconds: {{ $.Values.readinessProbe.timeoutSeconds }}
|
||||
{{- end }}
|
||||
{{- if $.Values.telemetry.containerSecurityContext }}
|
||||
securityContext:
|
||||
{{- toYaml $.Values.telemetry.containerSecurityContext | nindent 12 }}
|
||||
{{- else if $.Values.containerSecurityContext }}
|
||||
securityContext:
|
||||
{{- toYaml $.Values.containerSecurityContext | nindent 12 }}
|
||||
{{- end }}
|
||||
imagePullPolicy: {{ $.Values.image.pullPolicy }}
|
||||
env:
|
||||
{{- include "oneuptime.env.common" . | nindent 12 }}
|
||||
{{- include "oneuptime.env.runtime" (dict "Values" $.Values "Release" $.Release) | nindent 12 }}
|
||||
- name: PORT
|
||||
value: {{ $.Values.telemetry.ports.http | quote }}
|
||||
- name: DISABLE_TELEMETRY
|
||||
value: {{ $.Values.telemetry.disableTelemetryCollection | quote }}
|
||||
- name: ENABLE_PROFILING
|
||||
value: {{ $.Values.telemetry.enableProfiling | quote }}
|
||||
- name: TELEMETRY_CONCURRENCY
|
||||
value: {{ $.Values.telemetry.concurrency | squote }}
|
||||
{{- include "oneuptime.env.registerProbeKey" (dict "Values" $.Values "Release" $.Release) | nindent 12 }}
|
||||
ports:
|
||||
- containerPort: {{ $.Values.telemetry.ports.http }}
|
||||
protocol: TCP
|
||||
name: http
|
||||
- containerPort: {{ $.Values.telemetry.ports.grpc }}
|
||||
protocol: TCP
|
||||
name: grpc
|
||||
{{- if $.Values.telemetry.resources }}
|
||||
resources:
|
||||
{{- toYaml $.Values.telemetry.resources | nindent 12 }}
|
||||
{{- end }}
|
||||
restartPolicy: {{ $.Values.image.restartPolicy }}
|
||||
|
||||
---
|
||||
|
||||
# OneUptime telemetry autoscaler
|
||||
{{- if and (not $.Values.telemetry.disableAutoscaler) (not (and $.Values.keda.enabled $.Values.telemetry.keda.enabled)) }}
|
||||
{{- $telemetryAutoScalerArgs := dict "ServiceName" "telemetry" "Release" $.Release "Values" $.Values -}}
|
||||
{{- include "oneuptime.autoscaler" $telemetryAutoScalerArgs }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
---
|
||||
|
||||
{{- if and $.Values.telemetry.enabled (not $.Values.deployment.disableDeployments) }}
|
||||
# OneUptime telemetry Service
|
||||
{{- $telemetryPorts := dict "http" $.Values.telemetry.ports.http "grpc" $.Values.telemetry.ports.grpc -}}
|
||||
{{- $telemetryServiceArgs := dict "ServiceName" "telemetry" "Ports" $telemetryPorts "Release" $.Release "Values" $.Values -}}
|
||||
{{- include "oneuptime.service" $telemetryServiceArgs }}
|
||||
---
|
||||
{{- end }}
|
||||
@@ -1755,49 +1755,7 @@
|
||||
"workerConcurrency": {
|
||||
"type": "integer"
|
||||
},
|
||||
"disableTelemetryCollection": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"disableAutoscaler": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"ports": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"http": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
},
|
||||
"resources": {
|
||||
"type": [
|
||||
"object",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"nodeSelector": {
|
||||
"type": "object"
|
||||
},
|
||||
"podSecurityContext": {
|
||||
"type": "object"
|
||||
},
|
||||
"containerSecurityContext": {
|
||||
"type": "object"
|
||||
},
|
||||
"enableProfiling": {
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
},
|
||||
"telemetry": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"replicaCount": {
|
||||
"telemetryConcurrency": {
|
||||
"type": "integer"
|
||||
},
|
||||
"disableTelemetryCollection": {
|
||||
@@ -1806,9 +1764,6 @@
|
||||
"disableAutoscaler": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"concurrency": {
|
||||
"type": "integer"
|
||||
},
|
||||
"ports": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -1816,7 +1771,8 @@
|
||||
"type": "integer"
|
||||
},
|
||||
"grpc": {
|
||||
"type": "integer"
|
||||
"type": "integer",
|
||||
"default": 4317
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
@@ -1836,30 +1792,6 @@
|
||||
"containerSecurityContext": {
|
||||
"type": "object"
|
||||
},
|
||||
"keda": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"minReplicas": {
|
||||
"type": "integer"
|
||||
},
|
||||
"maxReplicas": {
|
||||
"type": "integer"
|
||||
},
|
||||
"queueSizeThreshold": {
|
||||
"type": "integer"
|
||||
},
|
||||
"pollingInterval": {
|
||||
"type": "integer"
|
||||
},
|
||||
"cooldownPeriod": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
},
|
||||
"enableProfiling": {
|
||||
"type": "boolean"
|
||||
}
|
||||
|
||||
@@ -650,42 +650,18 @@ app:
|
||||
enabled: true
|
||||
replicaCount: 1
|
||||
workerConcurrency: 100
|
||||
# Max concurrent telemetry ingestion jobs processed by each pod
|
||||
telemetryConcurrency: 100
|
||||
disableTelemetryCollection: false
|
||||
enableProfiling: false
|
||||
disableAutoscaler: false
|
||||
ports:
|
||||
http: 3002
|
||||
resources:
|
||||
nodeSelector: {}
|
||||
podSecurityContext: {}
|
||||
containerSecurityContext: {}
|
||||
|
||||
telemetry:
|
||||
enabled: true
|
||||
replicaCount: 1
|
||||
disableTelemetryCollection: false
|
||||
enableProfiling: false
|
||||
disableAutoscaler: false
|
||||
# Max concurrent telemetry jobs processed by each pod
|
||||
concurrency: 100
|
||||
ports:
|
||||
http: 3403
|
||||
grpc: 4317
|
||||
resources:
|
||||
nodeSelector: {}
|
||||
podSecurityContext: {}
|
||||
containerSecurityContext: {}
|
||||
# KEDA autoscaling configuration based on queue metrics
|
||||
keda:
|
||||
enabled: false
|
||||
minReplicas: 1
|
||||
maxReplicas: 100
|
||||
# Scale up when queue size exceeds this threshold
|
||||
queueSizeThreshold: 100
|
||||
# Polling interval for metrics (in seconds)
|
||||
pollingInterval: 30
|
||||
# Cooldown period after scaling (in seconds)
|
||||
cooldownPeriod: 300
|
||||
|
||||
# AI Agent Configuration
|
||||
# Deploy this to run an AI Agent within your Kubernetes cluster
|
||||
|
||||
Reference in New Issue
Block a user