refactor: remove OpenTelemetry Collector and move telemetry ingestion into the telemetry service

- Deleted the otel-collector job from GitHub workflows and related deployment configurations.
- Updated Helm charts to remove references to the OpenTelemetry Collector, including its deployment and service definitions.
- Added gRPC server functionality directly in the telemetry service to handle telemetry ingestion.
- Updated environment variables and Docker configurations to reflect the removal of the OpenTelemetry Collector.
- Adjusted telemetry service to support both HTTP and gRPC protocols for telemetry data.
This commit is contained in:
Nawaz Dhandala
2026-03-05 09:36:11 +00:00
parent 88a280031b
commit f9c90d7143
19 changed files with 412 additions and 448 deletions

View File

@@ -113,8 +113,6 @@ Usage:
value: {{ $.Release.Name }}-telemetry.{{ $.Release.Namespace }}.svc.{{ $.Values.global.clusterDomain }}
- name: SERVER_TELEMETRY_HOSTNAME
value: {{ $.Release.Name }}-telemetry.{{ $.Release.Namespace }}.svc.{{ $.Values.global.clusterDomain }}
- name: SERVER_OTEL_COLLECTOR_HOSTNAME
value: {{ $.Release.Name }}-otel-collector.{{ $.Release.Namespace }}.svc.{{ $.Values.global.clusterDomain }}
- name: APP_PORT
value: {{ $.Values.app.ports.http | squote }}
- name: TELEMETRY_PORT

View File

@@ -1,147 +1 @@
{{- if $.Values.openTelemetryCollector.enabled }}
# OneUptime otel-collector Deployment
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ printf "%s-%s" $.Release.Name "otel-collector" }}
namespace: {{ $.Release.Namespace }}
labels:
app: {{ printf "%s-%s" $.Release.Name "otel-collector" }}
app.kubernetes.io/part-of: oneuptime
app.kubernetes.io/managed-by: Helm
appname: oneuptime
{{- if $.Values.deployment.includeTimestampLabel }}
date: "{{ now | unixEpoch }}"
{{- end }}
spec:
selector:
matchLabels:
app: {{ printf "%s-%s" $.Release.Name "otel-collector" }}
{{- if $.Values.openTelemetryCollector.replicaCount }}
replicas: {{ $.Values.openTelemetryCollector.replicaCount }}
{{- else }}
{{- if or (not $.Values.autoscaling.enabled) ($.Values.openTelemetryCollector.disableAutoscaler) }}
replicas: {{ $.Values.deployment.replicaCount }}
{{- end }}
{{- end }}
strategy: {{- toYaml $.Values.deployment.updateStrategy | nindent 4 }}
template:
metadata:
labels:
app: {{ printf "%s-%s" $.Release.Name "otel-collector" }}
{{- if $.Values.deployment.includeTimestampLabel }}
date: "{{ now | unixEpoch }}"
{{- end }}
appname: oneuptime
spec:
volumes:
- name: greenlockrc
emptyDir:
sizeLimit: "1Gi"
{{- if $.Values.openTelemetryCollector.podSecurityContext }}
securityContext:
{{- toYaml $.Values.openTelemetryCollector.podSecurityContext | nindent 8 }}
{{- else if $.Values.podSecurityContext }}
securityContext:
{{- toYaml $.Values.podSecurityContext | nindent 8 }}
{{- end }}
{{- if $.Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml $.Values.imagePullSecrets | nindent 8 }}
{{- end }}
{{- if $.Values.affinity }}
affinity: {{- $.Values.affinity | toYaml | nindent 8 }}
{{- end }}
{{- if $.Values.tolerations }}
tolerations: {{- $.Values.tolerations | toYaml | nindent 8 }}
{{- end }}
{{- if $.Values.openTelemetryCollector.nodeSelector }}
nodeSelector:
{{- toYaml $.Values.openTelemetryCollector.nodeSelector | nindent 8 }}
{{- else if $.Values.nodeSelector }}
nodeSelector:
{{- toYaml $.Values.nodeSelector | nindent 8 }}
{{- end }}
containers:
- image: {{ include "oneuptime.image" (dict "Values" $.Values "ServiceName" "otel-collector") }}
name: {{ printf "%s-%s" $.Release.Name "otel-collector" }}
# Health probes (startup, liveness, readiness)
{{- if $.Values.startupProbe.enabled }}
# Startup probe
startupProbe:
httpGet:
path: /health/status
port: 13133
periodSeconds: {{ $.Values.startupProbe.periodSeconds }}
failureThreshold: {{ $.Values.startupProbe.failureThreshold }}
{{- end }}
{{- if $.Values.livenessProbe.enabled }}
# Liveness probe
livenessProbe:
httpGet:
path: /health/status
port: 13133
periodSeconds: {{ $.Values.livenessProbe.periodSeconds }}
timeoutSeconds: {{ $.Values.livenessProbe.timeoutSeconds }}
initialDelaySeconds: {{ $.Values.livenessProbe.initialDelaySeconds }}
{{- end }}
{{- if $.Values.readinessProbe.enabled }}
# Readiness probe
readinessProbe:
httpGet:
path: /health/status
port: 13133
periodSeconds: {{ $.Values.readinessProbe.periodSeconds }}
initialDelaySeconds: {{ $.Values.readinessProbe.initialDelaySeconds }}
timeoutSeconds: {{ $.Values.readinessProbe.timeoutSeconds }}
{{- end }}
{{- if $.Values.openTelemetryCollector.containerSecurityContext }}
securityContext:
{{- toYaml $.Values.openTelemetryCollector.containerSecurityContext | nindent 12 }}
{{- else if $.Values.containerSecurityContext }}
securityContext:
{{- toYaml $.Values.containerSecurityContext | nindent 12 }}
{{- end }}
imagePullPolicy: {{ $.Values.image.pullPolicy }}
env:
{{- include "oneuptime.env.common" . | nindent 12 }}
{{- include "oneuptime.env.runtime" (dict "Values" $.Values "Release" $.Release) | nindent 12 }}
- name: PORT
value: {{ $.Values.openTelemetryCollector.ports.grpc | quote }}
- name: OPENTELEMETRY_COLLECTOR_SENDING_QUEUE_ENABLED
value: {{ $.Values.openTelemetryCollector.sendingQueue.enabled | quote }}
- name: OPENTELEMETRY_COLLECTOR_SENDING_QUEUE_NUM_CONSUMERS
value: {{ $.Values.openTelemetryCollector.sendingQueue.numConsumers | quote }}
- name: OPENTELEMETRY_COLLECTOR_SENDING_QUEUE_SIZE
value: {{ $.Values.openTelemetryCollector.sendingQueue.size | quote }}
- name: DISABLE_TELEMETRY
value: {{ $.Values.openTelemetryCollector.disableTelemetryCollection | quote }}
ports:
- containerPort: {{ $.Values.openTelemetryCollector.ports.http }}
protocol: TCP
name: http
- containerPort: {{ $.Values.openTelemetryCollector.ports.grpc }}
protocol: TCP
name: grpc
{{- if $.Values.openTelemetryCollector.resources }}
resources:
{{- toYaml $.Values.openTelemetryCollector.resources | nindent 12 }}
{{- end }}
restartPolicy: {{ $.Values.image.restartPolicy }}
---
# OneUptime otel-collector autoscaler
{{- if not $.Values.openTelemetryCollector.disableAutoscaler }}
{{- $identityAutoScalerArgs := dict "ServiceName" "otel-collector" "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.autoscaler" $identityAutoScalerArgs }}
{{- end }}
---
{{- end }}
# OneUptime otel-collector Service
{{- $otelCollectorPorts := dict "grpc" $.Values.openTelemetryCollector.ports.grpc "http" $.Values.openTelemetryCollector.ports.http -}}
{{- $identityServiceArgs := dict "ServiceName" "otel-collector" "Ports" $otelCollectorPorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $identityServiceArgs }}
---
{{- /* OTel Collector has been removed. Telemetry ingestion (gRPC + HTTP) is now handled directly by the telemetry service. */ -}}

View File

@@ -117,6 +117,9 @@ spec:
- containerPort: {{ $.Values.telemetry.ports.http }}
protocol: TCP
name: http
- containerPort: {{ $.Values.telemetry.ports.grpc }}
protocol: TCP
name: grpc
{{- if $.Values.telemetry.resources }}
resources:
{{- toYaml $.Values.telemetry.resources | nindent 12 }}
@@ -135,7 +138,7 @@ spec:
---
# OneUptime telemetry Service
{{- $telemetryPorts := dict "port" $.Values.telemetry.ports.http -}}
{{- $telemetryPorts := dict "http" $.Values.telemetry.ports.http "grpc" $.Values.telemetry.ports.grpc -}}
{{- $telemetryServiceArgs := dict "ServiceName" "telemetry" "Ports" $telemetryPorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $telemetryServiceArgs }}
---

View File

@@ -1262,63 +1262,6 @@
},
"additionalProperties": false
},
"openTelemetryCollector": {
"type": "object",
"properties": {
"enabled": {
"type": "boolean"
},
"replicaCount": {
"type": "integer"
},
"disableTelemetryCollection": {
"type": "boolean"
},
"disableAutoscaler": {
"type": "boolean"
},
"ports": {
"type": "object",
"properties": {
"grpc": {
"type": "integer"
},
"http": {
"type": "integer"
}
},
"additionalProperties": false
},
"sendingQueue": {
"type": "object",
"properties": {
"enabled": {
"type": "boolean"
},
"size": {
"type": "integer"
},
"numConsumers": {
"type": "integer"
}
},
"additionalProperties": false
},
"resources": {
"type": ["object", "null"]
},
"nodeSelector": {
"type": "object"
},
"podSecurityContext": {
"type": "object"
},
"containerSecurityContext": {
"type": "object"
}
},
"additionalProperties": false
},
"home": {
"type": "object",
"properties": {
@@ -1528,6 +1471,9 @@
"properties": {
"http": {
"type": "integer"
},
"grpc": {
"type": "integer"
}
},
"additionalProperties": false

View File

@@ -558,25 +558,6 @@ readinessProbe: # Readiness probe configuration
initialDelaySeconds: 10
timeoutSeconds: 120
# OpenTelemetry Collector Configuration
openTelemetryCollector:
enabled: true
replicaCount: 1
disableTelemetryCollection: false
disableAutoscaler: false
ports:
grpc: 4317
http: 4318
sendingQueue:
enabled: true
size: 1000
numConsumers: 3
resources: {}
nodeSelector: {}
podSecurityContext: {}
containerSecurityContext: {}
home:
enabled: true
replicaCount: 1
@@ -636,6 +617,7 @@ telemetry:
concurrency: 100
ports:
http: 3403
grpc: 4317
resources:
nodeSelector: {}
podSecurityContext: {}