refactor: remove OpenTelemetry Collector and integrate telemetry service

- Deleted the otel-collector build and deploy jobs from the GitHub Actions workflows and related deployment configurations.
- Updated the Helm charts to remove references to the OpenTelemetry Collector, including its deployment and service definitions.
- Added a gRPC server directly to the telemetry service to handle OTLP telemetry ingestion.
- Updated environment variables and Docker configurations to reflect the removal of the OpenTelemetry Collector.
- Adjusted the telemetry service to accept telemetry data over both HTTP and gRPC.
Author: Nawaz Dhandala
Date:   2026-03-05 09:36:11 +00:00
parent: 88a280031b
commit: f9c90d7143

19 changed files with 412 additions and 448 deletions
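
For clients, the net effect is that OTLP traffic which previously targeted otel-collector:4317 can be pointed straight at the telemetry service. A minimal sketch, not part of this commit, assuming the stock @opentelemetry/exporter-trace-otlp-grpc package (host, port, and token header are taken from the diff below; the token value is a placeholder):

import * as grpc from "@grpc/grpc-js";
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-grpc";

// GrpcServer.ts authenticates on the x-oneuptime-token metadata key.
const metadata: grpc.Metadata = new grpc.Metadata();
metadata.set("x-oneuptime-token", "<YOUR_TELEMETRY_INGEST_TOKEN>");

// Point the exporter at the telemetry service's new gRPC port (4317).
const exporter: OTLPTraceExporter = new OTLPTraceExporter({
  url: "http://telemetry:4317",
  metadata: metadata,
});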

View File

@@ -57,29 +57,6 @@ jobs:
command: sudo docker build --no-cache -f ./Worker/Dockerfile .
docker-build-otel-collector:
runs-on: ubuntu-latest
env:
CI_PIPELINE_ID: ${{github.run_number}}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Preinstall
uses: nick-fields/retry@v3
with:
timeout_minutes: 10
max_attempts: 3
command: npm run prerun
# build image for accounts service
- name: build docker image
uses: nick-fields/retry@v3
with:
timeout_minutes: 45
max_attempts: 3
command: sudo docker build --no-cache -f ./OTelCollector/Dockerfile .
docker-build-app:
runs-on: ubuntu-latest
env:

View File

@@ -423,76 +423,6 @@ jobs:
--platforms linux/amd64,linux/arm64 \
--git-sha "${{ github.sha }}"
otel-collector-docker-image-deploy:
needs: [generate-build-number, read-version]
runs-on: ubuntu-latest
env:
QEMU_CPU: max
steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
tool-cache: false
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true
- name: Docker Meta
id: meta
uses: docker/metadata-action@v4
with:
images: |
oneuptime/otel-collector
ghcr.io/oneuptime/otel-collector
tags: |
type=raw,value=release,enable=true
type=semver,value=${{needs.read-version.outputs.major_minor}},pattern={{version}},enable=true
- uses: actions/checkout@v4
with:
ref: ${{ github.ref }}
- uses: actions/setup-node@v4
with:
node-version: latest
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:qemu-v10.0.4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Generate Dockerfile from Dockerfile.tpl
run: npm run prerun
# Build and deploy otel-collector.
- name: Login to Docker Hub
run: |
echo "${{ secrets.DOCKERHUB_PASSWORD }}" | docker login --username "${{ secrets.DOCKERHUB_USERNAME }}" --password-stdin
- name: Login to GitHub Container Registry
run: |
echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io --username "${{ github.repository_owner }}" --password-stdin
- name: Build and push
run: |
bash ./Scripts/GHA/build_docker_images.sh \
--image otel-collector \
--version "${{needs.read-version.outputs.major_minor}}" \
--dockerfile ./OTelCollector/Dockerfile \
--context . \
--platforms linux/amd64,linux/arm64 \
--git-sha "${{ github.sha }}"
test-docker-image-deploy:
needs: [generate-build-number, read-version]
runs-on: ubuntu-latest
@@ -1001,7 +931,6 @@ jobs:
- e2e-docker-image-deploy
- home-docker-image-deploy
- test-server-docker-image-deploy
- - otel-collector-docker-image-deploy
- test-docker-image-deploy
- telemetry-docker-image-deploy
- probe-docker-image-deploy
@@ -1020,7 +949,6 @@ jobs:
"e2e",
"home",
"test-server",
"otel-collector",
"test",
"telemetry",
"probe",
@@ -1073,7 +1001,7 @@ jobs:
test-e2e-release-saas:
runs-on: ubuntu-latest
- needs: [telemetry-docker-image-deploy, ai-agent-docker-image-deploy, app-docker-image-deploy, home-docker-image-deploy, worker-docker-image-deploy, otel-collector-docker-image-deploy, probe-docker-image-deploy, test-docker-image-deploy, test-server-docker-image-deploy, publish-npm-packages, e2e-docker-image-deploy, helm-chart-deploy, generate-build-number, read-version, nginx-docker-image-deploy]
+ needs: [telemetry-docker-image-deploy, ai-agent-docker-image-deploy, app-docker-image-deploy, home-docker-image-deploy, worker-docker-image-deploy, probe-docker-image-deploy, test-docker-image-deploy, test-server-docker-image-deploy, publish-npm-packages, e2e-docker-image-deploy, helm-chart-deploy, generate-build-number, read-version, nginx-docker-image-deploy]
env:
CI_PIPELINE_ID: ${{github.run_number}}
steps:
@@ -1204,7 +1132,7 @@ jobs:
test-e2e-release-self-hosted:
runs-on: ubuntu-latest
# After all the jobs runs
- needs: [telemetry-docker-image-deploy, ai-agent-docker-image-deploy, app-docker-image-deploy, home-docker-image-deploy, worker-docker-image-deploy, otel-collector-docker-image-deploy, probe-docker-image-deploy, test-docker-image-deploy, test-server-docker-image-deploy, publish-npm-packages, e2e-docker-image-deploy, helm-chart-deploy, generate-build-number, read-version, nginx-docker-image-deploy]
+ needs: [telemetry-docker-image-deploy, ai-agent-docker-image-deploy, app-docker-image-deploy, home-docker-image-deploy, worker-docker-image-deploy, probe-docker-image-deploy, test-docker-image-deploy, test-server-docker-image-deploy, publish-npm-packages, e2e-docker-image-deploy, helm-chart-deploy, generate-build-number, read-version, nginx-docker-image-deploy]
env:
CI_PIPELINE_ID: ${{github.run_number}}
steps:

View File

@@ -297,76 +297,6 @@ jobs:
--extra-tags test \
--extra-enterprise-tags enterprise-test
otel-collector-docker-image-deploy:
needs: [read-version, generate-build-number]
runs-on: ubuntu-latest
env:
QEMU_CPU: max
steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
tool-cache: false
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true
- name: Docker Meta
id: meta
uses: docker/metadata-action@v4
with:
images: |
oneuptime/otel-collector
ghcr.io/oneuptime/otel-collector
tags: |
type=raw,value=test,enable=true
type=raw,value=${{needs.read-version.outputs.major_minor}}-test,enable=true
- uses: actions/checkout@v4
with:
ref: ${{ github.ref }}
- uses: actions/setup-node@v4
with:
node-version: latest
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:qemu-v10.0.4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Generate Dockerfile from Dockerfile.tpl
run: npm run prerun
# Build and deploy otel-collector.
- name: Login to Docker Hub
run: |
echo "${{ secrets.DOCKERHUB_PASSWORD }}" | docker login --username "${{ secrets.DOCKERHUB_USERNAME }}" --password-stdin
- name: Login to GitHub Container Registry
run: |
echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io --username "${{ github.repository_owner }}" --password-stdin
- name: Build and push
run: |
bash ./Scripts/GHA/build_docker_images.sh \
--image otel-collector \
--version "${{needs.read-version.outputs.major_minor}}-test" \
--dockerfile ./OTelCollector/Dockerfile \
--context . \
--platforms linux/amd64,linux/arm64 \
--git-sha "${{ github.sha }}" \
--extra-tags test \
--extra-enterprise-tags enterprise-test
home-docker-image-deploy:
needs: [read-version, generate-build-number]
runs-on: ubuntu-latest
@@ -879,7 +809,7 @@ jobs:
test-helm-chart:
runs-on: ubuntu-latest
- needs: [infrastructure-agent-deploy, publish-terraform-provider, telemetry-docker-image-deploy, worker-docker-image-deploy, home-docker-image-deploy, test-server-docker-image-deploy, test-docker-image-deploy, probe-docker-image-deploy, app-docker-image-deploy, ai-agent-docker-image-deploy, otel-collector-docker-image-deploy, nginx-docker-image-deploy, e2e-docker-image-deploy]
+ needs: [infrastructure-agent-deploy, publish-terraform-provider, telemetry-docker-image-deploy, worker-docker-image-deploy, home-docker-image-deploy, test-server-docker-image-deploy, test-docker-image-deploy, probe-docker-image-deploy, app-docker-image-deploy, ai-agent-docker-image-deploy, nginx-docker-image-deploy, e2e-docker-image-deploy]
env:
CI_PIPELINE_ID: ${{github.run_number}}
steps:

View File

@@ -113,8 +113,6 @@ Usage:
value: {{ $.Release.Name }}-telemetry.{{ $.Release.Namespace }}.svc.{{ $.Values.global.clusterDomain }}
- name: SERVER_TELEMETRY_HOSTNAME
value: {{ $.Release.Name }}-telemetry.{{ $.Release.Namespace }}.svc.{{ $.Values.global.clusterDomain }}
- - name: SERVER_OTEL_COLLECTOR_HOSTNAME
-   value: {{ $.Release.Name }}-otel-collector.{{ $.Release.Namespace }}.svc.{{ $.Values.global.clusterDomain }}
- name: APP_PORT
value: {{ $.Values.app.ports.http | squote }}
- name: TELEMETRY_PORT

View File

@@ -1,147 +1 @@
{{- if $.Values.openTelemetryCollector.enabled }}
# OneUptime otel-collector Deployment
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ printf "%s-%s" $.Release.Name "otel-collector" }}
namespace: {{ $.Release.Namespace }}
labels:
app: {{ printf "%s-%s" $.Release.Name "otel-collector" }}
app.kubernetes.io/part-of: oneuptime
app.kubernetes.io/managed-by: Helm
appname: oneuptime
{{- if $.Values.deployment.includeTimestampLabel }}
date: "{{ now | unixEpoch }}"
{{- end }}
spec:
selector:
matchLabels:
app: {{ printf "%s-%s" $.Release.Name "otel-collector" }}
{{- if $.Values.openTelemetryCollector.replicaCount }}
replicas: {{ $.Values.openTelemetryCollector.replicaCount }}
{{- else }}
{{- if or (not $.Values.autoscaling.enabled) ($.Values.openTelemetryCollector.disableAutoscaler) }}
replicas: {{ $.Values.deployment.replicaCount }}
{{- end }}
{{- end }}
strategy: {{- toYaml $.Values.deployment.updateStrategy | nindent 4 }}
template:
metadata:
labels:
app: {{ printf "%s-%s" $.Release.Name "otel-collector" }}
{{- if $.Values.deployment.includeTimestampLabel }}
date: "{{ now | unixEpoch }}"
{{- end }}
appname: oneuptime
spec:
volumes:
- name: greenlockrc
emptyDir:
sizeLimit: "1Gi"
{{- if $.Values.openTelemetryCollector.podSecurityContext }}
securityContext:
{{- toYaml $.Values.openTelemetryCollector.podSecurityContext | nindent 8 }}
{{- else if $.Values.podSecurityContext }}
securityContext:
{{- toYaml $.Values.podSecurityContext | nindent 8 }}
{{- end }}
{{- if $.Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml $.Values.imagePullSecrets | nindent 8 }}
{{- end }}
{{- if $.Values.affinity }}
affinity: {{- $.Values.affinity | toYaml | nindent 8 }}
{{- end }}
{{- if $.Values.tolerations }}
tolerations: {{- $.Values.tolerations | toYaml | nindent 8 }}
{{- end }}
{{- if $.Values.openTelemetryCollector.nodeSelector }}
nodeSelector:
{{- toYaml $.Values.openTelemetryCollector.nodeSelector | nindent 8 }}
{{- else if $.Values.nodeSelector }}
nodeSelector:
{{- toYaml $.Values.nodeSelector | nindent 8 }}
{{- end }}
containers:
- image: {{ include "oneuptime.image" (dict "Values" $.Values "ServiceName" "otel-collector") }}
name: {{ printf "%s-%s" $.Release.Name "otel-collector" }}
# Liveness probe
{{- if $.Values.startupProbe.enabled }}
# Startup probe
startupProbe:
httpGet:
path: /health/status
port: 13133
periodSeconds: {{ $.Values.startupProbe.periodSeconds }}
failureThreshold: {{ $.Values.startupProbe.failureThreshold }}
{{- end }}
{{- if $.Values.livenessProbe.enabled }}
# Liveness probe
livenessProbe:
httpGet:
path: /health/status
port: 13133
periodSeconds: {{ $.Values.livenessProbe.periodSeconds }}
timeoutSeconds: {{ $.Values.livenessProbe.timeoutSeconds }}
initialDelaySeconds: {{ $.Values.livenessProbe.initialDelaySeconds }}
{{- end }}
{{- if $.Values.readinessProbe.enabled }}
# Readyness Probe
readinessProbe:
httpGet:
path: /health/status
port: 13133
periodSeconds: {{ $.Values.readinessProbe.periodSeconds }}
initialDelaySeconds: {{ $.Values.readinessProbe.initialDelaySeconds }}
timeoutSeconds: {{ $.Values.readinessProbe.timeoutSeconds }}
{{- end }}
{{- if $.Values.openTelemetryCollector.containerSecurityContext }}
securityContext:
{{- toYaml $.Values.openTelemetryCollector.containerSecurityContext | nindent 12 }}
{{- else if $.Values.containerSecurityContext }}
securityContext:
{{- toYaml $.Values.containerSecurityContext | nindent 12 }}
{{- end }}
imagePullPolicy: {{ $.Values.image.pullPolicy }}
env:
{{- include "oneuptime.env.common" . | nindent 12 }}
{{- include "oneuptime.env.runtime" (dict "Values" $.Values "Release" $.Release) | nindent 12 }}
- name: PORT
value: {{ $.Values.openTelemetryCollector.ports.grpc | quote }}
- name: OPENTELEMETRY_COLLECTOR_SENDING_QUEUE_ENABLED
value: {{ $.Values.openTelemetryCollector.sendingQueue.enabled | quote }}
- name: OPENTELEMETRY_COLLECTOR_SENDING_QUEUE_NUM_CONSUMERS
value: {{ $.Values.openTelemetryCollector.sendingQueue.numConsumers | quote }}
- name: OPENTELEMETRY_COLLECTOR_SENDING_QUEUE_SIZE
value: {{ $.Values.openTelemetryCollector.sendingQueue.size | quote }}
- name: DISABLE_TELEMETRY
value: {{ $.Values.openTelemetryCollector.disableTelemetryCollection | quote }}
ports:
- containerPort: {{ $.Values.openTelemetryCollector.ports.http }}
protocol: TCP
name: http
- containerPort: {{ $.Values.openTelemetryCollector.ports.grpc }}
protocol: TCP
name: grpc
{{- if $.Values.openTelemetryCollector.resources }}
resources:
{{- toYaml $.Values.openTelemetryCollector.resources | nindent 12 }}
{{- end }}
restartPolicy: {{ $.Values.image.restartPolicy }}
---
# OneUptime otel-collector autoscaler
{{- if not $.Values.openTelemetryCollector.disableAutoscaler }}
{{- $identityAutoScalerArgs := dict "ServiceName" "otel-collector" "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.autoscaler" $identityAutoScalerArgs }}
{{- end }}
---
{{- end }}
# OneUptime otel-collector Service
{{- $otelCollectorPorts := dict "grpc" $.Values.openTelemetryCollector.ports.grpc "http" $.Values.openTelemetryCollector.ports.http -}}
{{- $identityServiceArgs := dict "ServiceName" "otel-collector" "Ports" $otelCollectorPorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $identityServiceArgs }}
---
+ {{- /* OTel Collector has been removed. Telemetry ingestion (gRPC + HTTP) is now handled directly by the telemetry service. */ -}}

View File

@@ -117,6 +117,9 @@ spec:
- containerPort: {{ $.Values.telemetry.ports.http }}
protocol: TCP
name: http
+ - containerPort: {{ $.Values.telemetry.ports.grpc }}
+   protocol: TCP
+   name: grpc
{{- if $.Values.telemetry.resources }}
resources:
{{- toYaml $.Values.telemetry.resources | nindent 12 }}
@@ -135,7 +138,7 @@ spec:
---
# OneUptime telemetry Service
{{- $telemetryPorts := dict "port" $.Values.telemetry.ports.http -}}
{{- $telemetryPorts := dict "http" $.Values.telemetry.ports.http "grpc" $.Values.telemetry.ports.grpc -}}
{{- $telemetryServiceArgs := dict "ServiceName" "telemetry" "Ports" $telemetryPorts "Release" $.Release "Values" $.Values -}}
{{- include "oneuptime.service" $telemetryServiceArgs }}
---

View File

@@ -1262,63 +1262,6 @@
},
"additionalProperties": false
},
"openTelemetryCollector": {
"type": "object",
"properties": {
"enabled": {
"type": "boolean"
},
"replicaCount": {
"type": "integer"
},
"disableTelemetryCollection": {
"type": "boolean"
},
"disableAutoscaler": {
"type": "boolean"
},
"ports": {
"type": "object",
"properties": {
"grpc": {
"type": "integer"
},
"http": {
"type": "integer"
}
},
"additionalProperties": false
},
"sendingQueue": {
"type": "object",
"properties": {
"enabled": {
"type": "boolean"
},
"size": {
"type": "integer"
},
"numConsumers": {
"type": "integer"
}
},
"additionalProperties": false
},
"resources": {
"type": ["object", "null"]
},
"nodeSelector": {
"type": "object"
},
"podSecurityContext": {
"type": "object"
},
"containerSecurityContext": {
"type": "object"
}
},
"additionalProperties": false
},
"home": {
"type": "object",
"properties": {
@@ -1528,6 +1471,9 @@
"properties": {
"http": {
"type": "integer"
},
"grpc": {
"type": "integer"
}
},
"additionalProperties": false

View File

@@ -558,25 +558,6 @@ readinessProbe: # Readiness probe configuration
initialDelaySeconds: 10
timeoutSeconds: 120
# OpenTelemetry Collector Configuration
openTelemetryCollector:
enabled: true
replicaCount: 1
disableTelemetryCollection: false
disableAutoscaler: false
ports:
grpc: 4317
http: 4318
sendingQueue:
enabled: true
size: 1000
numConsumers: 3
resources: {}
nodeSelector: {}
podSecurityContext: {}
containerSecurityContext: {}
home:
enabled: true
replicaCount: 1
@@ -636,6 +617,7 @@ telemetry:
concurrency: 100
ports:
http: 3403
+ grpc: 4317
resources:
nodeSelector: {}
podSecurityContext: {}

View File

@@ -19,12 +19,8 @@ upstream home {
server ${SERVER_HOME_HOSTNAME}:${HOME_PORT} weight=10 max_fails=3 fail_timeout=30s;
}
- upstream opentelemetry-collector-http {
-   server ${SERVER_OTEL_COLLECTOR_HOSTNAME}:4318;
- }
- upstream opentelemetry-collector-grpc {
-   server ${SERVER_OTEL_COLLECTOR_HOSTNAME}:4317;
+ upstream opentelemetry-grpc {
+   server ${SERVER_TELEMETRY_HOSTNAME}:4317;
}
# Status Pages
@@ -472,12 +468,12 @@ ${PROVISION_SSL_CERTIFICATE_KEY_DIRECTIVE}
location /otlp/ {
# This is for nginx not to crash when service is not available.
# This is for nginx not to crash when service is not available.
resolver 127.0.0.1 valid=30s;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Proto $scheme;
# enable WebSockets (for ws://sockjs not connected error in the accounts source: https://stackoverflow.com/questions/41381444/websocket-connection-failed-error-during-websocket-handshake-unexpected-respon)
proxy_http_version 1.1;
@@ -485,14 +481,14 @@ ${PROVISION_SSL_CERTIFICATE_KEY_DIRECTIVE}
proxy_set_header Connection "upgrade";
- proxy_pass http://opentelemetry-collector-http/;
+ proxy_pass http://telemetry/otlp/;
}
location ~ /opentelemetry.proto.collector* {
# This is for nginx not to crash when service is not available.
# This is for nginx not to crash when service is not available.
resolver 127.0.0.1 valid=30s;
- grpc_pass grpc://opentelemetry-collector-grpc;
+ grpc_pass grpc://opentelemetry-grpc;
}
location /notification {
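
With the upstream rename above, HTTP OTLP requests to /otlp/ are proxied to the telemetry service and gRPC OTLP calls go to its port 4317. A sketch of an OTLP/HTTP JSON export through this route, not part of this commit, assuming the telemetry service serves /otlp/v1/traces (the hostname and token are placeholders):

// Node 18+ global fetch; an empty resourceSpans array is a valid OTLP payload.
async function exportTraces(): Promise<void> {
  const response: Response = await fetch(
    "https://oneuptime.example.com/otlp/v1/traces",
    {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "x-oneuptime-token": "<YOUR_TELEMETRY_INGEST_TOKEN>",
      },
      body: JSON.stringify({ resourceSpans: [] }),
    },
  );
  console.log("OTLP export status:", response.status);
}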

Telemetry/GrpcServer.ts (new file, 262 lines)
View File

@@ -0,0 +1,262 @@
import * as grpc from "@grpc/grpc-js";
import * as protoLoader from "@grpc/proto-loader";
import path from "path";
import logger from "Common/Server/Utils/Logger";
import ObjectID from "Common/Types/ObjectID";
import ProductType from "Common/Types/MeteredPlan/ProductType";
import TelemetryIngestionKeyService from "Common/Server/Services/TelemetryIngestionKeyService";
import TelemetryIngestionKey from "Common/Models/DatabaseModels/TelemetryIngestionKey";
import { TelemetryRequest } from "Common/Server/Middleware/TelemetryIngest";
import TracesQueueService from "./Services/Queue/TracesQueueService";
import LogsQueueService from "./Services/Queue/LogsQueueService";
import MetricsQueueService from "./Services/Queue/MetricsQueueService";
const GRPC_PORT: number = 4317;
const PROTO_DIR: string = path.resolve(
__dirname,
"ProtoFiles",
"OTel",
"v1",
);
type GrpcCallback = (
error: grpc.ServiceError | null,
response?: Record<string, unknown>,
) => void;
interface GrpcCall {
request: Record<string, unknown>;
metadata: grpc.Metadata;
}
async function authenticateRequest(
metadata: grpc.Metadata,
): Promise<ObjectID | null> {
const tokenValues: grpc.MetadataValue[] =
metadata.get("x-oneuptime-token");
let oneuptimeToken: string | undefined = tokenValues[0]?.toString();
if (!oneuptimeToken) {
const serviceTokenValues: grpc.MetadataValue[] = metadata.get(
"x-oneuptime-service-token",
);
oneuptimeToken = serviceTokenValues[0]?.toString();
}
if (!oneuptimeToken) {
logger.error("gRPC: Missing metadata: x-oneuptime-token");
return null;
}
const token: TelemetryIngestionKey | null =
await TelemetryIngestionKeyService.findOneBy({
query: {
secretKey: new ObjectID(oneuptimeToken),
},
select: {
projectId: true,
},
props: {
isRoot: true,
},
});
if (!token || !token.projectId) {
logger.error("gRPC: Invalid service token: " + oneuptimeToken);
return null;
}
return token.projectId as ObjectID;
}
function buildTelemetryRequest(
body: Record<string, unknown>,
metadata: grpc.Metadata,
projectId: ObjectID,
productType: ProductType,
): TelemetryRequest {
const headers: Record<string, string> = {};
for (const key of metadata.keys()) {
const values: grpc.MetadataValue[] = metadata.get(key);
if (values.length > 0) {
headers[key] = values[0]!.toString();
}
}
const req: Partial<TelemetryRequest> = {
body: body,
headers: headers,
projectId: projectId,
productType: productType,
path: `/otlp/v1/${productType}`,
url: `/otlp/v1/${productType}`,
};
return req as TelemetryRequest;
}
async function handleExport(
call: GrpcCall,
callback: GrpcCallback,
productType: ProductType,
queueFn: (req: TelemetryRequest) => Promise<void>,
): Promise<void> {
try {
const projectId: ObjectID | null = await authenticateRequest(
call.metadata,
);
if (!projectId) {
// Return success to avoid OTel SDK retries
callback(null, {});
return;
}
const body: Record<string, unknown> = call.request;
const req: TelemetryRequest = buildTelemetryRequest(
body,
call.metadata,
projectId,
productType,
);
await queueFn(req);
callback(null, {});
} catch (err) {
logger.error(`gRPC ${productType} export error:`);
logger.error(err);
// Return success to avoid OTel SDK retries
callback(null, {});
}
}
export function startGrpcServer(): void {
const traceServiceDef: protoLoader.PackageDefinition = protoLoader.loadSync(
path.join(PROTO_DIR, "trace_service.proto"),
{
keepCase: false,
longs: String,
enums: String,
defaults: true,
oneofs: true,
includeDirs: [PROTO_DIR],
},
);
const logsServiceDef: protoLoader.PackageDefinition = protoLoader.loadSync(
path.join(PROTO_DIR, "logs_service.proto"),
{
keepCase: false,
longs: String,
enums: String,
defaults: true,
oneofs: true,
includeDirs: [PROTO_DIR],
},
);
const metricsServiceDef: protoLoader.PackageDefinition =
protoLoader.loadSync(
path.join(PROTO_DIR, "metrics_service.proto"),
{
keepCase: false,
longs: String,
enums: String,
defaults: true,
oneofs: true,
includeDirs: [PROTO_DIR],
},
);
const traceProto: grpc.GrpcObject =
grpc.loadPackageDefinition(traceServiceDef);
const logsProto: grpc.GrpcObject =
grpc.loadPackageDefinition(logsServiceDef);
const metricsProto: grpc.GrpcObject =
grpc.loadPackageDefinition(metricsServiceDef);
type ProtoServiceDef = {
service: grpc.ServiceDefinition;
};
function getServiceDefinition(
proto: grpc.GrpcObject,
...path: Array<string>
): grpc.ServiceDefinition {
let current: unknown = proto;
for (const key of path) {
current = (current as Record<string, unknown>)[key];
}
return (current as ProtoServiceDef).service;
}
const traceServiceDefinition: grpc.ServiceDefinition = getServiceDefinition(
traceProto,
"opentelemetry", "proto", "collector", "trace", "v1", "TraceService",
);
const logsServiceDefinition: grpc.ServiceDefinition = getServiceDefinition(
logsProto,
"opentelemetry", "proto", "collector", "logs", "v1", "LogsService",
);
const metricsServiceDefinition: grpc.ServiceDefinition = getServiceDefinition(
metricsProto,
"opentelemetry", "proto", "collector", "metrics", "v1", "MetricsService",
);
const server: grpc.Server = new grpc.Server({
"grpc.max_receive_message_length": 50 * 1024 * 1024, // 50MB
});
server.addService(traceServiceDefinition, {
Export: (call: GrpcCall, callback: GrpcCallback): void => {
handleExport(
call,
callback,
ProductType.Traces,
TracesQueueService.addTraceIngestJob.bind(TracesQueueService),
);
},
});
server.addService(logsServiceDefinition, {
Export: (call: GrpcCall, callback: GrpcCallback): void => {
handleExport(
call,
callback,
ProductType.Logs,
LogsQueueService.addLogIngestJob.bind(LogsQueueService),
);
},
});
server.addService(metricsServiceDefinition, {
Export: (call: GrpcCall, callback: GrpcCallback): void => {
handleExport(
call,
callback,
ProductType.Metrics,
MetricsQueueService.addMetricIngestJob.bind(MetricsQueueService),
);
},
});
server.bindAsync(
`0.0.0.0:${GRPC_PORT}`,
grpc.ServerCredentials.createInsecure(),
(err: Error | null, port: number): void => {
if (err) {
logger.error("Failed to start gRPC server:");
logger.error(err);
return;
}
logger.info(`gRPC OTLP server started on port: ${port}`);
},
);
}
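
One way to exercise this server end to end is a raw @grpc/grpc-js client built from the same proto files: a sketch, not part of this commit (the proto path, endpoint, and token are placeholders):

import * as grpc from "@grpc/grpc-js";
import * as protoLoader from "@grpc/proto-loader";
import path from "path";

// Load the same trace_service.proto that the server loads.
const protoDir: string = path.resolve(__dirname, "ProtoFiles", "OTel", "v1");
const def: protoLoader.PackageDefinition = protoLoader.loadSync(
  path.join(protoDir, "trace_service.proto"),
  {
    keepCase: false,
    longs: String,
    enums: String,
    defaults: true,
    oneofs: true,
    includeDirs: [protoDir],
  },
);

// The loaded package is untyped; any-casts keep the sketch short.
const pkg: any = grpc.loadPackageDefinition(def);
const TraceService: any =
  pkg.opentelemetry.proto.collector.trace.v1.TraceService;

const client: any = new TraceService(
  "localhost:4317",
  grpc.credentials.createInsecure(),
);

const metadata: grpc.Metadata = new grpc.Metadata();
metadata.set("x-oneuptime-token", "<YOUR_TELEMETRY_INGEST_TOKEN>");

// An empty request is enough to exercise authentication and queueing.
client.Export(
  { resourceSpans: [] },
  metadata,
  (err: Error | null, res: unknown): void => {
    if (err) {
      console.error(err);
      return;
    }
    console.log("Export acknowledged:", res);
  },
);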

View File

@@ -24,6 +24,7 @@ import Telemetry from "Common/Server/Utils/Telemetry";
import "./Jobs/TelemetryIngest/ProcessTelemetry";
import { TELEMETRY_CONCURRENCY } from "./Config";
import type { StatusAPIOptions } from "Common/Server/API/StatusAPI";
+ import { startGrpcServer } from "./GrpcServer";
import "ejs";
const app: ExpressApplication = Express.getExpressApp();
@@ -104,6 +105,10 @@ const init: PromiseVoidFunction = async (): Promise<void> => {
);
await Realtime.init();
+ // Start gRPC OTLP server on port 4317
+ startGrpcServer();
// add default routes
await App.addDefaultRoutes();
} catch (err) {
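
As written, startGrpcServer() is fire-and-forget and does not return the server handle. If it were changed to return its grpc.Server, shutdown hooks could drain in-flight exports; a sketch under that assumption:

import * as grpc from "@grpc/grpc-js";

// Hypothetical helper: drain in-flight OTLP calls, then fall back to a hard stop.
function shutdownGrpc(server: grpc.Server): Promise<void> {
  return new Promise((resolve: () => void): void => {
    server.tryShutdown((err?: Error): void => {
      if (err) {
        server.forceShutdown();
      }
      resolve();
    });
  });
}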

View File

@@ -0,0 +1,41 @@
// Copyright 2020, OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package opentelemetry.proto.collector.logs.v1;
import "./logs.proto";
option java_multiple_files = true;
option java_package = "io.opentelemetry.proto.collector.logs.v1";
option java_outer_classname = "LogsServiceProto";
option go_package = "go.opentelemetry.io/proto/otlp/collector/logs/v1";
service LogsService {
rpc Export(ExportLogsServiceRequest) returns (ExportLogsServiceResponse) {}
}
message ExportLogsServiceRequest {
repeated opentelemetry.proto.logs.v1.ResourceLogs resource_logs = 1;
}
message ExportLogsServiceResponse {
ExportLogsPartialSuccess partial_success = 1;
}
message ExportLogsPartialSuccess {
int64 rejected_log_records = 1;
string error_message = 2;
}

View File

@@ -0,0 +1,41 @@
// Copyright 2019, OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package opentelemetry.proto.collector.metrics.v1;
import "./metrics.proto";
option java_multiple_files = true;
option java_package = "io.opentelemetry.proto.collector.metrics.v1";
option java_outer_classname = "MetricsServiceProto";
option go_package = "go.opentelemetry.io/proto/otlp/collector/metrics/v1";
service MetricsService {
rpc Export(ExportMetricsServiceRequest) returns (ExportMetricsServiceResponse) {}
}
message ExportMetricsServiceRequest {
repeated opentelemetry.proto.metrics.v1.ResourceMetrics resource_metrics = 1;
}
message ExportMetricsServiceResponse {
ExportMetricsPartialSuccess partial_success = 1;
}
message ExportMetricsPartialSuccess {
int64 rejected_data_points = 1;
string error_message = 2;
}

View File

@@ -0,0 +1,41 @@
// Copyright 2019, OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package opentelemetry.proto.collector.trace.v1;
import "./traces.proto";
option java_multiple_files = true;
option java_package = "io.opentelemetry.proto.collector.trace.v1";
option java_outer_classname = "TraceServiceProto";
option go_package = "go.opentelemetry.io/proto/otlp/collector/trace/v1";
service TraceService {
rpc Export(ExportTraceServiceRequest) returns (ExportTraceServiceResponse) {}
}
message ExportTraceServiceRequest {
repeated opentelemetry.proto.trace.v1.ResourceSpans resource_spans = 1;
}
message ExportTraceServiceResponse {
ExportTracePartialSuccess partial_success = 1;
}
message ExportTracePartialSuccess {
int64 rejected_spans = 1;
string error_message = 2;
}

View File

@@ -19,6 +19,8 @@
"author": "OneUptime <hello@oneuptime.com> (https://oneuptime.com/)",
"license": "Apache-2.0",
"dependencies": {
"@grpc/grpc-js": "^1.12.5",
"@grpc/proto-loader": "^0.7.13",
"Common": "file:../Common",
"ejs": "^3.1.10",
"protobufjs": "^7.3.2",

View File

@@ -96,14 +96,12 @@ TELEMETRY_HOSTNAME=telemetry:3403
SERVER_APP_HOSTNAME=app
SERVER_TELEMETRY_HOSTNAME=telemetry
- SERVER_OTEL_COLLECTOR_HOSTNAME=otel-collector
SERVER_WORKER_HOSTNAME=worker
#Ports. Usually they don't need to change.
APP_PORT=3002
TELEMETRY_PORT=3403
TEST_SERVER_PORT=3800
- OTEL_COLLECTOR_HTTP_PORT=4318
HOME_PORT=1444
WORKER_PORT=1445
# Plans
@@ -193,7 +191,7 @@ DISABLE_AUTOMATIC_ALERT_CREATION=false
# If you're using an extrenal open telemetry collector, you can set the endpoint here - both server and client endpoint can be the same in this case.
- # You can set the env var to http://otel-collector:4318 if you want instrumentation to be sent to otel collector.
+ # You can set the env var to an OTLP endpoint if you want instrumentation to be exported.
OPENTELEMETRY_EXPORTER_OTLP_ENDPOINT=
# You can set the env var to "x-oneuptime-token=<YOUR_ONEUPTIME_TELEMETRY_INGEST_TOKEN>"
@@ -295,19 +293,12 @@ DISABLE_TELEMETRY_FOR_STATUS_PAGE=true
DISABLE_TELEMETRY_FOR_DASHBOARD=true
DISABLE_TELEMETRY_FOR_PROBE=true
DISABLE_TELEMETRY_FOR_ADMIN_DASHBOARD=true
- DISABLE_TELEMETRY_FOR_OTEL_COLLECTOR=true
DISABLE_TELEMETRY_FOR_INGRESS=true
DISABLE_TELEMETRY_FOR_WORKER=true
DISABLE_TELEMETRY_FOR_AI_AGENT=true
- # OPENTELEMETRY_COLLECTOR env vars
- OPENTELEMETRY_COLLECTOR_SENDING_QUEUE_ENABLED=true
- OPENTELEMETRY_COLLECTOR_SENDING_QUEUE_SIZE=1000
- OPENTELEMETRY_COLLECTOR_SENDING_QUEUE_NUM_CONSUMERS=3
# Connect OneUptime with Slack App
SLACK_APP_CLIENT_ID=
SLACK_APP_CLIENT_SECRET=

View File

@@ -30,7 +30,6 @@ x-common-variables: &common-variables
SERVER_APP_HOSTNAME: app
SERVER_TELEMETRY_HOSTNAME: telemetry
- SERVER_OTEL_COLLECTOR_HOSTNAME: otel-collector
SERVER_WORKER_HOSTNAME: worker
SERVER_HOME_HOSTNAME: home
#Ports. Usually they don't need to change.
@@ -335,23 +334,6 @@ services:
options:
max-size: "1000m"
otel-collector:
networks:
- oneuptime
environment:
<<: *common-variables
DISABLE_TELEMETRY: ${DISABLE_TELEMETRY_FOR_OTEL_COLLECTOR}
OPENTELEMETRY_COLLECTOR_SENDING_QUEUE_ENABLED: ${OPENTELEMETRY_COLLECTOR_SENDING_QUEUE_ENABLED}
OPENTELEMETRY_COLLECTOR_SENDING_QUEUE_SIZE: ${OPENTELEMETRY_COLLECTOR_SENDING_QUEUE_SIZE}
OPENTELEMETRY_COLLECTOR_SENDING_QUEUE_NUM_CONSUMERS: ${OPENTELEMETRY_COLLECTOR_SENDING_QUEUE_NUM_CONSUMERS}
restart: always
logging:
driver: "local"
options:
max-size: "1000m"
ports:
- 13133:13133 # Otel Collector Health Check Endpoint at /heath/status
fluentd:
networks:
- oneuptime

View File

@@ -36,15 +36,6 @@ services:
service: postgres
otel-collector:
extends:
file: ./docker-compose.base.yml
service: otel-collector
build:
network: host
context: .
dockerfile: ./OTelCollector/Dockerfile
test-server:
volumes:
- ./TestServer:/usr/src/app:cached

View File

@@ -29,12 +29,6 @@ services:
service: postgres
otel-collector:
image: oneuptime/otel-collector:${APP_TAG}
extends:
file: ./docker-compose.base.yml
service: otel-collector
app:
image: oneuptime/app:${APP_TAG}
extends: