Merge pull request #2360 from OneUptime/k8s-impl

feat: Add Kubernetes Cluster Management and Monitoring Agent
This commit is contained in:
Simon Larsen
2026-03-18 18:54:29 +00:00
committed by GitHub
107 changed files with 7523 additions and 376 deletions

View File

@@ -0,0 +1,18 @@
# Patterns to ignore when building packages.
# OS-generated files
.DS_Store
# Version-control metadata (git, bazaar, mercurial, subversion)
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Editor swap / backup / temporary files
*.swp
*.bak
*.tmp
*.orig
*~
# IDE and editor project files
.project
.idea/
*.tmproj
.vscode/

View File

@@ -0,0 +1,15 @@
# Helm chart metadata for the OneUptime Kubernetes monitoring agent.
apiVersion: v2
name: kubernetes-agent
description: OneUptime Kubernetes Monitoring Agent — collects cluster metrics, events, and logs via OpenTelemetry and sends them to your OneUptime instance.
icon: https://raw.githubusercontent.com/OneUptime/oneuptime/master/Home/Static/img/OneUptimePNG/1.png
type: application
# Chart version (bump on every chart change); appVersion tracks the agent itself.
version: 0.1.0
appVersion: "1.0.0"
# Artifact Hub listing metadata.
annotations:
  artifacthub.io/license: MIT
  artifacthub.io/category: monitoring-logging
  artifacthub.io/prerelease: "false"

View File

@@ -0,0 +1,24 @@
{{/*
NOTES.txt — post-install message printed by `helm install` / `helm upgrade`.
Summarizes what telemetry the agent collects (conditioned on the same value
flags the templates use) and gives verification commands. This comment block
is a Helm template comment and is not rendered to the user.
*/}}
OneUptime Kubernetes Agent has been installed.
Cluster Name: {{ .Values.clusterName }}
OneUptime URL: {{ .Values.oneuptime.url }}
The agent is now collecting:
- Node, pod, and container resource metrics (kubeletstats)
- Cluster-level metrics: deployments, replicas, pod phases (k8s_cluster)
- Kubernetes events (k8sobjects)
{{- if .Values.controlPlane.enabled }}
- Control plane metrics: etcd, API server, scheduler, controller manager (prometheus)
{{- end }}
{{- if .Values.logs.enabled }}
- Pod logs from /var/log/pods (filelog DaemonSet)
{{- end }}
To verify the agent is running:
kubectl get pods -n {{ .Release.Namespace }} -l app.kubernetes.io/name={{ include "kubernetes-agent.name" . }}
To check collector logs:
kubectl logs -n {{ .Release.Namespace }} -l app.kubernetes.io/name={{ include "kubernetes-agent.name" . }} -c otel-collector
Your cluster should appear in OneUptime within a few minutes at:
{{ .Values.oneuptime.url }}/dashboard/<project-id>/kubernetes

View File

@@ -0,0 +1,59 @@
{{/*
Expand the name of the chart.
Uses .Values.nameOverride when set, otherwise the chart name; truncated to
63 characters (the Kubernetes label-value length limit) with any trailing
dash removed.
*/}}
{{- define "kubernetes-agent.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
Precedence: .Values.fullnameOverride wins outright; otherwise the release
name is combined with the chart name — unless the release name already
contains the chart name, in which case the release name is used alone to
avoid "name-name" duplication. Always truncated to 63 chars.
*/}}
{{- define "kubernetes-agent.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
"+" is replaced with "_" because "+" is not valid in label values.
*/}}
{{- define "kubernetes-agent.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels applied to every object the chart creates.
Includes the selector labels so metadata labels are always a superset of
the selector.
*/}}
{{- define "kubernetes-agent.labels" -}}
helm.sh/chart: {{ include "kubernetes-agent.chart" . }}
{{ include "kubernetes-agent.selectorLabels" . }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/part-of: oneuptime
{{- end }}
{{/*
Selector labels — the immutable subset used in Deployment/DaemonSet
selectors; must stay stable across upgrades.
*/}}
{{- define "kubernetes-agent.selectorLabels" -}}
app.kubernetes.io/name: {{ include "kubernetes-agent.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Service account name: the user-supplied .Values.serviceAccount.name if set,
otherwise the chart fullname.
*/}}
{{- define "kubernetes-agent.serviceAccountName" -}}
{{- if .Values.serviceAccount.name }}
{{- .Values.serviceAccount.name }}
{{- else }}
{{- include "kubernetes-agent.fullname" . }}
{{- end }}
{{- end }}

View File

@@ -0,0 +1,140 @@
{{- if .Values.logs.enabled }}
# OTel Collector configuration for the log-collecting DaemonSet.
# Rendered only when pod-log collection is enabled (.Values.logs.enabled).
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "kubernetes-agent.fullname" . }}-daemonset
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "kubernetes-agent.labels" . | nindent 4 }}
data:
  otel-collector-config.yaml: |
    extensions:
      health_check:
        endpoint: "0.0.0.0:13133"
    receivers:
      # Tail pod logs from the host's /var/log/pods mount
      filelog:
        include:
          - /var/log/pods/*/*/*.log
        exclude:
          # Exclude the agent's own logs to avoid a feedback loop.
          # FIX: this must be rendered by Helm so the glob contains the real
          # namespace and release fullname; the previous escaped form emitted
          # a literal template placeholder into the config, which could never
          # match a real log path, so self-exclusion silently did nothing.
          - /var/log/pods/{{ .Release.Namespace }}_{{ include "kubernetes-agent.fullname" . }}*/**/*.log
        start_at: end
        include_file_path: true
        include_file_name: false
        operators:
          # Route each log line to the parser matching its runtime format
          - type: router
            id: get-format
            routes:
              - output: parser-docker
                expr: 'body matches "^\\{"'
              - output: parser-cri
                expr: 'body matches "^[^ Z]+ "'
              - output: parser-containerd
                expr: 'body matches "^[^ Z]+Z"'
          # Docker JSON log format
          - type: json_parser
            id: parser-docker
            output: extract-metadata-from-filepath
            timestamp:
              parse_from: attributes.time
              layout: '%Y-%m-%dT%H:%M:%S.%LZ'
          # CRI log format
          - type: regex_parser
            id: parser-cri
            regex: '^(?P<time>[^ Z]+) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*) ?(?P<log>.*)$'
            output: extract-metadata-from-filepath
            timestamp:
              parse_from: attributes.time
              layout: '%Y-%m-%dT%H:%M:%S.%L%j'
          # Containerd log format
          - type: regex_parser
            id: parser-containerd
            regex: '^(?P<time>[^ ^Z]+Z) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*) ?(?P<log>.*)$'
            output: extract-metadata-from-filepath
            timestamp:
              parse_from: attributes.time
              layout: '%Y-%m-%dT%H:%M:%S.%LZ'
          # Derive namespace/pod/uid/container from the log file path
          - type: regex_parser
            id: extract-metadata-from-filepath
            regex: '^.*\/(?P<namespace>[^_]+)_(?P<pod_name>[^_]+)_(?P<uid>[a-f0-9\-]+)\/(?P<container_name>[^\._]+)\/(?P<restart_count>\d+)\.log$'
            parse_from: attributes["log.file.path"]
          # Promote the parsed message to the record body and lift the
          # path-derived fields into resource attributes
          - type: move
            from: attributes.log
            to: body
          - type: move
            from: attributes.stream
            to: attributes["log.iostream"]
          - type: move
            from: attributes.namespace
            to: resource["k8s.namespace.name"]
          - type: move
            from: attributes.pod_name
            to: resource["k8s.pod.name"]
          - type: move
            from: attributes.container_name
            to: resource["k8s.container.name"]
          - type: move
            from: attributes.uid
            to: resource["k8s.pod.uid"]
    processors:
      # Enrich records with Kubernetes metadata, associating by pod UID
      k8sattributes:
        auth_type: serviceAccount
        extract:
          metadata:
            - k8s.pod.name
            - k8s.pod.uid
            - k8s.namespace.name
            - k8s.node.name
            - k8s.deployment.name
            - k8s.replicaset.name
            - k8s.statefulset.name
            - k8s.daemonset.name
            - k8s.container.name
        pod_association:
          - sources:
              - from: resource_attribute
                name: k8s.pod.uid
      # Stamp every record with the configured cluster name
      resource:
        attributes:
          - key: k8s.cluster.name
            value: {{ .Values.clusterName | quote }}
            action: upsert
      batch:
        send_batch_size: 1024
        timeout: 10s
      # Bound collector memory; limits sized for the DaemonSet resources
      memory_limiter:
        check_interval: 5s
        limit_mib: 200
        spike_limit_mib: 50
    exporters:
      otlphttp:
        endpoint: "{{ .Values.oneuptime.url }}/otlp"
        headers:
          x-oneuptime-token: "${env:ONEUPTIME_API_KEY}"
    service:
      extensions:
        - health_check
      pipelines:
        logs:
          receivers:
            - filelog
          processors:
            - memory_limiter
            - k8sattributes
            - resource
            - batch
          exporters:
            - otlphttp
{{- end }}

View File

@@ -0,0 +1,183 @@
# OTel Collector configuration for the metrics + events Deployment.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "kubernetes-agent.fullname" . }}-deployment
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "kubernetes-agent.labels" . | nindent 4 }}
data:
  otel-collector-config.yaml: |
    extensions:
      health_check:
        endpoint: "0.0.0.0:13133"
    receivers:
      # Node, pod, and container resource metrics scraped from the kubelet
      # on the node this pod is scheduled to (NODE_NAME is injected via the
      # downward API in the Deployment).
      kubeletstats:
        collection_interval: {{ .Values.collectionInterval }}
        auth_type: serviceAccount
        endpoint: "https://${env:NODE_NAME}:10250"
        insecure_skip_verify: true
        metric_groups:
          - node
          - pod
          - container
        extra_metadata_labels:
          - container.id
        k8s_api_config:
          auth_type: serviceAccount
      # Cluster-level metrics from the Kubernetes API
      k8s_cluster:
        collection_interval: {{ .Values.collectionInterval }}
        node_conditions_to_report:
          - Ready
          - MemoryPressure
          - DiskPressure
          - PIDPressure
          - NetworkUnavailable
        allocatable_types_to_report:
          - cpu
          - memory
          - storage
      # Watch Kubernetes events and ingest them as log records
      k8sobjects:
        objects:
          - name: events
            mode: watch
            group: events.k8s.io
      {{- if .Values.controlPlane.enabled }}
      # Scrape control plane metrics via Prometheus endpoints
      prometheus:
        config:
          scrape_configs:
            - job_name: etcd
              scheme: https
              tls_config:
                insecure_skip_verify: {{ .Values.controlPlane.etcd.insecureSkipVerify }}
              static_configs:
                {{- range .Values.controlPlane.etcd.endpoints }}
                - targets:
                    - {{ . | quote }}
                {{- end }}
            - job_name: kube-apiserver
              scheme: https
              tls_config:
                insecure_skip_verify: {{ .Values.controlPlane.apiServer.insecureSkipVerify }}
              bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
              static_configs:
                {{- range .Values.controlPlane.apiServer.endpoints }}
                - targets:
                    - {{ . | quote }}
                {{- end }}
            - job_name: kube-scheduler
              scheme: https
              tls_config:
                insecure_skip_verify: {{ .Values.controlPlane.scheduler.insecureSkipVerify }}
              bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
              static_configs:
                {{- range .Values.controlPlane.scheduler.endpoints }}
                - targets:
                    - {{ . | quote }}
                {{- end }}
            - job_name: kube-controller-manager
              scheme: https
              tls_config:
                insecure_skip_verify: {{ .Values.controlPlane.controllerManager.insecureSkipVerify }}
              bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
              static_configs:
                {{- range .Values.controlPlane.controllerManager.endpoints }}
                - targets:
                    - {{ . | quote }}
                {{- end }}
      {{- end }}
    processors:
      # Enrich all telemetry with Kubernetes metadata; pods are matched by
      # IP, then UID, then the connection itself.
      k8sattributes:
        auth_type: serviceAccount
        extract:
          metadata:
            - k8s.pod.name
            - k8s.pod.uid
            - k8s.namespace.name
            - k8s.node.name
            - k8s.deployment.name
            - k8s.replicaset.name
            - k8s.statefulset.name
            - k8s.daemonset.name
            - k8s.job.name
            - k8s.cronjob.name
            - k8s.container.name
          labels:
            - tag_name: k8s.pod.label.app
              key: app
              from: pod
            - tag_name: k8s.pod.label.app.kubernetes.io/name
              key: app.kubernetes.io/name
              from: pod
        pod_association:
          - sources:
              - from: resource_attribute
                name: k8s.pod.ip
          - sources:
              - from: resource_attribute
                name: k8s.pod.uid
          - sources:
              - from: connection
      # Stamp all telemetry with the configured cluster name
      resource:
        attributes:
          - key: k8s.cluster.name
            value: {{ .Values.clusterName | quote }}
            action: upsert
      # Batch telemetry for efficient export
      batch:
        send_batch_size: 200
        send_batch_max_size: 500
        timeout: 10s
      # Bound collector memory; sized for the Deployment resource limits
      memory_limiter:
        check_interval: 5s
        limit_mib: 1500
        spike_limit_mib: 300
    exporters:
      otlphttp:
        endpoint: "{{ .Values.oneuptime.url }}/otlp"
        headers:
          x-oneuptime-token: "${env:ONEUPTIME_API_KEY}"
    service:
      extensions:
        - health_check
      pipelines:
        metrics:
          receivers:
            - kubeletstats
            - k8s_cluster
            {{- if .Values.controlPlane.enabled }}
            - prometheus
            {{- end }}
          processors:
            - memory_limiter
            - k8sattributes
            - resource
            - batch
          exporters:
            - otlphttp
        logs:
          receivers:
            - k8sobjects
          processors:
            - memory_limiter
            - k8sattributes
            - resource
            - batch
          exporters:
            - otlphttp
View File

@@ -0,0 +1,56 @@
{{- if .Values.logs.enabled }}
# DaemonSet that runs one log-collecting OTel Collector per node, tailing
# /var/log/pods via a read-only hostPath mount.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: {{ include "kubernetes-agent.fullname" . }}-logs
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "kubernetes-agent.labels" . | nindent 4 }}
    component: log-collector
spec:
  selector:
    matchLabels:
      {{- include "kubernetes-agent.selectorLabels" . | nindent 6 }}
      component: log-collector
  template:
    metadata:
      labels:
        {{- include "kubernetes-agent.selectorLabels" . | nindent 8 }}
        component: log-collector
      annotations:
        # Forces a rollout whenever the collector config changes
        checksum/config: {{ include (print $.Template.BasePath "/configmap-daemonset.yaml") . | sha256sum }}
    spec:
      serviceAccountName: {{ include "kubernetes-agent.serviceAccountName" . }}
      containers:
        - name: otel-collector
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          args:
            - "--config=/etc/otel/otel-collector-config.yaml"
          env:
            # Node name via the downward API (used in the collector config)
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            # OneUptime project API key from the chart-managed Secret
            - name: ONEUPTIME_API_KEY
              valueFrom:
                secretKeyRef:
                  name: {{ include "kubernetes-agent.fullname" . }}
                  key: api-key
          resources:
            {{- toYaml .Values.logs.resources | nindent 12 }}
          volumeMounts:
            - name: config
              mountPath: /etc/otel
              readOnly: true
            - name: varlogpods
              mountPath: /var/log/pods
              readOnly: true
      volumes:
        - name: config
          configMap:
            name: {{ include "kubernetes-agent.fullname" . }}-daemonset
        - name: varlogpods
          hostPath:
            path: /var/log/pods
{{- end }}

View File

@@ -0,0 +1,67 @@
# Deployment running the cluster-wide metrics + events collector.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "kubernetes-agent.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "kubernetes-agent.labels" . | nindent 4 }}
    component: metrics-collector
spec:
  replicas: {{ .Values.deployment.replicas }}
  selector:
    matchLabels:
      {{- include "kubernetes-agent.selectorLabels" . | nindent 6 }}
      component: metrics-collector
  template:
    metadata:
      labels:
        {{- include "kubernetes-agent.selectorLabels" . | nindent 8 }}
        component: metrics-collector
      annotations:
        # Forces a rollout whenever the collector config changes
        checksum/config: {{ include (print $.Template.BasePath "/configmap-deployment.yaml") . | sha256sum }}
    spec:
      serviceAccountName: {{ include "kubernetes-agent.serviceAccountName" . }}
      containers:
        - name: otel-collector
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          args:
            - "--config=/etc/otel/otel-collector-config.yaml"
          env:
            # Node name via the downward API (kubeletstats endpoint target)
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: CLUSTER_NAME
              value: {{ .Values.clusterName | quote }}
            # OneUptime project API key from the chart-managed Secret
            - name: ONEUPTIME_API_KEY
              valueFrom:
                secretKeyRef:
                  name: {{ include "kubernetes-agent.fullname" . }}
                  key: api-key
          ports:
            # health_check extension endpoint, used by the probes below
            - name: health
              containerPort: 13133
              protocol: TCP
          livenessProbe:
            httpGet:
              path: /
              port: health
            initialDelaySeconds: 15
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /
              port: health
            initialDelaySeconds: 5
            periodSeconds: 10
          resources:
            {{- toYaml .Values.deployment.resources | nindent 12 }}
          volumeMounts:
            - name: config
              mountPath: /etc/otel
              readOnly: true
      volumes:
        - name: config
          configMap:
            name: {{ include "kubernetes-agent.fullname" . }}-deployment

View File

@@ -0,0 +1,88 @@
{{- if .Values.serviceAccount.create }}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "kubernetes-agent.serviceAccountName" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "kubernetes-agent.labels" . | nindent 4 }}
  {{- with .Values.serviceAccount.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
{{- end }}
---
# Cluster-wide read access needed by the collector's receivers/processors.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "kubernetes-agent.fullname" . }}
  labels:
    {{- include "kubernetes-agent.labels" . | nindent 4 }}
rules:
  # For the k8s_cluster receiver and k8sattributes processor
  - apiGroups: [""]
    resources:
      - pods
      - nodes
      - nodes/proxy
      - nodes/stats
      - services
      - endpoints
      - namespaces
      - events
      - replicationcontrollers
      - resourcequotas
      - limitranges
      - configmaps
      - persistentvolumeclaims
      - persistentvolumes
    verbs: ["get", "list", "watch"]
  - apiGroups: ["apps"]
    resources:
      - deployments
      - replicasets
      - statefulsets
      - daemonsets
    verbs: ["get", "list", "watch"]
  - apiGroups: ["batch"]
    resources:
      - jobs
      - cronjobs
    verbs: ["get", "list", "watch"]
  - apiGroups: ["autoscaling"]
    resources:
      - horizontalpodautoscalers
    verbs: ["get", "list", "watch"]
  - apiGroups: ["networking.k8s.io"]
    resources:
      - ingresses
    verbs: ["get", "list", "watch"]
  - apiGroups: ["extensions"]
    resources:
      - ingresses
    verbs: ["get", "list", "watch"]
  # For the k8sobjects receiver to watch events
  - apiGroups: ["events.k8s.io"]
    resources:
      - events
    verbs: ["get", "list", "watch"]
  # For the kubeletstats receiver (kubelet metrics endpoints)
  - nonResourceURLs:
      - /metrics
      - /metrics/cadvisor
    verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "kubernetes-agent.fullname" . }}
  labels:
    {{- include "kubernetes-agent.labels" . | nindent 4 }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ include "kubernetes-agent.fullname" . }}
subjects:
  - kind: ServiceAccount
    name: {{ include "kubernetes-agent.serviceAccountName" . }}
    namespace: {{ .Release.Namespace }}

View File

@@ -0,0 +1,10 @@
# Secret holding the OneUptime project API key; consumed by the collector
# pods through the ONEUPTIME_API_KEY environment variable.
apiVersion: v1
kind: Secret
metadata:
  name: {{ include "kubernetes-agent.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "kubernetes-agent.labels" . | nindent 4 }}
type: Opaque
data:
  api-key: {{ .Values.oneuptime.apiKey | b64enc | quote }}

View File

@@ -0,0 +1,210 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"type": "object",
"required": ["oneuptime", "clusterName"],
"properties": {
"oneuptime": {
"type": "object",
"description": "OneUptime instance connection details",
"required": ["url", "apiKey"],
"properties": {
"url": {
"type": "string",
"description": "URL of your OneUptime instance (e.g., https://oneuptime.example.com)"
},
"apiKey": {
"type": "string",
"description": "Project API key from OneUptime (Settings > API Keys)"
}
},
"additionalProperties": false
},
"clusterName": {
"type": "string",
"description": "Unique name for this cluster (used as k8s.cluster.name attribute)"
},
"namespaceFilters": {
"type": "object",
"description": "Namespace filters to limit which namespaces are monitored",
"properties": {
"include": {
"type": "array",
"items": { "type": "string" },
"description": "If set, only these namespaces are monitored (empty = all namespaces)"
},
"exclude": {
"type": "array",
"items": { "type": "string" },
"description": "Namespaces to exclude from monitoring"
}
},
"additionalProperties": false
},
"image": {
"type": "object",
"description": "OTel Collector image configuration",
"properties": {
"repository": {
"type": "string"
},
"tag": {
"type": "string"
},
"pullPolicy": {
"type": "string",
"enum": ["Always", "IfNotPresent", "Never"]
}
},
"additionalProperties": false
},
"deployment": {
"type": "object",
"description": "Deployment (metrics + events collector) resource configuration",
"properties": {
"replicas": {
"type": "integer",
"minimum": 1
},
"resources": {
"type": "object",
"properties": {
"requests": {
"type": "object",
"properties": {
"cpu": { "type": "string" },
"memory": { "type": "string" }
},
"additionalProperties": false
},
"limits": {
"type": "object",
"properties": {
"cpu": { "type": "string" },
"memory": { "type": "string" }
},
"additionalProperties": false
}
},
"additionalProperties": false
}
},
"additionalProperties": false
},
"controlPlane": {
"type": "object",
"description": "Control plane monitoring configuration",
"properties": {
"enabled": {
"type": "boolean"
},
"etcd": {
"type": "object",
"properties": {
"endpoints": {
"type": "array",
"items": { "type": "string" }
},
"insecureSkipVerify": {
"type": "boolean"
}
},
"additionalProperties": false
},
"apiServer": {
"type": "object",
"properties": {
"endpoints": {
"type": "array",
"items": { "type": "string" }
},
"insecureSkipVerify": {
"type": "boolean"
}
},
"additionalProperties": false
},
"scheduler": {
"type": "object",
"properties": {
"endpoints": {
"type": "array",
"items": { "type": "string" }
},
"insecureSkipVerify": {
"type": "boolean"
}
},
"additionalProperties": false
},
"controllerManager": {
"type": "object",
"properties": {
"endpoints": {
"type": "array",
"items": { "type": "string" }
},
"insecureSkipVerify": {
"type": "boolean"
}
},
"additionalProperties": false
}
},
"additionalProperties": false
},
"logs": {
"type": "object",
"description": "Pod log collection via DaemonSet with filelog receiver",
"properties": {
"enabled": {
"type": "boolean"
},
"resources": {
"type": "object",
"properties": {
"requests": {
"type": "object",
"properties": {
"cpu": { "type": "string" },
"memory": { "type": "string" }
},
"additionalProperties": false
},
"limits": {
"type": "object",
"properties": {
"cpu": { "type": "string" },
"memory": { "type": "string" }
},
"additionalProperties": false
}
},
"additionalProperties": false
}
},
"additionalProperties": false
},
"collectionInterval": {
"type": "string",
"description": "Collection interval for metrics (e.g., 30s, 1m)"
},
"serviceAccount": {
"type": "object",
"description": "Service account configuration",
"properties": {
"create": {
"type": "boolean"
},
"name": {
"type": "string"
},
"annotations": {
"type": "object",
"additionalProperties": { "type": "string" }
}
},
"additionalProperties": false
}
},
"additionalProperties": false
}

View File

@@ -0,0 +1,80 @@
# OneUptime Kubernetes Agent Configuration

# Required: Your OneUptime instance connection details
oneuptime:
  # URL of your OneUptime instance (e.g., https://oneuptime.example.com)
  url: ""
  # Project API key from OneUptime (Settings > API Keys)
  apiKey: ""

# Required: Unique name for this cluster (used as the k8s.cluster.name attribute)
clusterName: ""

# Namespace filters — limit which namespaces are monitored
namespaceFilters:
  # If set, only these namespaces are monitored (empty = all namespaces)
  include: []
  # Namespaces to exclude from monitoring
  exclude:
    - kube-system

# OTel Collector image configuration
image:
  repository: otel/opentelemetry-collector-contrib
  tag: "0.96.0"
  pullPolicy: IfNotPresent

# Deployment (metrics + events collector) resource configuration
deployment:
  replicas: 1
  resources:
    requests:
      cpu: 200m
      memory: 512Mi
    limits:
      cpu: 1000m
      memory: 2Gi

# Control plane monitoring (etcd, API server, scheduler, controller manager).
# Disabled by default — enable for self-managed clusters.
# Managed K8s (EKS, GKE, AKS) typically don't expose control plane metrics.
controlPlane:
  enabled: false
  etcd:
    # Endpoints to scrape etcd metrics from
    endpoints:
      - https://localhost:2379/metrics
    # Skip TLS certificate verification when scraping etcd
    insecureSkipVerify: true
  apiServer:
    endpoints:
      - https://localhost:6443/metrics
    insecureSkipVerify: true
  scheduler:
    endpoints:
      - https://localhost:10259/metrics
    insecureSkipVerify: true
  controllerManager:
    endpoints:
      - https://localhost:10257/metrics
    insecureSkipVerify: true

# Pod log collection via DaemonSet with the filelog receiver
logs:
  enabled: true
  resources:
    requests:
      cpu: 50m
      memory: 128Mi
    limits:
      cpu: 200m
      memory: 256Mi

# Collection interval for metrics receivers (e.g., 30s, 1m)
collectionInterval: 30s

# Service account configuration
serviceAccount:
  create: true
  # If empty, a name is generated from the chart fullname
  name: ""
  annotations: {}

View File

@@ -1,4 +1,4 @@
{{- if .Values.aiAgent.enabled }}
{{- if and .Values.aiAgent.enabled (not .Values.deployment.disableDeployments) }}
apiVersion: apps/v1
kind: Deployment
metadata:

View File

@@ -1,4 +1,4 @@
{{- if $.Values.app.enabled }}
{{- if and $.Values.app.enabled (not $.Values.deployment.disableDeployments) }}
# OneUptime app Deployment
apiVersion: apps/v1
kind: Deployment
@@ -137,7 +137,7 @@ spec:
---
{{- end }}
{{- if $.Values.app.enabled }}
{{- if and $.Values.app.enabled (not $.Values.deployment.disableDeployments) }}
# OneUptime app Service
{{- $appPorts := dict "port" $.Values.app.ports.http -}}
{{- $appServiceArgs := dict "ServiceName" "app" "Ports" $appPorts "Release" $.Release "Values" $.Values -}}

View File

@@ -1,4 +1,4 @@
{{- if $.Values.cronJobs.cleanup.enabled }}
{{- if and $.Values.cronJobs.cleanup.enabled (not $.Values.deployment.disableDeployments) }}
apiVersion: batch/v1
kind: CronJob

View File

@@ -1,4 +1,4 @@
{{- if $.Values.home.enabled }}
{{- if and $.Values.home.enabled (not $.Values.deployment.disableDeployments) }}
# OneUptime Home Deployment
apiVersion: apps/v1
kind: Deployment
@@ -126,7 +126,7 @@ spec:
{{- end }}
{{- if $.Values.home.enabled }}
{{- if and $.Values.home.enabled (not $.Values.deployment.disableDeployments) }}
# OneUptime home Service
{{- $homePorts := $.Values.home.ports -}}
{{- $homeServiceArgs := dict "ServiceName" "home" "Ports" $homePorts "Release" $.Release "Values" $.Values -}}

View File

@@ -3,7 +3,7 @@ KEDA ScaledObjects for various services
*/}}
{{/* Telemetry KEDA ScaledObject */}}
{{- if and .Values.keda.enabled .Values.telemetry.enabled .Values.telemetry.keda.enabled (not .Values.telemetry.disableAutoscaler) }}
{{- if and .Values.keda.enabled .Values.telemetry.enabled .Values.telemetry.keda.enabled (not .Values.telemetry.disableAutoscaler) (not .Values.deployment.disableDeployments) }}
{{- $metricsConfig := dict "enabled" .Values.telemetry.keda.enabled "minReplicas" .Values.telemetry.keda.minReplicas "maxReplicas" .Values.telemetry.keda.maxReplicas "pollingInterval" .Values.telemetry.keda.pollingInterval "cooldownPeriod" .Values.telemetry.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_telemetry_queue_size" "threshold" .Values.telemetry.keda.queueSizeThreshold "port" .Values.telemetry.ports.http)) }}
{{- $telemetryKedaArgs := dict "ServiceName" "telemetry" "Release" .Release "Values" .Values "MetricsConfig" $metricsConfig "DisableAutoscaler" .Values.telemetry.disableAutoscaler }}
{{- include "oneuptime.kedaScaledObject" $telemetryKedaArgs }}
@@ -12,7 +12,7 @@ KEDA ScaledObjects for various services
{{/* Probe KEDA ScaledObjects - one for each probe configuration */}}
{{- range $key, $val := $.Values.probes }}
{{- $probeEnabled := or (not (hasKey $val "enabled")) $val.enabled }}
{{- if and $.Values.keda.enabled $probeEnabled (and $val.keda $val.keda.enabled) (not $val.disableAutoscaler) }}
{{- if and $.Values.keda.enabled $probeEnabled (and $val.keda $val.keda.enabled) (not $val.disableAutoscaler) (not $.Values.deployment.disableDeployments) }}
{{- $serviceName := printf "probe-%s" $key }}
{{- $probePort := 3874 }}
{{- if and $val.ports $val.ports.http }}
@@ -25,14 +25,14 @@ KEDA ScaledObjects for various services
{{- end }}
{{/* Worker KEDA ScaledObject */}}
{{- if and .Values.keda.enabled .Values.worker.enabled .Values.worker.keda.enabled (not .Values.worker.disableAutoscaler) }}
{{- if and .Values.keda.enabled .Values.worker.enabled .Values.worker.keda.enabled (not .Values.worker.disableAutoscaler) (not .Values.deployment.disableDeployments) }}
{{- $metricsConfig := dict "enabled" .Values.worker.keda.enabled "minReplicas" .Values.worker.keda.minReplicas "maxReplicas" .Values.worker.keda.maxReplicas "pollingInterval" .Values.worker.keda.pollingInterval "cooldownPeriod" .Values.worker.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_worker_queue_size" "threshold" .Values.worker.keda.queueSizeThreshold "port" .Values.worker.ports.http)) }}
{{- $workerKedaArgs := dict "ServiceName" "worker" "Release" .Release "Values" .Values "MetricsConfig" $metricsConfig "DisableAutoscaler" .Values.worker.disableAutoscaler }}
{{- include "oneuptime.kedaScaledObject" $workerKedaArgs }}
{{- end }}
{{/* AI Agent KEDA ScaledObject */}}
{{- if and .Values.keda.enabled .Values.aiAgent.enabled .Values.aiAgent.keda.enabled (not .Values.aiAgent.disableAutoscaler) }}
{{- if and .Values.keda.enabled .Values.aiAgent.enabled .Values.aiAgent.keda.enabled (not .Values.aiAgent.disableAutoscaler) (not .Values.deployment.disableDeployments) }}
{{- $metricsConfig := dict "enabled" .Values.aiAgent.keda.enabled "minReplicas" .Values.aiAgent.keda.minReplicas "maxReplicas" .Values.aiAgent.keda.maxReplicas "pollingInterval" .Values.aiAgent.keda.pollingInterval "cooldownPeriod" .Values.aiAgent.keda.cooldownPeriod "triggers" (list (dict "query" "oneuptime_ai_agent_queue_size" "threshold" .Values.aiAgent.keda.queueSizeThreshold "port" .Values.aiAgent.ports.http)) }}
{{- $aiAgentKedaArgs := dict "ServiceName" "ai-agent" "Release" .Release "Values" .Values "MetricsConfig" $metricsConfig "DisableAutoscaler" .Values.aiAgent.disableAutoscaler }}
{{- include "oneuptime.kedaScaledObject" $aiAgentKedaArgs }}

View File

@@ -1,4 +1,4 @@
{{- if $.Values.nginx.enabled }}
{{- if and $.Values.nginx.enabled (not $.Values.deployment.disableDeployments) }}
# OneUptime nginx Deployment
apiVersion: apps/v1

View File

@@ -1,3 +1,4 @@
{{- if not $.Values.deployment.disableDeployments }}
{{- range $key, $val := $.Values.probes }}
{{- if or (not (hasKey $val "enabled")) $val.enabled }}
apiVersion: apps/v1
@@ -166,3 +167,4 @@ spec:
---
{{- end }}
{{- end }}

View File

@@ -1,4 +1,4 @@
{{- if $.Values.telemetry.enabled }}
{{- if and $.Values.telemetry.enabled (not $.Values.deployment.disableDeployments) }}
# OneUptime telemetry Deployment
apiVersion: apps/v1
@@ -137,7 +137,7 @@ spec:
---
{{- if $.Values.telemetry.enabled }}
{{- if and $.Values.telemetry.enabled (not $.Values.deployment.disableDeployments) }}
# OneUptime telemetry Service
{{- $telemetryPorts := dict "http" $.Values.telemetry.ports.http "grpc" $.Values.telemetry.ports.grpc -}}
{{- $telemetryServiceArgs := dict "ServiceName" "telemetry" "Ports" $telemetryPorts "Release" $.Release "Values" $.Values -}}

View File

@@ -1,4 +1,4 @@
{{- if $.Values.testServer.enabled }}
{{- if and $.Values.testServer.enabled (not $.Values.deployment.disableDeployments) }}
# OneUptime test-server Deployment
{{- $testServerPorts := $.Values.testServer.ports -}}

View File

@@ -1,4 +1,4 @@
{{- if $.Values.worker.enabled }}
{{- if and $.Values.worker.enabled (not $.Values.deployment.disableDeployments) }}
# OneUptime worker Deployment
apiVersion: apps/v1
kind: Deployment
@@ -128,7 +128,7 @@ spec:
---
{{- if $.Values.worker.enabled }}
{{- if and $.Values.worker.enabled (not $.Values.deployment.disableDeployments) }}
# OneUptime worker Service
{{- $workerPorts := $.Values.worker.ports -}}
{{- $workerServiceArgs := dict "ServiceName" "worker" "Ports" $workerPorts "Release" $.Release "Values" $.Values -}}

View File

@@ -86,6 +86,11 @@
"deployment": {
"type": "object",
"properties": {
"disableDeployments": {
"type": "boolean",
"description": "When set to true, no OneUptime deployments are provisioned. Only databases (Redis, ClickHouse, Postgres) will be running.",
"default": false
},
"includeTimestampLabel": {
"type": "boolean"
},

View File

@@ -48,6 +48,8 @@ externalSecrets:
passwordKey:
deployment:
# When set to true, no OneUptime deployments are provisioned. Only databases (Redis, ClickHouse, Postgres) will be running.
disableDeployments: false
# Default replica count for all deployments
replicaCount: 1
# Update strategy type for all deployments