mirror of
https://github.com/OneUptime/oneuptime.git
synced 2026-04-06 00:32:12 +02:00
feat: Add Kubernetes Cluster Management and Monitoring Agent
- Implemented a new migration for the KubernetesCluster and KubernetesClusterLabel tables in the database.
- Created a KubernetesClusterService for managing cluster instances, including methods for finding or creating clusters, updating their status, and marking disconnected clusters.
- Introduced a Helm chart for the OneUptime Kubernetes Monitoring Agent, including configuration files, deployment templates, and RBAC settings.
- Added support for collecting metrics and logs from Kubernetes clusters using OpenTelemetry.
- Configured service accounts, secrets, and resource limits for the agent's deployment and daemonset.
- Provided detailed notes and helper templates for the Helm chart to facilitate installation and configuration.
This commit is contained in:
18
HelmChart/Public/kubernetes-agent/.helmignore
Normal file
18
HelmChart/Public/kubernetes-agent/.helmignore
Normal file
@@ -0,0 +1,18 @@
|
||||
# Patterns to ignore when building packages.
|
||||
.DS_Store
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
15
HelmChart/Public/kubernetes-agent/Chart.yaml
Normal file
15
HelmChart/Public/kubernetes-agent/Chart.yaml
Normal file
@@ -0,0 +1,15 @@
|
||||
apiVersion: v2
|
||||
name: kubernetes-agent
|
||||
description: OneUptime Kubernetes Monitoring Agent — collects cluster metrics, events, and logs via OpenTelemetry and sends them to your OneUptime instance.
|
||||
icon: https://raw.githubusercontent.com/OneUptime/oneuptime/master/Home/Static/img/OneUptimePNG/1.png
|
||||
|
||||
type: application
|
||||
|
||||
version: 0.1.0
|
||||
|
||||
appVersion: "1.0.0"
|
||||
|
||||
annotations:
|
||||
artifacthub.io/license: MIT
|
||||
artifacthub.io/category: monitoring-logging
|
||||
artifacthub.io/prerelease: "false"
|
||||
24
HelmChart/Public/kubernetes-agent/templates/NOTES.txt
Normal file
24
HelmChart/Public/kubernetes-agent/templates/NOTES.txt
Normal file
@@ -0,0 +1,24 @@
|
||||
OneUptime Kubernetes Agent has been installed.
|
||||
|
||||
Cluster Name: {{ .Values.clusterName }}
|
||||
OneUptime URL: {{ .Values.oneuptime.url }}
|
||||
|
||||
The agent is now collecting:
|
||||
- Node, pod, and container resource metrics (kubeletstats)
|
||||
- Cluster-level metrics: deployments, replicas, pod phases (k8s_cluster)
|
||||
- Kubernetes events (k8sobjects)
|
||||
{{- if .Values.controlPlane.enabled }}
|
||||
- Control plane metrics: etcd, API server, scheduler, controller manager (prometheus)
|
||||
{{- end }}
|
||||
{{- if .Values.logs.enabled }}
|
||||
- Pod logs from /var/log/pods (filelog DaemonSet)
|
||||
{{- end }}
|
||||
|
||||
To verify the agent is running:
|
||||
kubectl get pods -n {{ .Release.Namespace }} -l app.kubernetes.io/name={{ include "kubernetes-agent.name" . }}
|
||||
|
||||
To check collector logs:
|
||||
kubectl logs -n {{ .Release.Namespace }} -l app.kubernetes.io/name={{ include "kubernetes-agent.name" . }} -c otel-collector
|
||||
|
||||
Your cluster should appear in OneUptime within a few minutes at:
|
||||
{{ .Values.oneuptime.url }}/dashboard/<project-id>/kubernetes
|
||||
59
HelmChart/Public/kubernetes-agent/templates/_helpers.tpl
Normal file
59
HelmChart/Public/kubernetes-agent/templates/_helpers.tpl
Normal file
@@ -0,0 +1,59 @@
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "kubernetes-agent.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
*/}}
|
||||
{{- define "kubernetes-agent.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "kubernetes-agent.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "kubernetes-agent.labels" -}}
|
||||
helm.sh/chart: {{ include "kubernetes-agent.chart" . }}
|
||||
{{ include "kubernetes-agent.selectorLabels" . }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
app.kubernetes.io/part-of: oneuptime
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Selector labels
|
||||
*/}}
|
||||
{{- define "kubernetes-agent.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "kubernetes-agent.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Service account name
|
||||
*/}}
|
||||
{{- define "kubernetes-agent.serviceAccountName" -}}
|
||||
{{- if .Values.serviceAccount.name }}
|
||||
{{- .Values.serviceAccount.name }}
|
||||
{{- else }}
|
||||
{{- include "kubernetes-agent.fullname" . }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,134 @@
|
||||
{{- if .Values.logs.enabled }}
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ include "kubernetes-agent.fullname" . }}-daemonset
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "kubernetes-agent.labels" . | nindent 4 }}
|
||||
data:
|
||||
otel-collector-config.yaml: |
|
||||
receivers:
|
||||
# Collect pod logs from /var/log/pods
|
||||
filelog:
|
||||
include:
|
||||
- /var/log/pods/*/*/*.log
|
||||
exclude:
|
||||
# Exclude the agent's own logs to avoid feedback loop
|
||||
- /var/log/pods/{{ "{{ .Release.Namespace }}" }}_{{ "{{ include \"kubernetes-agent.fullname\" . }}" }}*/**/*.log
|
||||
start_at: end
|
||||
include_file_path: true
|
||||
include_file_name: false
|
||||
operators:
|
||||
# Parse CRI log format
|
||||
- type: router
|
||||
id: get-format
|
||||
routes:
|
||||
- output: parser-docker
|
||||
expr: 'body matches "^\\{"'
|
||||
- output: parser-cri
|
||||
expr: 'body matches "^[^ Z]+ "'
|
||||
- output: parser-containerd
|
||||
expr: 'body matches "^[^ Z]+Z"'
|
||||
# Docker JSON log format
|
||||
- type: json_parser
|
||||
id: parser-docker
|
||||
output: extract-metadata-from-filepath
|
||||
timestamp:
|
||||
parse_from: attributes.time
|
||||
layout: '%Y-%m-%dT%H:%M:%S.%LZ'
|
||||
# CRI log format
|
||||
- type: regex_parser
|
||||
id: parser-cri
|
||||
regex: '^(?P<time>[^ Z]+) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*) ?(?P<log>.*)$'
|
||||
output: extract-metadata-from-filepath
|
||||
timestamp:
|
||||
parse_from: attributes.time
|
||||
layout: '%Y-%m-%dT%H:%M:%S.%L%j'
|
||||
# Containerd log format
|
||||
- type: regex_parser
|
||||
id: parser-containerd
|
||||
regex: '^(?P<time>[^ ^Z]+Z) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*) ?(?P<log>.*)$'
|
||||
output: extract-metadata-from-filepath
|
||||
timestamp:
|
||||
parse_from: attributes.time
|
||||
layout: '%Y-%m-%dT%H:%M:%S.%LZ'
|
||||
# Extract k8s metadata from file path
|
||||
- type: regex_parser
|
||||
id: extract-metadata-from-filepath
|
||||
regex: '^.*\/(?P<namespace>[^_]+)_(?P<pod_name>[^_]+)_(?P<uid>[a-f0-9\-]+)\/(?P<container_name>[^\._]+)\/(?P<restart_count>\d+)\.log$'
|
||||
parse_from: attributes["log.file.path"]
|
||||
- type: move
|
||||
from: attributes.log
|
||||
to: body
|
||||
- type: move
|
||||
from: attributes.stream
|
||||
to: attributes["log.iostream"]
|
||||
- type: move
|
||||
from: attributes.namespace
|
||||
to: resource["k8s.namespace.name"]
|
||||
- type: move
|
||||
from: attributes.pod_name
|
||||
to: resource["k8s.pod.name"]
|
||||
- type: move
|
||||
from: attributes.container_name
|
||||
to: resource["k8s.container.name"]
|
||||
- type: move
|
||||
from: attributes.uid
|
||||
to: resource["k8s.pod.uid"]
|
||||
|
||||
processors:
|
||||
# Enrich with K8s metadata
|
||||
k8sattributes:
|
||||
auth_type: serviceAccount
|
||||
extract:
|
||||
metadata:
|
||||
- k8s.pod.name
|
||||
- k8s.pod.uid
|
||||
- k8s.namespace.name
|
||||
- k8s.node.name
|
||||
- k8s.deployment.name
|
||||
- k8s.replicaset.name
|
||||
- k8s.statefulset.name
|
||||
- k8s.daemonset.name
|
||||
- k8s.container.name
|
||||
pod_association:
|
||||
- sources:
|
||||
- from: resource_attribute
|
||||
name: k8s.pod.uid
|
||||
|
||||
# Stamp with cluster name
|
||||
resource:
|
||||
attributes:
|
||||
- key: k8s.cluster.name
|
||||
value: {{ .Values.clusterName | quote }}
|
||||
action: upsert
|
||||
|
||||
batch:
|
||||
send_batch_size: 1024
|
||||
timeout: 10s
|
||||
|
||||
memory_limiter:
|
||||
check_interval: 5s
|
||||
limit_mib: 200
|
||||
spike_limit_mib: 50
|
||||
|
||||
exporters:
|
||||
otlphttp:
|
||||
endpoint: "{{ .Values.oneuptime.url }}"
|
||||
headers:
|
||||
x-oneuptime-token: "${env:ONEUPTIME_API_KEY}"
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
logs:
|
||||
receivers:
|
||||
- filelog
|
||||
processors:
|
||||
- memory_limiter
|
||||
- k8sattributes
|
||||
- resource
|
||||
- batch
|
||||
exporters:
|
||||
- otlphttp
|
||||
{{- end }}
|
||||
@@ -0,0 +1,176 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ include "kubernetes-agent.fullname" . }}-deployment
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "kubernetes-agent.labels" . | nindent 4 }}
|
||||
data:
|
||||
otel-collector-config.yaml: |
|
||||
receivers:
|
||||
# Collect node, pod, and container resource metrics from kubelet
|
||||
kubeletstats:
|
||||
collection_interval: {{ .Values.collectionInterval }}
|
||||
auth_type: serviceAccount
|
||||
endpoint: "https://${env:NODE_NAME}:10250"
|
||||
insecure_skip_verify: true
|
||||
metric_groups:
|
||||
- node
|
||||
- pod
|
||||
- container
|
||||
extra_metadata_labels:
|
||||
- container.id
|
||||
k8s_api_config:
|
||||
auth_type: serviceAccount
|
||||
|
||||
# Collect cluster-level metrics from the Kubernetes API
|
||||
k8s_cluster:
|
||||
collection_interval: {{ .Values.collectionInterval }}
|
||||
node_conditions_to_report:
|
||||
- Ready
|
||||
- MemoryPressure
|
||||
- DiskPressure
|
||||
- PIDPressure
|
||||
- NetworkUnavailable
|
||||
allocatable_types_to_report:
|
||||
- cpu
|
||||
- memory
|
||||
- storage
|
||||
|
||||
# Watch Kubernetes events and ingest as logs
|
||||
k8sobjects:
|
||||
objects:
|
||||
- name: events
|
||||
mode: watch
|
||||
group: events.k8s.io
|
||||
|
||||
{{- if .Values.controlPlane.enabled }}
|
||||
# Scrape control plane metrics via Prometheus endpoints
|
||||
prometheus:
|
||||
config:
|
||||
scrape_configs:
|
||||
- job_name: etcd
|
||||
scheme: https
|
||||
tls_config:
|
||||
insecure_skip_verify: {{ .Values.controlPlane.etcd.insecureSkipVerify }}
|
||||
static_configs:
|
||||
{{- range .Values.controlPlane.etcd.endpoints }}
|
||||
- targets:
|
||||
- {{ . | quote }}
|
||||
{{- end }}
|
||||
- job_name: kube-apiserver
|
||||
scheme: https
|
||||
tls_config:
|
||||
insecure_skip_verify: {{ .Values.controlPlane.apiServer.insecureSkipVerify }}
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
static_configs:
|
||||
{{- range .Values.controlPlane.apiServer.endpoints }}
|
||||
- targets:
|
||||
- {{ . | quote }}
|
||||
{{- end }}
|
||||
- job_name: kube-scheduler
|
||||
scheme: https
|
||||
tls_config:
|
||||
insecure_skip_verify: {{ .Values.controlPlane.scheduler.insecureSkipVerify }}
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
static_configs:
|
||||
{{- range .Values.controlPlane.scheduler.endpoints }}
|
||||
- targets:
|
||||
- {{ . | quote }}
|
||||
{{- end }}
|
||||
- job_name: kube-controller-manager
|
||||
scheme: https
|
||||
tls_config:
|
||||
insecure_skip_verify: {{ .Values.controlPlane.controllerManager.insecureSkipVerify }}
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
static_configs:
|
||||
{{- range .Values.controlPlane.controllerManager.endpoints }}
|
||||
- targets:
|
||||
- {{ . | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
processors:
|
||||
# Enrich all telemetry with Kubernetes metadata
|
||||
k8sattributes:
|
||||
auth_type: serviceAccount
|
||||
extract:
|
||||
metadata:
|
||||
- k8s.pod.name
|
||||
- k8s.pod.uid
|
||||
- k8s.namespace.name
|
||||
- k8s.node.name
|
||||
- k8s.deployment.name
|
||||
- k8s.replicaset.name
|
||||
- k8s.statefulset.name
|
||||
- k8s.daemonset.name
|
||||
- k8s.job.name
|
||||
- k8s.cronjob.name
|
||||
- k8s.container.name
|
||||
labels:
|
||||
- tag_name: k8s.pod.label.app
|
||||
key: app
|
||||
from: pod
|
||||
- tag_name: k8s.pod.label.app.kubernetes.io/name
|
||||
key: app.kubernetes.io/name
|
||||
from: pod
|
||||
pod_association:
|
||||
- sources:
|
||||
- from: resource_attribute
|
||||
name: k8s.pod.ip
|
||||
- sources:
|
||||
- from: resource_attribute
|
||||
name: k8s.pod.uid
|
||||
- sources:
|
||||
- from: connection
|
||||
|
||||
# Stamp all telemetry with the cluster name
|
||||
resource:
|
||||
attributes:
|
||||
- key: k8s.cluster.name
|
||||
value: {{ .Values.clusterName | quote }}
|
||||
action: upsert
|
||||
|
||||
# Batch telemetry for efficient export
|
||||
batch:
|
||||
send_batch_size: 1024
|
||||
timeout: 10s
|
||||
|
||||
# Limit memory usage
|
||||
memory_limiter:
|
||||
check_interval: 5s
|
||||
limit_mib: 400
|
||||
spike_limit_mib: 100
|
||||
|
||||
exporters:
|
||||
otlphttp:
|
||||
endpoint: "{{ .Values.oneuptime.url }}"
|
||||
headers:
|
||||
x-oneuptime-token: "${env:ONEUPTIME_API_KEY}"
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
metrics:
|
||||
receivers:
|
||||
- kubeletstats
|
||||
- k8s_cluster
|
||||
{{- if .Values.controlPlane.enabled }}
|
||||
- prometheus
|
||||
{{- end }}
|
||||
processors:
|
||||
- memory_limiter
|
||||
- k8sattributes
|
||||
- resource
|
||||
- batch
|
||||
exporters:
|
||||
- otlphttp
|
||||
logs:
|
||||
receivers:
|
||||
- k8sobjects
|
||||
processors:
|
||||
- memory_limiter
|
||||
- k8sattributes
|
||||
- resource
|
||||
- batch
|
||||
exporters:
|
||||
- otlphttp
|
||||
56
HelmChart/Public/kubernetes-agent/templates/daemonset.yaml
Normal file
56
HelmChart/Public/kubernetes-agent/templates/daemonset.yaml
Normal file
@@ -0,0 +1,56 @@
|
||||
{{- if .Values.logs.enabled }}
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: {{ include "kubernetes-agent.fullname" . }}-logs
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "kubernetes-agent.labels" . | nindent 4 }}
|
||||
component: log-collector
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "kubernetes-agent.selectorLabels" . | nindent 6 }}
|
||||
component: log-collector
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
{{- include "kubernetes-agent.selectorLabels" . | nindent 8 }}
|
||||
component: log-collector
|
||||
annotations:
|
||||
checksum/config: {{ include (print $.Template.BasePath "/configmap-daemonset.yaml") . | sha256sum }}
|
||||
spec:
|
||||
serviceAccountName: {{ include "kubernetes-agent.serviceAccountName" . }}
|
||||
containers:
|
||||
- name: otel-collector
|
||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
args:
|
||||
- "--config=/etc/otel/otel-collector-config.yaml"
|
||||
env:
|
||||
- name: NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
- name: ONEUPTIME_API_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "kubernetes-agent.fullname" . }}
|
||||
key: api-key
|
||||
resources:
|
||||
{{- toYaml .Values.logs.resources | nindent 12 }}
|
||||
volumeMounts:
|
||||
- name: config
|
||||
mountPath: /etc/otel
|
||||
readOnly: true
|
||||
- name: varlogpods
|
||||
mountPath: /var/log/pods
|
||||
readOnly: true
|
||||
volumes:
|
||||
- name: config
|
||||
configMap:
|
||||
name: {{ include "kubernetes-agent.fullname" . }}-daemonset
|
||||
- name: varlogpods
|
||||
hostPath:
|
||||
path: /var/log/pods
|
||||
{{- end }}
|
||||
67
HelmChart/Public/kubernetes-agent/templates/deployment.yaml
Normal file
67
HelmChart/Public/kubernetes-agent/templates/deployment.yaml
Normal file
@@ -0,0 +1,67 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ include "kubernetes-agent.fullname" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "kubernetes-agent.labels" . | nindent 4 }}
|
||||
component: metrics-collector
|
||||
spec:
|
||||
replicas: {{ .Values.deployment.replicas }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "kubernetes-agent.selectorLabels" . | nindent 6 }}
|
||||
component: metrics-collector
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
{{- include "kubernetes-agent.selectorLabels" . | nindent 8 }}
|
||||
component: metrics-collector
|
||||
annotations:
|
||||
checksum/config: {{ include (print $.Template.BasePath "/configmap-deployment.yaml") . | sha256sum }}
|
||||
spec:
|
||||
serviceAccountName: {{ include "kubernetes-agent.serviceAccountName" . }}
|
||||
containers:
|
||||
- name: otel-collector
|
||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
args:
|
||||
- "--config=/etc/otel/otel-collector-config.yaml"
|
||||
env:
|
||||
- name: NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
- name: CLUSTER_NAME
|
||||
value: {{ .Values.clusterName | quote }}
|
||||
- name: ONEUPTIME_API_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "kubernetes-agent.fullname" . }}
|
||||
key: api-key
|
||||
ports:
|
||||
- name: health
|
||||
containerPort: 13133
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: health
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 10
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: health
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
resources:
|
||||
{{- toYaml .Values.deployment.resources | nindent 12 }}
|
||||
volumeMounts:
|
||||
- name: config
|
||||
mountPath: /etc/otel
|
||||
readOnly: true
|
||||
volumes:
|
||||
- name: config
|
||||
configMap:
|
||||
name: {{ include "kubernetes-agent.fullname" . }}-deployment
|
||||
88
HelmChart/Public/kubernetes-agent/templates/rbac.yaml
Normal file
88
HelmChart/Public/kubernetes-agent/templates/rbac.yaml
Normal file
@@ -0,0 +1,88 @@
|
||||
{{- if .Values.serviceAccount.create }}
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: {{ include "kubernetes-agent.serviceAccountName" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "kubernetes-agent.labels" . | nindent 4 }}
|
||||
{{- with .Values.serviceAccount.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: {{ include "kubernetes-agent.fullname" . }}
|
||||
labels:
|
||||
{{- include "kubernetes-agent.labels" . | nindent 4 }}
|
||||
rules:
|
||||
# For k8s_cluster receiver and k8sattributes processor
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- pods
|
||||
- nodes
|
||||
- nodes/proxy
|
||||
- nodes/stats
|
||||
- services
|
||||
- endpoints
|
||||
- namespaces
|
||||
- events
|
||||
- replicationcontrollers
|
||||
- resourcequotas
|
||||
- limitranges
|
||||
- configmaps
|
||||
- persistentvolumeclaims
|
||||
- persistentvolumes
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["apps"]
|
||||
resources:
|
||||
- deployments
|
||||
- replicasets
|
||||
- statefulsets
|
||||
- daemonsets
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["batch"]
|
||||
resources:
|
||||
- jobs
|
||||
- cronjobs
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["autoscaling"]
|
||||
resources:
|
||||
- horizontalpodautoscalers
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["networking.k8s.io"]
|
||||
resources:
|
||||
- ingresses
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["extensions"]
|
||||
resources:
|
||||
- ingresses
|
||||
verbs: ["get", "list", "watch"]
|
||||
# For k8sobjects receiver to watch events
|
||||
- apiGroups: ["events.k8s.io"]
|
||||
resources:
|
||||
- events
|
||||
verbs: ["get", "list", "watch"]
|
||||
# For kubeletstats receiver
|
||||
- nonResourceURLs:
|
||||
- /metrics
|
||||
- /metrics/cadvisor
|
||||
verbs: ["get"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: {{ include "kubernetes-agent.fullname" . }}
|
||||
labels:
|
||||
{{- include "kubernetes-agent.labels" . | nindent 4 }}
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: {{ include "kubernetes-agent.fullname" . }}
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: {{ include "kubernetes-agent.serviceAccountName" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
10
HelmChart/Public/kubernetes-agent/templates/secret.yaml
Normal file
10
HelmChart/Public/kubernetes-agent/templates/secret.yaml
Normal file
@@ -0,0 +1,10 @@
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: {{ include "kubernetes-agent.fullname" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "kubernetes-agent.labels" . | nindent 4 }}
|
||||
type: Opaque
|
||||
data:
|
||||
api-key: {{ .Values.oneuptime.apiKey | b64enc | quote }}
|
||||
80
HelmChart/Public/kubernetes-agent/values.yaml
Normal file
80
HelmChart/Public/kubernetes-agent/values.yaml
Normal file
@@ -0,0 +1,80 @@
|
||||
# OneUptime Kubernetes Agent Configuration
|
||||
|
||||
# Required: Your OneUptime instance connection details
|
||||
oneuptime:
|
||||
# URL of your OneUptime instance (e.g., https://oneuptime.example.com)
|
||||
url: ""
|
||||
# Project API key from OneUptime (Settings > API Keys)
|
||||
apiKey: ""
|
||||
|
||||
# Required: Unique name for this cluster (used as k8s.cluster.name attribute)
|
||||
clusterName: ""
|
||||
|
||||
# Namespace filters — limit which namespaces are monitored
|
||||
namespaceFilters:
|
||||
# If set, only these namespaces are monitored (empty = all namespaces)
|
||||
include: []
|
||||
# Namespaces to exclude from monitoring
|
||||
exclude:
|
||||
- kube-system
|
||||
|
||||
# OTel Collector image configuration
|
||||
image:
|
||||
repository: otel/opentelemetry-collector-contrib
|
||||
tag: "0.96.0"
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
# Deployment (metrics + events collector) resource configuration
|
||||
deployment:
|
||||
replicas: 1
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 256Mi
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 512Mi
|
||||
|
||||
# Control plane monitoring (etcd, API server, scheduler, controller manager)
|
||||
# Disabled by default — enable for self-managed clusters.
|
||||
# Managed K8s (EKS, GKE, AKS) typically don't expose control plane metrics.
|
||||
controlPlane:
|
||||
enabled: false
|
||||
etcd:
|
||||
# Endpoints to scrape etcd metrics from
|
||||
endpoints:
|
||||
- https://localhost:2379/metrics
|
||||
# TLS configuration for etcd
|
||||
insecureSkipVerify: true
|
||||
apiServer:
|
||||
endpoints:
|
||||
- https://localhost:6443/metrics
|
||||
insecureSkipVerify: true
|
||||
scheduler:
|
||||
endpoints:
|
||||
- https://localhost:10259/metrics
|
||||
insecureSkipVerify: true
|
||||
controllerManager:
|
||||
endpoints:
|
||||
- https://localhost:10257/metrics
|
||||
insecureSkipVerify: true
|
||||
|
||||
# Pod log collection via DaemonSet with filelog receiver
|
||||
logs:
|
||||
enabled: true
|
||||
resources:
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 128Mi
|
||||
limits:
|
||||
cpu: 200m
|
||||
memory: 256Mi
|
||||
|
||||
# Collection intervals
|
||||
collectionInterval: 30s
|
||||
|
||||
# Service account configuration
|
||||
serviceAccount:
|
||||
create: true
|
||||
name: ""
|
||||
annotations: {}
|
||||
Reference in New Issue
Block a user