feat(kubernetes): add service mesh and VPA management features

- Implemented KubernetesClusterServiceMesh component to display Istio and Linkerd metrics.
- Created KubernetesClusterVPADetail component for detailed views of Vertical Pod Autoscalers (VPA).
- Added KubernetesClusterVPAs component to list all VPAs in a cluster.
- Introduced KubernetesAlertTemplates for monitoring various Kubernetes conditions, including CrashLoopBackOff, Pod Pending, Node Not Ready, and more.
- Developed MonitorStepKubernetesMonitor interface for Kubernetes monitoring configurations.
This commit is contained in:
Nawaz Dhandala
2026-03-20 11:08:47 +00:00
parent 81eb16c1b7
commit 70e6924cdd
26 changed files with 3208 additions and 17 deletions

View File

@@ -69,13 +69,22 @@ data:
- name: persistentvolumes
mode: pull
interval: {{ .Values.resourceSpecs.interval }}
# HorizontalPodAutoscaler objects from the "autoscaling" API group, pulled on the
# shared resourceSpecs.interval.
- name: horizontalpodautoscalers
mode: pull
interval: {{ .Values.resourceSpecs.interval }}
group: autoscaling
# VerticalPodAutoscaler objects from the "autoscaling.k8s.io" API group, pulled on
# the same interval.
# NOTE(review): no VPA-specific toggle guards this entry in the visible template;
# presumably the VPA API is not served by every cluster - confirm the receiver
# tolerates the resource being absent.
- name: verticalpodautoscalers
mode: pull
interval: {{ .Values.resourceSpecs.interval }}
group: autoscaling.k8s.io
{{- end }}
{{- if .Values.controlPlane.enabled }}
# Scrape control plane metrics via Prometheus endpoints
{{- if or .Values.controlPlane.enabled .Values.serviceMesh.enabled }}
# Scrape metrics via Prometheus endpoints (control plane and/or service mesh)
prometheus:
config:
scrape_configs:
{{- if .Values.controlPlane.enabled }}
- job_name: etcd
scheme: https
tls_config:
@@ -115,6 +124,53 @@ data:
- targets:
- {{ . | quote }}
{{- end }}
{{- end }}
{{- if and .Values.serviceMesh.enabled (eq .Values.serviceMesh.provider "istio") }}
# Istio: scrape sidecar stats from pods that run an istio-proxy container.
# Rendered only when serviceMesh.enabled is true and serviceMesh.provider is "istio".
- job_name: envoy-stats
metrics_path: /stats/prometheus
scrape_interval: {{ .Values.serviceMesh.istio.scrapeInterval }}
kubernetes_sd_configs:
- role: pod
relabel_configs:
# Keep only targets discovered for the istio-proxy container.
- source_labels: [__meta_kubernetes_pod_container_name]
action: keep
regex: istio-proxy
# Point the scrape address at <pod IPv4>:15090. The rule only matches when the
# prometheus.io/port annotation is numeric and the pod IP is IPv4 (capture group 2);
# otherwise the discovered address is left unchanged.
# "$" is written as "$$" on purpose: this config is read by the OpenTelemetry
# Collector, which treats "$" as the start of an environment-variable reference.
# An unescaped "$2" can expand to an empty string and yield the address ":15090".
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
action: replace
regex: (\d+);((([0-9]+?)(\.|$$)){4})
replacement: $$2:15090
target_label: __address__
# Copy every Kubernetes pod label onto the scraped series.
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
# Expose the pod's namespace and name as "namespace" and "pod_name" labels.
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod_name
{{- end }}
{{- if and .Values.serviceMesh.enabled (eq .Values.serviceMesh.provider "linkerd") }}
# Linkerd: scrape proxy metrics from pods that run a linkerd-proxy container.
# Rendered only when serviceMesh.enabled is true and serviceMesh.provider is "linkerd".
- job_name: linkerd-proxy
metrics_path: /metrics
scrape_interval: {{ .Values.serviceMesh.linkerd.scrapeInterval }}
kubernetes_sd_configs:
- role: pod
relabel_configs:
# Keep only targets discovered for the linkerd-proxy container ...
- source_labels: [__meta_kubernetes_pod_container_name]
action: keep
regex: linkerd-proxy
# ... and, of those, only the container port named linkerd-admin.
- source_labels: [__meta_kubernetes_pod_container_port_name]
action: keep
regex: linkerd-admin
# Copy every Kubernetes pod label onto the scraped series.
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
# Expose the pod's namespace and name as "namespace" and "pod_name" labels.
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod_name
{{- end }}
{{- end }}
processors:
@@ -196,7 +252,7 @@ data:
metrics:
receivers:
- k8s_cluster
{{- if .Values.controlPlane.enabled }}
{{- if or .Values.controlPlane.enabled .Values.serviceMesh.enabled }}
- prometheus
{{- end }}
processors:

View File

@@ -66,6 +66,24 @@ rules:
resources:
- events
verbs: ["get", "list", "watch"]
# Read-only access to VerticalPodAutoscaler objects (autoscaling.k8s.io API group).
- apiGroups: ["autoscaling.k8s.io"]
resources:
- verticalpodautoscalers
verbs: ["get", "list", "watch"]
{{- if and .Values.serviceMesh.enabled (eq .Values.serviceMesh.provider "istio") }}
# Istio only: read-only access to Istio networking and security resources.
# Rendered when serviceMesh.enabled is true and serviceMesh.provider is "istio".
# NOTE(review): nothing in the visible collector config reads these resources -
# confirm which component consumes them.
- apiGroups: ["networking.istio.io"]
resources:
- virtualservices
- destinationrules
- gateways
- serviceentries
verbs: ["get", "list", "watch"]
- apiGroups: ["security.istio.io"]
resources:
- peerauthentications
- authorizationpolicies
verbs: ["get", "list", "watch"]
{{- end }}
# For kubeletstats receiver
- nonResourceURLs:
- /metrics

View File

@@ -80,6 +80,16 @@ resourceSpecs:
# Collection intervals
collectionInterval: 30s
# Service mesh observability (Istio / Linkerd sidecar metrics).
# When enabled, the collector config gains a Prometheus scrape job for the selected
# provider and the prometheus receiver is added to the metrics pipeline.
serviceMesh:
enabled: false
# Supported providers: "istio", "linkerd"
# The templates compare against exactly these two strings; any other value
# renders no mesh scrape job.
provider: "istio"
istio:
# Used as scrape_interval of the envoy-stats job.
scrapeInterval: 15s
linkerd:
# Used as scrape_interval of the linkerd-proxy job.
scrapeInterval: 15s
# Service account configuration
serviceAccount:
create: true