diff --git a/apps.yaml b/apps.yaml index 7385700267..cf0e8607c8 100644 --- a/apps.yaml +++ b/apps.yaml @@ -1,7 +1,7 @@ appsInfo: alertmanager: title: Alertmanager - appVersion: 0.32.0 + appVersion: 0.32.1 repo: https://github.com/prometheus/alertmanager maintainers: Prometheus Community relatedLinks: @@ -65,7 +65,7 @@ appsInfo: integration: App Platform uses Gitea as its default repository for App Platform configuration (values). Gitea can also be used by Teams to provide application code repositories. Access to Gitea is provided by the OIDC integration in App Platform. Members of the otomi-admin and team-admin group can seamlessly sign in to Gitea. When Argo CD is enabled, App Platform will automatically create a Gitops repository for each Team in Gitea. grafana: title: Grafana - appVersion: 12.4.3 + appVersion: 13.0.1 repo: https://github.com/grafana/grafana maintainers: Grafana Labs relatedLinks: @@ -185,7 +185,7 @@ appsInfo: integration: Loki can be activated to aggregate all the container logs on the platform and store them in a storage endpoint of choice (defaults to PVC). When App Platform is configured in multi-tenancy mode, logs will be split-up between team namespaces and made available for team members only. App Platform shortcuts can be used to provide selections of logs based on interest. prometheus: title: Prometheus - appVersion: 3.11.2 + appVersion: 3.11.3 repo: https://github.com/prometheus/prometheus maintainers: Prometheus relatedLinks: diff --git a/chart/chart-index/Chart.yaml b/chart/chart-index/Chart.yaml index ac3d18e347..e89302e2c3 100644 --- a/chart/chart-index/Chart.yaml +++ b/chart/chart-index/Chart.yaml @@ -68,7 +68,7 @@ dependencies: version: v0.16.0 repository: oci://ghcr.io/kserve/charts/kserve - name: kube-prometheus-stack - version: 83.7.0 + version: 84.5.0 repository: https://prometheus-community.github.io/helm-charts - name: kyverno version: 3.7.2 diff --git a/charts/kube-prometheus-stack/Chart.lock b/charts/kube-prometheus-stack/Chart.lock index 7087b149a0..c2f71a62a4 100644 --- a/charts/kube-prometheus-stack/Chart.lock +++ b/charts/kube-prometheus-stack/Chart.lock @@ -4,15 +4,15 @@ dependencies: version: 0.0.0 - name: kube-state-metrics repository: https://prometheus-community.github.io/helm-charts - version: 7.2.2 + version: 7.3.0 - name: prometheus-node-exporter repository: https://prometheus-community.github.io/helm-charts - version: 4.53.1 + version: 4.55.0 - name: grafana repository: https://grafana-community.github.io/helm-charts - version: 11.6.1 + version: 12.3.0 - name: prometheus-windows-exporter repository: https://prometheus-community.github.io/helm-charts version: 0.12.6 -digest: sha256:e21304bc9748d1449437449b6e8819afeed2f1f68c473efb775f712790bdff40 -generated: "2026-04-21T14:07:57.575448317Z" +digest: sha256:0858adf3673fb8d9530ce76ab1248795804721daa770bf77c32582e5a99cd7c8 +generated: "2026-05-01T17:58:51.49109567Z" diff --git a/charts/kube-prometheus-stack/Chart.yaml b/charts/kube-prometheus-stack/Chart.yaml index 91df5293c5..46f9def7fa 100644 --- a/charts/kube-prometheus-stack/Chart.yaml +++ b/charts/kube-prometheus-stack/Chart.yaml @@ -18,15 +18,15 @@ dependencies: - condition: kubeStateMetrics.enabled name: kube-state-metrics repository: https://prometheus-community.github.io/helm-charts - version: 7.2.2 + version: 7.3.0 - condition: nodeExporter.enabled name: prometheus-node-exporter repository: https://prometheus-community.github.io/helm-charts - version: 4.53.1 + version: 4.55.0 - condition: grafana.enabled name: grafana repository: https://grafana-community.github.io/helm-charts - version: 11.6.1 + version: 12.3.0 - condition: windowsMonitoring.enabled name: prometheus-windows-exporter repository: https://prometheus-community.github.io/helm-charts @@ -69,4 +69,4 @@ sources: - https://github.com/prometheus-community/helm-charts - https://github.com/prometheus-operator/kube-prometheus type: application -version: 83.7.0 +version: 84.5.0 diff --git a/charts/kube-prometheus-stack/charts/grafana/Chart.yaml b/charts/kube-prometheus-stack/charts/grafana/Chart.yaml index ac7a170a65..6f0f795616 100644 --- a/charts/kube-prometheus-stack/charts/grafana/Chart.yaml +++ b/charts/kube-prometheus-stack/charts/grafana/Chart.yaml @@ -6,7 +6,7 @@ annotations: - name: Upstream Project url: https://github.com/grafana/grafana apiVersion: v2 -appVersion: 12.4.3 +appVersion: 13.0.1 description: The leading tool for querying and visualizing time series and metrics. home: https://grafana.com icon: https://artifacthub.io/image/b4fed1a7-6c8f-4945-b99d-096efa3e4116 @@ -26,4 +26,4 @@ sources: - https://github.com/grafana/grafana - https://github.com/grafana-community/helm-charts type: application -version: 11.6.1 +version: 12.3.0 diff --git a/charts/kube-prometheus-stack/charts/grafana/README.md b/charts/kube-prometheus-stack/charts/grafana/README.md index fe826d9817..348429bffc 100644 --- a/charts/kube-prometheus-stack/charts/grafana/README.md +++ b/charts/kube-prometheus-stack/charts/grafana/README.md @@ -80,6 +80,10 @@ This ensures the expressions are preserved for Alertmanager instead of being ren The minimum required Kubernetes version is now 1.25. All references to deprecated APIs have been removed. +### To 12.0.0 + +Grafana 13 is configured by default. Grafana 13 explicit requires an image renderer token, which will be generated by the chart if not provided. + ## Configuration ### Example ingress with path diff --git a/charts/kube-prometheus-stack/charts/grafana/templates/_helpers.tpl b/charts/kube-prometheus-stack/charts/grafana/templates/_helpers.tpl index 30322c7a2e..e25d5955f4 100644 --- a/charts/kube-prometheus-stack/charts/grafana/templates/_helpers.tpl +++ b/charts/kube-prometheus-stack/charts/grafana/templates/_helpers.tpl @@ -111,6 +111,19 @@ app.kubernetes.io/name: {{ include "grafana.name" . }}-image-renderer app.kubernetes.io/instance: {{ .Release.Name }} {{- end }} +{{/* +Looks if there's an existing secret and reuse its password. If not it generates +new password and use it. +*/}} +{{- define "grafana.imageRenderer.token" -}} +{{- $secret := (lookup "v1" "Secret" (include "grafana.namespace" .) (printf "%s-image-renderer" (include "grafana.imageRenderer.fullname" .)) ) }} +{{- if $secret }} +{{- index $secret "data" "token" }} +{{- else }} +{{- (randAlphaNum 40) | b64enc | quote }} +{{- end }} +{{- end }} + {{/* Looks if there's an existing secret and reuse its password. If not it generates new password and use it. @@ -152,8 +165,8 @@ Formats imagePullSecrets. Input is (dict "root" . "imagePullSecrets" .{specific {{/* - Checks whether or not the configSecret secret has to be created - */}} + Checks whether or not the configSecret secret has to be created +*/}} {{- define "grafana.shouldCreateConfigSecret" -}} {{- $secretFound := false -}} {{- range $key, $value := .Values.datasources }} @@ -226,12 +239,12 @@ sensitiveKeys: {{- end -}} {{/* - Sidecars health port - */}} + Sidecars health port +*/}} {{/* - Give health port for alerts sidecar - */}} + Give health port for alerts sidecar +*/}} {{- define "grafana.sidecar.alerts.healthPort" -}} {{- $healthPort := 8081 -}} {{- if hasKey .Values.sidecar.alerts "startupProbe" -}} @@ -245,8 +258,8 @@ sensitiveKeys: {{- end -}} {{/* - Give health port for datasources sidecar - */}} + Give health port for datasources sidecar +*/}} {{- define "grafana.sidecar.datasources.healthPort" -}} {{- $healthPort := 8082 -}} {{- if hasKey .Values.sidecar.datasources "startupProbe" -}} @@ -260,8 +273,8 @@ sensitiveKeys: {{- end -}} {{/* - Give health port for notifiers sidecar - */}} + Give health port for notifiers sidecar +*/}} {{- define "grafana.sidecar.notifiers.healthPort" -}} {{- $healthPort := 8083 -}} {{- if hasKey .Values.sidecar.notifiers "startupProbe" -}} @@ -275,8 +288,8 @@ sensitiveKeys: {{- end -}} {{/* - Give health port for dashboards sidecar - */}} + Give health port for dashboards sidecar +*/}} {{- define "grafana.sidecar.dashboards.healthPort" -}} {{- $healthPort := 8084 -}} {{- if hasKey .Values.sidecar.dashboards "startupProbe" -}} diff --git a/charts/kube-prometheus-stack/charts/grafana/templates/_pod.tpl b/charts/kube-prometheus-stack/charts/grafana/templates/_pod.tpl index de204db4f2..3b52a6f0d6 100644 --- a/charts/kube-prometheus-stack/charts/grafana/templates/_pod.tpl +++ b/charts/kube-prometheus-stack/charts/grafana/templates/_pod.tpl @@ -1386,6 +1386,10 @@ containers: - name: {{ .name }} mountPath: {{ .mountPath }} {{- end }} + {{- if .Values.shadowBundledPlugins }} + - name: shadow-bundled-plugins + mountPath: /usr/share/grafana/data/plugins-bundled + {{- end }} ports: - name: {{ .Values.podPortName }} containerPort: {{ .Values.service.targetPort }} @@ -1450,6 +1454,11 @@ containers: {{- else }} value: {{ .Values.imageRenderer.grafanaProtocol }}://{{ include "grafana.fullname" . }}.{{ include "grafana.namespace" . }}:{{ .Values.service.port }}/{{ .Values.imageRenderer.grafanaSubPath }} {{- end }} + - name: GF_RENDERING_RENDERER_TOKEN + valueFrom: + secretKeyRef: + name: {{ .Values.imageRenderer.existingSecret | default (printf "%s-image-renderer" (include "grafana.imageRenderer.fullname" .)) }} + key: token {{- end }} - name: GF_PATHS_DATA value: {{ (get .Values "grafana.ini").paths.data }} @@ -1715,6 +1724,10 @@ volumes: - name: {{ .name }} emptyDir: {} {{- end }} + {{- if .Values.shadowBundledPlugins }} + - name: shadow-bundled-plugins + emptyDir: {} + {{- end }} {{- with .Values.extraContainerVolumes }} {{- tpl (toYaml .) $root | nindent 2 }} {{- end }} diff --git a/charts/kube-prometheus-stack/charts/grafana/templates/deployment.yaml b/charts/kube-prometheus-stack/charts/grafana/templates/deployment.yaml index 7bf7521550..14ceaa0ac6 100644 --- a/charts/kube-prometheus-stack/charts/grafana/templates/deployment.yaml +++ b/charts/kube-prometheus-stack/charts/grafana/templates/deployment.yaml @@ -25,6 +25,9 @@ spec: strategy: {{- toYaml . | trim | nindent 4 }} {{- end }} + {{- with .Values.progressDeadlineSeconds }} + progressDeadlineSeconds: {{ . }} + {{- end }} template: metadata: labels: diff --git a/charts/kube-prometheus-stack/charts/grafana/templates/image-renderer-deployment.yaml b/charts/kube-prometheus-stack/charts/grafana/templates/image-renderer-deployment.yaml index 20ddff4bb6..6e9b5e4921 100644 --- a/charts/kube-prometheus-stack/charts/grafana/templates/image-renderer-deployment.yaml +++ b/charts/kube-prometheus-stack/charts/grafana/templates/image-renderer-deployment.yaml @@ -27,6 +27,9 @@ spec: strategy: {{- toYaml . | trim | nindent 4 }} {{- end }} + {{- with .Values.imageRenderer.progressDeadlineSeconds }} + progressDeadlineSeconds: {{ . }} + {{- end }} template: metadata: labels: @@ -91,6 +94,13 @@ spec: env: - name: HTTP_PORT value: {{ .Values.imageRenderer.service.targetPort | quote }} + {{- if .Values.imageRenderer.service.enabled }} + - name: AUTH_TOKEN + valueFrom: + secretKeyRef: + name: {{ .Values.imageRenderer.existingSecret | default (printf "%s-image-renderer" (include "grafana.imageRenderer.fullname" .)) }} + key: token + {{- end }} {{- if .Values.imageRenderer.serviceMonitor.enabled }} - name: ENABLE_METRICS value: "true" diff --git a/charts/kube-prometheus-stack/charts/grafana/templates/image-renderer-secret.yaml b/charts/kube-prometheus-stack/charts/grafana/templates/image-renderer-secret.yaml new file mode 100644 index 0000000000..f0a1fa01b9 --- /dev/null +++ b/charts/kube-prometheus-stack/charts/grafana/templates/image-renderer-secret.yaml @@ -0,0 +1,22 @@ +{{- if and .Values.imageRenderer.enabled .Values.imageRenderer.service.enabled (not .Values.imageRenderer.existingSecret) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "grafana.imageRenderer.fullname" . }}-image-renderer + namespace: {{ include "grafana.namespace" . }} + labels: + {{- include "grafana.imageRenderer.labels" . | nindent 4 }} + {{- with .Values.imageRenderer.service.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.imageRenderer.service.annotations }} + annotations: + {{- tpl (toYaml . | nindent 4) $ }} + {{- end }} +data: + {{- if .Values.imageRenderer.token }} + token: {{ .Values.imageRenderer.token | b64enc | quote }} + {{- else }} + token: {{ include "grafana.imageRenderer.token" . }} + {{- end }} +{{- end }} diff --git a/charts/kube-prometheus-stack/charts/grafana/templates/validate.yaml b/charts/kube-prometheus-stack/charts/grafana/templates/validate.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/charts/kube-prometheus-stack/charts/grafana/values.yaml b/charts/kube-prometheus-stack/charts/grafana/values.yaml index 523afc585a..7a1cc79258 100644 --- a/charts/kube-prometheus-stack/charts/grafana/values.yaml +++ b/charts/kube-prometheus-stack/charts/grafana/values.yaml @@ -77,6 +77,10 @@ podDisruptionBudget: {} deploymentStrategy: type: RollingUpdate +## The maximum time in seconds for a Deployment to make progress before it is considered to be failed. +## ref: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#progress-deadline-seconds +progressDeadlineSeconds: null + readinessProbe: httpGet: path: /api/health @@ -179,6 +183,14 @@ extraEmptyDirMounts: [] # - name: provisioning-notifiers # mountPath: /etc/grafana/provisioning/notifiers +# Shadow `/usr/share/grafana/data/plugins-bundled` with an emptyDir so plugins +# listed under `plugins:` install cleanly into `/var/lib/grafana/plugins` instead +# of failing on the read-only bundled directory shipped in the Grafana image. +# Required for plugins moved out of core in Grafana 13 (e.g. `elasticsearch`, +# `cloudwatch`) when listed in `plugins:`. Side effect: any bundled plugin not +# explicitly listed in `plugins:` will not be available. +shadowBundledPlugins: false + # Apply extra labels to common labels. extraLabels: {} @@ -190,7 +202,7 @@ downloadDashboardsImage: # -- The Docker registry registry: docker.io repository: curlimages/curl - tag: 8.19.0 + tag: 8.20.0 sha: "" pullPolicy: IfNotPresent @@ -979,7 +991,7 @@ sidecar: # -- The Docker registry registry: quay.io repository: kiwigrid/k8s-sidecar - tag: 2.6.0 + tag: 2.7.1 sha: "" imagePullPolicy: IfNotPresent resources: {} @@ -1460,6 +1472,10 @@ revisionHistoryLimit: 10 ## Add a separate remote image renderer deployment/service imageRenderer: deploymentStrategy: {} + ## The maximum time in seconds for the image renderer Deployment to make progress before it is + ## considered to be failed. + ## ref: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#progress-deadline-seconds + progressDeadlineSeconds: null # Enable the image-renderer deployment & service enabled: false replicas: 1 @@ -1474,6 +1490,10 @@ imageRenderer: serverURL: "" # The callback url of grafana instances if it is not in the same namespace with the remote image renderer renderingCallbackURL: "" + # Token used for authentication between Grafana and the remote image renderer. + token: "" + # Use an existing secret for the image renderer token. Must contain a key named "token". + existingSecret: "" image: # -- The Docker registry registry: docker.io diff --git a/charts/kube-prometheus-stack/charts/kube-state-metrics/Chart.yaml b/charts/kube-prometheus-stack/charts/kube-state-metrics/Chart.yaml index b8dc96a112..5e01c39793 100644 --- a/charts/kube-prometheus-stack/charts/kube-state-metrics/Chart.yaml +++ b/charts/kube-prometheus-stack/charts/kube-state-metrics/Chart.yaml @@ -26,4 +26,4 @@ name: kube-state-metrics sources: - https://github.com/kubernetes/kube-state-metrics/ type: application -version: 7.2.2 +version: 7.3.0 diff --git a/charts/kube-prometheus-stack/charts/kube-state-metrics/values.yaml b/charts/kube-prometheus-stack/charts/kube-state-metrics/values.yaml index 9f71fbb165..7acbe391a2 100644 --- a/charts/kube-prometheus-stack/charts/kube-state-metrics/values.yaml +++ b/charts/kube-prometheus-stack/charts/kube-state-metrics/values.yaml @@ -106,7 +106,7 @@ kubeRBACProxy: image: registry: quay.io repository: brancz/kube-rbac-proxy - tag: v0.21.2 + tag: v0.22.0 sha: "" pullPolicy: IfNotPresent diff --git a/charts/kube-prometheus-stack/charts/prometheus-node-exporter/Chart.yaml b/charts/kube-prometheus-stack/charts/prometheus-node-exporter/Chart.yaml index b016fb6876..9d3b9cfffe 100644 --- a/charts/kube-prometheus-stack/charts/prometheus-node-exporter/Chart.yaml +++ b/charts/kube-prometheus-stack/charts/prometheus-node-exporter/Chart.yaml @@ -26,4 +26,4 @@ name: prometheus-node-exporter sources: - https://github.com/prometheus/node_exporter/ type: application -version: 4.53.1 +version: 4.55.0 diff --git a/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/_helpers.tpl b/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/_helpers.tpl index 890c487a8f..837c97b1d6 100644 --- a/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/_helpers.tpl +++ b/charts/kube-prometheus-stack/charts/prometheus-node-exporter/templates/_helpers.tpl @@ -79,15 +79,15 @@ The image to use {{- fail "image.sha forbidden. Use image.digest instead" }} {{- else if .Values.image.digest }} {{- if .Values.global.imageRegistry }} -{{- printf "%s/%s:%s@%s" .Values.global.imageRegistry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) .Values.image.digest }} +{{- printf "%s/%s:%s%s@%s" .Values.global.imageRegistry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) (ternary "-distroless" "" .Values.image.distroless) .Values.image.digest }} {{- else }} -{{- printf "%s/%s:%s@%s" .Values.image.registry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) .Values.image.digest }} +{{- printf "%s/%s:%s%s@%s" .Values.image.registry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) (ternary "-distroless" "" .Values.image.distroless) .Values.image.digest }} {{- end }} {{- else }} {{- if .Values.global.imageRegistry }} -{{- printf "%s/%s:%s" .Values.global.imageRegistry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) }} +{{- printf "%s/%s:%s%s" .Values.global.imageRegistry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) (ternary "-distroless" "" .Values.image.distroless) }} {{- else }} -{{- printf "%s/%s:%s" .Values.image.registry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) }} +{{- printf "%s/%s:%s%s" .Values.image.registry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) (ternary "-distroless" "" .Values.image.distroless) }} {{- end }} {{- end }} {{- end }} @@ -202,8 +202,8 @@ labelValueLengthLimit: {{ . }} {{- end }} {{/* -The default node affinity to exclude -- AWS Fargate +The default node affinity to exclude +- AWS Fargate - Azure virtual nodes */}} {{- define "prometheus-node-exporter.defaultAffinity" -}} @@ -212,11 +212,11 @@ nodeAffinity: nodeSelectorTerms: - matchExpressions: - key: eks.amazonaws.com/compute-type - operator: NotIn + operator: NotIn # codespell:ignore values: - fargate - key: type - operator: NotIn + operator: NotIn # codespell:ignore values: - virtual-kubelet {{- end -}} diff --git a/charts/kube-prometheus-stack/charts/prometheus-node-exporter/values.yaml b/charts/kube-prometheus-stack/charts/prometheus-node-exporter/values.yaml index c58ee4e687..1bb18f2bac 100644 --- a/charts/kube-prometheus-stack/charts/prometheus-node-exporter/values.yaml +++ b/charts/kube-prometheus-stack/charts/prometheus-node-exporter/values.yaml @@ -6,6 +6,8 @@ image: repository: prometheus/node-exporter # Overrides the image tag whose default is {{ printf "v%s" .Chart.AppVersion }} tag: "" + # Use the distroless image variant. + distroless: false pullPolicy: IfNotPresent digest: "" @@ -45,7 +47,7 @@ kubeRBACProxy: image: registry: quay.io repository: brancz/kube-rbac-proxy - tag: v0.21.2 + tag: v0.22.0 sha: "" pullPolicy: IfNotPresent @@ -412,7 +414,7 @@ hostSysFsMount: ## Assign a group of affinity scheduling rules ## The default nodeAffinity excludes Fargate nodes and virtual kubelets from scheduling -## unless overriden by hard node affinity set in the field. +## unless overridden by hard node affinity set in the field. affinity: {} # nodeAffinity: # requiredDuringSchedulingIgnoredDuringExecution: diff --git a/charts/kube-prometheus-stack/templates/_helpers.tpl b/charts/kube-prometheus-stack/templates/_helpers.tpl index 3c1fe6a6cf..8fb4041c7a 100644 --- a/charts/kube-prometheus-stack/templates/_helpers.tpl +++ b/charts/kube-prometheus-stack/templates/_helpers.tpl @@ -212,6 +212,17 @@ Allow kube-proxy job name to be overridden {{- end -}} {{- end -}} +{{/* +Allow kube-apiserver job name to be overridden +*/}} +{{- define "kube-prometheus-stack-kube-apiserver.name" -}} + {{- if index .Values "kubeApiServer" "jobNameOverride" -}} + {{- index .Values "kubeApiServer" "jobNameOverride" -}} + {{- else -}} + {{- print "apiserver" -}} + {{- end -}} +{{- end -}} + {{/* Allow kube-state-metrics job name to be overridden */}} diff --git a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/apiserver.yaml b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/apiserver.yaml index 940e76e280..e04ec9efa6 100644 --- a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/apiserver.yaml +++ b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/apiserver.yaml @@ -5,6 +5,7 @@ https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-promet */ -}} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeApiServer.enabled }} +{{- $kubeApiserverJob := include "kube-prometheus-stack-kube-apiserver.name" . }} apiVersion: v1 kind: ConfigMap metadata: @@ -20,7 +21,7 @@ metadata: {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} data: apiserver.json: |- - {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.","gridPos":{"h":2,"w":24,"x":0,"y":0},"id":1,"options":{"content":"The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only."},"pluginVersion":"v11.4.0","title":"Notice","type":"text"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many percent of requests (both read and write) in 30 days have been answered successfully and fast enough?","fieldConfig":{"defaults":{"decimals":3,"unit":"percentunit"}},"gridPos":{"h":7,"w":8,"x":0,"y":2},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"}"}],"title":"Availability (30d) > 99.000%","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How much error budget is left looking at our 0.990% availability guarantees?","fieldConfig":{"defaults":{"custom":{"fillOpacity":100},"decimals":3,"unit":"percentunit"}},"gridPos":{"h":7,"w":16,"x":8,"y":2},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"100 * (apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"} - 0.990000)","legendFormat":"errorbudget"}],"title":"ErrorBudget (30d) > 99.000%","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many percent of read requests (LIST,GET) in 30 days have been answered successfully and fast enough?","fieldConfig":{"defaults":{"decimals":3,"unit":"percentunit"}},"gridPos":{"h":7,"w":6,"x":0,"y":9},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"apiserver_request:availability30d{verb=\"read\", cluster=\"$cluster\"}"}],"title":"Read Availability (30d)","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many read requests (LIST,GET) per second do the apiservers get by code?","fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"stacking":{"mode":"normal"}},"unit":"reqps"},"overrides":[{"matcher":{"id":"byRegexp","options":"/2../i"},"properties":[{"id":"color","value":"#56A64B"}]},{"matcher":{"id":"byRegexp","options":"/3../i"},"properties":[{"id":"color","value":"#F2CC0C"}]},{"matcher":{"id":"byRegexp","options":"/4../i"},"properties":[{"id":"color","value":"#3274D9"}]},{"matcher":{"id":"byRegexp","options":"/5../i"},"properties":[{"id":"color","value":"#E02F44"}]}]},"gridPos":{"h":7,"w":6,"x":6,"y":9},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})","legendFormat":"{{ code }}"}],"title":"Read SLI - Requests","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many percent of read requests (LIST,GET) per second are returned with errors (5xx)?","fieldConfig":{"defaults":{"min":0,"unit":"percentunit"}},"gridPos":{"h":7,"w":6,"x":12,"y":9},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})","legendFormat":"{{ resource }}"}],"title":"Read SLI - Errors","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many seconds is the 99th percentile for reading (LIST|GET) a given resource?","fieldConfig":{"defaults":{"unit":"s"}},"gridPos":{"h":7,"w":6,"x":18,"y":9},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile{verb=\"read\", cluster=\"$cluster\"}","legendFormat":"{{ resource }}"}],"title":"Read SLI - Duration","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many percent of write requests (POST|PUT|PATCH|DELETE) in 30 days have been answered successfully and fast enough?","fieldConfig":{"defaults":{"decimals":3,"unit":"percentunit"}},"gridPos":{"h":7,"w":6,"x":0,"y":16},"id":8,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"apiserver_request:availability30d{verb=\"write\", cluster=\"$cluster\"}"}],"title":"Write Availability (30d)","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many write requests (POST|PUT|PATCH|DELETE) per second do the apiservers get by code?","fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"stacking":{"mode":"normal"}},"unit":"reqps"},"overrides":[{"matcher":{"id":"byRegexp","options":"/2../i"},"properties":[{"id":"color","value":"#56A64B"}]},{"matcher":{"id":"byRegexp","options":"/3../i"},"properties":[{"id":"color","value":"#F2CC0C"}]},{"matcher":{"id":"byRegexp","options":"/4../i"},"properties":[{"id":"color","value":"#3274D9"}]},{"matcher":{"id":"byRegexp","options":"/5../i"},"properties":[{"id":"color","value":"#E02F44"}]}]},"gridPos":{"h":7,"w":6,"x":6,"y":16},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})","legendFormat":"{{ code }}"}],"title":"Write SLI - Requests","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many percent of write requests (POST|PUT|PATCH|DELETE) per second are returned with errors (5xx)?","fieldConfig":{"defaults":{"min":0,"unit":"percentunit"}},"gridPos":{"h":7,"w":6,"x":12,"y":16},"id":10,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})","legendFormat":"{{ resource }}"}],"title":"Write SLI - Errors","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many seconds is the 99th percentile for writing (POST|PUT|PATCH|DELETE) a given resource?","fieldConfig":{"defaults":{"unit":"s"}},"gridPos":{"h":7,"w":6,"x":18,"y":16},"id":11,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile{verb=\"write\", cluster=\"$cluster\"}","legendFormat":"{{ resource }}"}],"title":"Write SLI - Duration","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"min":0,"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":0,"y":23},"id":12,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":false},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)","legendFormat":"{{instance}} {{name}}"}],"title":"Work Queue Add Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"min":0,"unit":"short"}},"gridPos":{"h":7,"w":12,"x":12,"y":23},"id":13,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":false},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)","legendFormat":"{{instance}} {{name}}"}],"title":"Work Queue Depth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"min":0,"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":30},"id":14,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name, le))","legendFormat":"{{instance}} {{name}}"}],"title":"Work Queue Latency","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"}},"gridPos":{"h":7,"w":8,"x":0,"y":37},"id":15,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"process_resident_memory_bytes{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}","legendFormat":"{{instance}}"}],"title":"Memory","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"min":0,"unit":"short"}},"gridPos":{"h":7,"w":8,"x":8,"y":37},"id":16,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])","legendFormat":"{{instance}}"}],"title":"CPU usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"short"}},"gridPos":{"h":7,"w":8,"x":16,"y":37},"id":17,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"go_goroutines{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}","legendFormat":"{{instance}}"}],"title":"Goroutines","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"apiserver\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"name":"instance","query":"label_values(up{job=\"apiserver\", cluster=\"$cluster\"}, instance)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / API server","uid":"09ec8aa1e996d6ffcd6817bbaff4db1b"}`}} + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.","gridPos":{"h":2,"w":24,"x":0,"y":0},"id":1,"options":{"content":"The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only."},"pluginVersion":"v11.4.0","title":"Notice","type":"text"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many percent of requests (both read and write) in 30 days have been answered successfully and fast enough?","fieldConfig":{"defaults":{"decimals":3,"unit":"percentunit"}},"gridPos":{"h":7,"w":8,"x":0,"y":2},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"}"}],"title":"Availability (30d) > 99.000%","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How much error budget is left looking at our 0.990% availability guarantees?","fieldConfig":{"defaults":{"custom":{"fillOpacity":100},"decimals":3,"unit":"percentunit"}},"gridPos":{"h":7,"w":16,"x":8,"y":2},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"100 * (apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"} - 0.990000)","legendFormat":"errorbudget"}],"title":"ErrorBudget (30d) > 99.000%","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many percent of read requests (LIST,GET) in 30 days have been answered successfully and fast enough?","fieldConfig":{"defaults":{"decimals":3,"unit":"percentunit"}},"gridPos":{"h":7,"w":6,"x":0,"y":9},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"apiserver_request:availability30d{verb=\"read\", cluster=\"$cluster\"}"}],"title":"Read Availability (30d)","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many read requests (LIST,GET) per second do the apiservers get by code?","fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"stacking":{"mode":"normal"}},"unit":"reqps"},"overrides":[{"matcher":{"id":"byRegexp","options":"/2../i"},"properties":[{"id":"color","value":"#56A64B"}]},{"matcher":{"id":"byRegexp","options":"/3../i"},"properties":[{"id":"color","value":"#F2CC0C"}]},{"matcher":{"id":"byRegexp","options":"/4../i"},"properties":[{"id":"color","value":"#3274D9"}]},{"matcher":{"id":"byRegexp","options":"/5../i"},"properties":[{"id":"color","value":"#E02F44"}]}]},"gridPos":{"h":7,"w":6,"x":6,"y":9},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})","legendFormat":"{{ code }}"}],"title":"Read SLI - Requests","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many percent of read requests (LIST,GET) per second are returned with errors (5xx)?","fieldConfig":{"defaults":{"min":0,"unit":"percentunit"}},"gridPos":{"h":7,"w":6,"x":12,"y":9},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})","legendFormat":"{{ resource }}"}],"title":"Read SLI - Errors","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many seconds is the 99th percentile for reading (LIST|GET) a given resource?","fieldConfig":{"defaults":{"unit":"s"}},"gridPos":{"h":7,"w":6,"x":18,"y":9},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile{verb=\"read\", cluster=\"$cluster\"}","legendFormat":"{{ resource }}"}],"title":"Read SLI - Duration","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many percent of write requests (POST|PUT|PATCH|DELETE) in 30 days have been answered successfully and fast enough?","fieldConfig":{"defaults":{"decimals":3,"unit":"percentunit"}},"gridPos":{"h":7,"w":6,"x":0,"y":16},"id":8,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"apiserver_request:availability30d{verb=\"write\", cluster=\"$cluster\"}"}],"title":"Write Availability (30d)","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many write requests (POST|PUT|PATCH|DELETE) per second do the apiservers get by code?","fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"stacking":{"mode":"normal"}},"unit":"reqps"},"overrides":[{"matcher":{"id":"byRegexp","options":"/2../i"},"properties":[{"id":"color","value":"#56A64B"}]},{"matcher":{"id":"byRegexp","options":"/3../i"},"properties":[{"id":"color","value":"#F2CC0C"}]},{"matcher":{"id":"byRegexp","options":"/4../i"},"properties":[{"id":"color","value":"#3274D9"}]},{"matcher":{"id":"byRegexp","options":"/5../i"},"properties":[{"id":"color","value":"#E02F44"}]}]},"gridPos":{"h":7,"w":6,"x":6,"y":16},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})","legendFormat":"{{ code }}"}],"title":"Write SLI - Requests","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many percent of write requests (POST|PUT|PATCH|DELETE) per second are returned with errors (5xx)?","fieldConfig":{"defaults":{"min":0,"unit":"percentunit"}},"gridPos":{"h":7,"w":6,"x":12,"y":16},"id":10,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})","legendFormat":"{{ resource }}"}],"title":"Write SLI - Errors","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many seconds is the 99th percentile for writing (POST|PUT|PATCH|DELETE) a given resource?","fieldConfig":{"defaults":{"unit":"s"}},"gridPos":{"h":7,"w":6,"x":18,"y":16},"id":11,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile{verb=\"write\", cluster=\"$cluster\"}","legendFormat":"{{ resource }}"}],"title":"Write SLI - Duration","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"min":0,"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":0,"y":23},"id":12,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":false},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(workqueue_adds_total{job=\"`}}{{ $kubeApiserverJob }}{{`\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)","legendFormat":"{{instance}} {{name}}"}],"title":"Work Queue Add Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"min":0,"unit":"short"}},"gridPos":{"h":7,"w":12,"x":12,"y":23},"id":13,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":false},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(workqueue_depth{job=\"`}}{{ $kubeApiserverJob }}{{`\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)","legendFormat":"{{instance}} {{name}}"}],"title":"Work Queue Depth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"min":0,"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":30},"id":14,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"`}}{{ $kubeApiserverJob }}{{`\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name, le))","legendFormat":"{{instance}} {{name}}"}],"title":"Work Queue Latency","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"}},"gridPos":{"h":7,"w":8,"x":0,"y":37},"id":15,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"process_resident_memory_bytes{job=\"`}}{{ $kubeApiserverJob }}{{`\",instance=~\"$instance\", cluster=\"$cluster\"}","legendFormat":"{{instance}}"}],"title":"Memory","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"min":0,"unit":"short"}},"gridPos":{"h":7,"w":8,"x":8,"y":37},"id":16,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"rate(process_cpu_seconds_total{job=\"`}}{{ $kubeApiserverJob }}{{`\",instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])","legendFormat":"{{instance}}"}],"title":"CPU usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"short"}},"gridPos":{"h":7,"w":8,"x":16,"y":37},"id":17,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"go_goroutines{job=\"`}}{{ $kubeApiserverJob }}{{`\",instance=~\"$instance\", cluster=\"$cluster\"}","legendFormat":"{{instance}}"}],"title":"Goroutines","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"`}}{{ $kubeApiserverJob }}{{`\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"name":"instance","query":"label_values(up{job=\"`}}{{ $kubeApiserverJob }}{{`\", cluster=\"$cluster\"}, instance)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / API server","uid":"09ec8aa1e996d6ffcd6817bbaff4db1b"}`}} {{- end }} --- {{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeApiServer.enabled }} diff --git a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-cluster.yaml b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-cluster.yaml index 8905448878..4144d9e5e7 100644 --- a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-cluster.yaml +++ b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-cluster.yaml @@ -20,7 +20,7 @@ metadata: {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} data: k8s-resources-windows-cluster.json: |- - {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":3,"w":4,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"1 - avg(rate(windows_cpu_time_total{cluster=\"$cluster\", job=\"windows-exporter\", mode=\"idle\"}[$__rate_interval]))","instant":true}],"title":"CPU Utilisation","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":4,"y":0},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) / sum(node:windows_node_num_cpu:sum{cluster=\"$cluster\"})","instant":true}],"title":"CPU Requests Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":8,"y":0},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) / sum(node:windows_node_num_cpu:sum{cluster=\"$cluster\"})","instant":true}],"title":"CPU Limits Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":12,"y":0},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"1 - sum(:windows_node_memory_MemFreeCached_bytes:sum{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})","instant":true}],"title":"Memory Utilisation","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":16,"y":0},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})","instant":true}],"title":"Memory Requests Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":20,"y":0},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})","instant":true}],"title":"Memory Limits Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}}},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)","legendFormat":"__auto"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Namespace"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/490b402361724ab1d4c45666c1fa9b6f/k8s-resources-windows-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":8,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true}],"title":"CPU Quota","transformations":[{"id":"joinByField","options":{"byField":"namespace","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"namespace":5},"renameByName":{"Value #A":"CPU Usage","Value #B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU Limits","Value #E":"CPU Limits %","namespace":"Namespace"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"decbytes"}},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace)","legendFormat":"__auto"}],"title":"Memory Usage (Private Working Set)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Memory Usage"},"properties":[{"id":"unit","value":"decbytes"}]},{"matcher":{"id":"byName","options":"Memory Requests"},"properties":[{"id":"unit","value":"decbytes"}]},{"matcher":{"id":"byName","options":"Memory Limits"},"properties":[{"id":"unit","value":"decbytes"}]},{"matcher":{"id":"byName","options":"Namespace"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/490b402361724ab1d4c45666c1fa9b6f/k8s-resources-windows-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":28},"id":10,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true}],"title":"Memory Requests by Namespace","transformations":[{"id":"joinByField","options":{"byField":"namespace","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"namespace":5},"renameByName":{"Value #A":"Memory Usage","Value #B":"Memory Requests","Value #C":"Memory Requests %","Value #D":"Memory Limits","Value #E":"Memory Limits %","namespace":"Namespace"}}}],"type":"table"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"windows-exporter\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Cluster(Windows)","uid":"4d08557fd9391b100730f2494bccac68"}`}} + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":3,"w":4,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"1 - avg(rate(windows_cpu_time_total{cluster=\"$cluster\", job=\"windows-exporter\", mode=\"idle\"}[$__rate_interval]))","instant":true}],"title":"CPU Utilisation","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":4,"y":0},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) / sum(node:windows_node_num_cpu:sum{cluster=\"$cluster\"})","instant":true}],"title":"CPU Requests Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":8,"y":0},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) / sum(node:windows_node_num_cpu:sum{cluster=\"$cluster\"})","instant":true}],"title":"CPU Limits Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":12,"y":0},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"1 - sum(:windows_node_memory_MemFreeCached_bytes:sum{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})","instant":true}],"title":"Memory Utilisation","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":16,"y":0},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})","instant":true}],"title":"Memory Requests Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":20,"y":0},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})","instant":true}],"title":"Memory Limits Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}}},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)","legendFormat":"__auto"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Namespace"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/490b402361724ab1d4c45666c1fa9b6f?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":8,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true}],"title":"CPU Quota","transformations":[{"id":"joinByField","options":{"byField":"namespace","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"namespace":5},"renameByName":{"Value #A":"CPU Usage","Value #B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU Limits","Value #E":"CPU Limits %","namespace":"Namespace"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"decbytes"}},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace)","legendFormat":"__auto"}],"title":"Memory Usage (Private Working Set)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Memory Usage"},"properties":[{"id":"unit","value":"decbytes"}]},{"matcher":{"id":"byName","options":"Memory Requests"},"properties":[{"id":"unit","value":"decbytes"}]},{"matcher":{"id":"byName","options":"Memory Limits"},"properties":[{"id":"unit","value":"decbytes"}]},{"matcher":{"id":"byName","options":"Namespace"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/490b402361724ab1d4c45666c1fa9b6f?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":28},"id":10,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true}],"title":"Memory Requests by Namespace","transformations":[{"id":"joinByField","options":{"byField":"namespace","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"namespace":5},"renameByName":{"Value #A":"Memory Usage","Value #B":"Memory Requests","Value #C":"Memory Requests %","Value #D":"Memory Limits","Value #E":"Memory Limits %","namespace":"Namespace"}}}],"type":"table"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"windows-exporter\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Cluster(Windows)","uid":"4d08557fd9391b100730f2494bccac68"}`}} {{- end }} --- {{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.windowsMonitoring.enabled }} diff --git a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-namespace.yaml b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-namespace.yaml index a7cf05e448..a5c099efb6 100644 --- a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-namespace.yaml +++ b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-namespace.yaml @@ -20,7 +20,7 @@ metadata: {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} data: k8s-resources-windows-namespace.json: |- - {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}}},"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","legendFormat":"__auto"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/40597a704a610e936dc6ed374a7ce023/k8s-resources-windows-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":2,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true}],"title":"CPU Quota","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"pod":5},"renameByName":{"Value #A":"CPU Usage","Value #B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU Limits","Value #E":"CPU Limits %","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"decbytes"}},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","legendFormat":"__auto"}],"title":"Memory Usage (Private Working Set)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/40597a704a610e936dc6ed374a7ce023/k8s-resources-windows-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":4,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true}],"title":"Memory Quota","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"pod":5},"renameByName":{"Value #A":"Memory Usage","Value #B":"Memory Requests","Value #C":"Memory Requests %","Value #D":"Memory Limits","Value #E":"Memory Limits %","pod":"Pod"}}}],"type":"table"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"windows-exporter\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"namespace","name":"namespace","query":"label_values(windows_pod_container_available{cluster=\"$cluster\"}, namespace)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Namespace(Windows)","uid":"490b402361724ab1d4c45666c1fa9b6f"}`}} + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}}},"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","legendFormat":"__auto"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/40597a704a610e936dc6ed374a7ce023?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":2,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true}],"title":"CPU Quota","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"pod":5},"renameByName":{"Value #A":"CPU Usage","Value #B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU Limits","Value #E":"CPU Limits %","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"decbytes"}},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","legendFormat":"__auto"}],"title":"Memory Usage (Private Working Set)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/40597a704a610e936dc6ed374a7ce023?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":4,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true}],"title":"Memory Quota","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"pod":5},"renameByName":{"Value #A":"Memory Usage","Value #B":"Memory Requests","Value #C":"Memory Requests %","Value #D":"Memory Limits","Value #E":"Memory Limits %","pod":"Pod"}}}],"type":"table"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"windows-exporter\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"namespace","name":"namespace","query":"label_values(windows_pod_container_available{cluster=\"$cluster\"}, namespace)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Namespace(Windows)","uid":"490b402361724ab1d4c45666c1fa9b6f"}`}} {{- end }} --- {{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.windowsMonitoring.enabled }} diff --git a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-pod.yaml b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-pod.yaml index cb4720f317..376850b32c 100644 --- a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-pod.yaml +++ b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-pod.yaml @@ -20,7 +20,7 @@ metadata: {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} data: k8s-resources-windows-pod.json: |- - {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}}},"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","legendFormat":"__auto"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Namespace"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/490b402361724ab1d4c45666c1fa9b6f/k8s-resources-windows-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":2,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true}],"title":"CPU Quota","transformations":[{"id":"joinByField","options":{"byField":"container","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"container":5},"renameByName":{"Value #A":"CPU Usage","Value #B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU Limits","Value #E":"CPU Limits %","container":"Container"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"decbytes"}},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","legendFormat":"__auto"}],"title":"Memory Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":4,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true}],"title":"Memory Quota","transformations":[{"id":"joinByField","options":{"byField":"container","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"container":5},"renameByName":{"Value #A":"Memory Usage","Value #B":"Memory Requests","Value #C":"Memory Requests %","Value #D":"Memory Limits","Value #E":"Memory Limits %","container":"Container"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"}},"gridPos":{"h":7,"w":24,"x":0,"y":28},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum by (container) (rate(windows_container_network_received_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))","legendFormat":"Received : {{ container }}"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum by (container) (rate(windows_container_network_transmitted_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))","legendFormat":"Transmitted : {{ container }}"}],"title":"Network I/O","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"windows-exporter\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"namespace","name":"namespace","query":"label_values(windows_pod_container_available{cluster=\"$cluster\"}, namespace)","refresh":2,"sort":1,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"pod","name":"pod","query":"label_values(windows_pod_container_available{cluster=\"$cluster\",namespace=\"$namespace\"}, pod)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Pod(Windows)","uid":"40597a704a610e936dc6ed374a7ce023"}`}} + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}}},"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","legendFormat":"__auto"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Namespace"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/490b402361724ab1d4c45666c1fa9b6f?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":2,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true}],"title":"CPU Quota","transformations":[{"id":"joinByField","options":{"byField":"container","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"container":5},"renameByName":{"Value #A":"CPU Usage","Value #B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU Limits","Value #E":"CPU Limits %","container":"Container"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"decbytes"}},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","legendFormat":"__auto"}],"title":"Memory Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":4,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true}],"title":"Memory Quota","transformations":[{"id":"joinByField","options":{"byField":"container","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"container":5},"renameByName":{"Value #A":"Memory Usage","Value #B":"Memory Requests","Value #C":"Memory Requests %","Value #D":"Memory Limits","Value #E":"Memory Limits %","container":"Container"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"}},"gridPos":{"h":7,"w":24,"x":0,"y":28},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum by (container) (rate(windows_container_network_received_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))","legendFormat":"Received : {{ container }}"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum by (container) (rate(windows_container_network_transmitted_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))","legendFormat":"Transmitted : {{ container }}"}],"title":"Network I/O","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"windows-exporter\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"namespace","name":"namespace","query":"label_values(windows_pod_container_available{cluster=\"$cluster\"}, namespace)","refresh":2,"sort":1,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"pod","name":"pod","query":"label_values(windows_pod_container_available{cluster=\"$cluster\",namespace=\"$namespace\"}, pod)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Pod(Windows)","uid":"40597a704a610e936dc6ed374a7ce023"}`}} {{- end }} --- {{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.windowsMonitoring.enabled }} diff --git a/charts/kube-prometheus-stack/templates/prometheus/prometheus.yaml b/charts/kube-prometheus-stack/templates/prometheus/prometheus.yaml index 90d612851a..67bd9d2d1f 100644 --- a/charts/kube-prometheus-stack/templates/prometheus/prometheus.yaml +++ b/charts/kube-prometheus-stack/templates/prometheus/prometheus.yaml @@ -404,6 +404,9 @@ spec: {{- if not .Values.prometheus.agentMode }} {{- if .Values.prometheus.prometheusSpec.thanos }} thanos: +{{- with .Values.prometheus.prometheusSpec.thanos.image }} + image: {{ . }} +{{- end }} {{- with (omit .Values.prometheus.prometheusSpec.thanos "objectStorageConfig" "secretProviderClass") }} {{ toYaml . | indent 4 }} {{- end }} diff --git a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml index 8af24e334b..d36b10e2cf 100644 --- a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml +++ b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml @@ -5,6 +5,7 @@ https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-promet */ -}} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeApiServer.enabled .Values.defaultRules.rules.kubeApiserverAvailability }} +{{- $kubeApiserverJob := include "kube-prometheus-stack-kube-apiserver.name" . }} apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: @@ -194,7 +195,7 @@ spec: {{- end }} {{- end }} record: apiserver_request:availability30d - - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,code,resource) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET"}[5m])) labels: verb: read {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} @@ -206,7 +207,7 @@ spec: {{- end }} {{- end }} record: code_resource:apiserver_request_total:rate5m - - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,code,resource) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE"}[5m])) labels: verb: write {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} @@ -218,7 +219,7 @@ spec: {{- end }} {{- end }} record: code_resource:apiserver_request_total:rate5m - - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"2.."}[1h])) + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, code, verb) (increase(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"2.."}[1h])) record: code_verb:apiserver_request_total:increase1h {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} labels: @@ -229,7 +230,7 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} {{- end }} - - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"3.."}[1h])) + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, code, verb) (increase(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"3.."}[1h])) record: code_verb:apiserver_request_total:increase1h {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} labels: @@ -240,7 +241,7 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} {{- end }} - - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"4.."}[1h])) + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, code, verb) (increase(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"4.."}[1h])) record: code_verb:apiserver_request_total:increase1h {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} labels: @@ -251,7 +252,7 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} {{- end }} - - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, code, verb) (increase(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) record: code_verb:apiserver_request_total:increase1h {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} labels: diff --git a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-burnrate.rules.yaml b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-burnrate.rules.yaml index 5371e13d6d..4777a803b9 100644 --- a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-burnrate.rules.yaml +++ b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-burnrate.rules.yaml @@ -5,6 +5,7 @@ https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-promet */ -}} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeApiServer.enabled .Values.defaultRules.rules.kubeApiserverBurnrate }} +{{- $kubeApiserverJob := include "kube-prometheus-stack-kube-apiserver.name" . }} apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: @@ -28,26 +29,26 @@ spec: ( ( # too slow - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1d])) - ( ( - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[1d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[1d])) or vector(0) ) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[1d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[1d])) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[1d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[1d])) ) ) + # errors - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",code=~"5.."}[1d])) ) / - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET"}[1d])) labels: verb: read {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} @@ -63,26 +64,26 @@ spec: ( ( # too slow - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1h])) - ( ( - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[1h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[1h])) or vector(0) ) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[1h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[1h])) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[1h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[1h])) ) ) + # errors - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",code=~"5.."}[1h])) ) / - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET"}[1h])) labels: verb: read {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} @@ -98,26 +99,26 @@ spec: ( ( # too slow - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[2h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[2h])) - ( ( - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[2h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[2h])) or vector(0) ) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[2h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[2h])) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[2h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[2h])) ) ) + # errors - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",code=~"5.."}[2h])) ) / - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET"}[2h])) labels: verb: read {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} @@ -133,26 +134,26 @@ spec: ( ( # too slow - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[30m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[30m])) - ( ( - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[30m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[30m])) or vector(0) ) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[30m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[30m])) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[30m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[30m])) ) ) + # errors - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",code=~"5.."}[30m])) ) / - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET"}[30m])) labels: verb: read {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} @@ -168,26 +169,26 @@ spec: ( ( # too slow - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[3d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[3d])) - ( ( - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[3d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[3d])) or vector(0) ) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[3d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[3d])) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[3d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[3d])) ) ) + # errors - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",code=~"5.."}[3d])) ) / - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET"}[3d])) labels: verb: read {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} @@ -203,26 +204,26 @@ spec: ( ( # too slow - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m])) - ( ( - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[5m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[5m])) or vector(0) ) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[5m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[5m])) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[5m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[5m])) ) ) + # errors - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",code=~"5.."}[5m])) ) / - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET"}[5m])) labels: verb: read {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} @@ -238,26 +239,26 @@ spec: ( ( # too slow - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[6h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[6h])) - ( ( - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[6h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[6h])) or vector(0) ) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[6h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[6h])) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[6h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[6h])) ) ) + # errors - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",code=~"5.."}[6h])) ) / - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET"}[6h])) labels: verb: read {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} @@ -273,15 +274,15 @@ spec: ( ( # too slow - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1d])) - - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[1d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[1d])) ) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d])) ) / - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE"}[1d])) labels: verb: write {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} @@ -297,15 +298,15 @@ spec: ( ( # too slow - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1h])) - - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[1h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[1h])) ) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) ) / - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE"}[1h])) labels: verb: write {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} @@ -321,15 +322,15 @@ spec: ( ( # too slow - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[2h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[2h])) - - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[2h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[2h])) ) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h])) ) / - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE"}[2h])) labels: verb: write {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} @@ -345,15 +346,15 @@ spec: ( ( # too slow - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[30m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[30m])) - - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[30m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[30m])) ) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m])) ) / - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE"}[30m])) labels: verb: write {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} @@ -369,15 +370,15 @@ spec: ( ( # too slow - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[3d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[3d])) - - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[3d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[3d])) ) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d])) ) / - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE"}[3d])) labels: verb: write {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} @@ -393,15 +394,15 @@ spec: ( ( # too slow - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m])) - - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[5m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[5m])) ) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m])) ) / - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE"}[5m])) labels: verb: write {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} @@ -417,15 +418,15 @@ spec: ( ( # too slow - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[6h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[6h])) - - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[6h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[6h])) ) + - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h])) ) / - sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE"}[6h])) labels: verb: write {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} diff --git a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-histogram.rules.yaml b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-histogram.rules.yaml index 6abcc50d5e..ed5df43ce3 100644 --- a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-histogram.rules.yaml +++ b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-histogram.rules.yaml @@ -5,6 +5,7 @@ https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-promet */ -}} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeApiServer.enabled .Values.defaultRules.rules.kubeApiserverHistogram }} +{{- $kubeApiserverJob := include "kube-prometheus-stack-kube-apiserver.name" . }} apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: @@ -24,7 +25,7 @@ spec: groups: - name: kube-apiserver-histogram.rules rules: - - expr: histogram_quantile(0.99, sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, le, resource) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 + - expr: histogram_quantile(0.99, sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, le, resource) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 labels: quantile: '0.99' verb: read @@ -37,7 +38,7 @@ spec: {{- end }} {{- end }} record: cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile - - expr: histogram_quantile(0.99, sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, le, resource) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 + - expr: histogram_quantile(0.99, sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, le, resource) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 labels: quantile: '0.99' verb: write diff --git a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml index 99d7e795fd..83460ff4a2 100644 --- a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml +++ b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml @@ -5,6 +5,7 @@ https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-promet */ -}} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesSystem }} +{{- $kubeApiserverJob := include "kube-prometheus-stack-kube-apiserver.name" . }} apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: @@ -37,9 +38,9 @@ spec: runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeclientcertificateexpiration summary: Client certificate is about to expire. expr: |- - histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800 + histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="{{ $kubeApiserverJob }}"}[5m]))) < 604800 and - on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 + on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job="{{ $kubeApiserverJob }}"} > 0 for: {{ dig "KubeClientCertificateExpiration" "for" "5m" .Values.customRules }} {{- with .Values.defaultRules.keepFiringFor }} keep_firing_for: "{{ . }}" @@ -68,9 +69,9 @@ spec: runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeclientcertificateexpiration summary: Client certificate is about to expire. expr: |- - histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400 + histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="{{ $kubeApiserverJob }}"}[5m]))) < 86400 and - on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 + on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job="{{ $kubeApiserverJob }}"} > 0 for: {{ dig "KubeClientCertificateExpiration" "for" "5m" .Values.customRules }} {{- with .Values.defaultRules.keepFiringFor }} keep_firing_for: "{{ . }}" @@ -98,7 +99,7 @@ spec: description: Kubernetes aggregated API {{`{{`}} $labels.instance {{`}}`}}/{{`{{`}} $labels.name {{`}}`}} has reported {{`{{`}} $labels.reason {{`}}`}} errors on cluster {{`{{`}} $labels.cluster {{`}}`}}. runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeaggregatedapierrors summary: Kubernetes aggregated API has reported errors. - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[1m])) > 0 + expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job="{{ $kubeApiserverJob }}"}[1m])) > 0 for: {{ dig "KubeAggregatedAPIErrors" "for" "10m" .Values.customRules }} {{- with .Values.defaultRules.keepFiringFor }} keep_firing_for: "{{ . }}" @@ -126,7 +127,7 @@ spec: description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m on cluster {{`{{`}} $labels.cluster {{`}}`}}. runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeaggregatedapidown summary: Kubernetes aggregated API is down. - expr: (1 - max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}name, namespace, cluster)(avg_over_time(aggregator_unavailable_apiservice{job="apiserver"}[10m]))) * 100 < 85 + expr: (1 - max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}name, namespace, cluster)(avg_over_time(aggregator_unavailable_apiservice{job="{{ $kubeApiserverJob }}"}[10m]))) * 100 < 85 for: {{ dig "KubeAggregatedAPIDown" "for" "5m" .Values.customRules }} {{- with .Values.defaultRules.keepFiringFor }} keep_firing_for: "{{ . }}" @@ -155,7 +156,7 @@ spec: description: KubeAPI has disappeared from Prometheus target discovery. runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapidown summary: Target disappeared from Prometheus target discovery. - expr: absent(up{job="apiserver"}) + expr: absent(up{job="{{ $kubeApiserverJob }}"}) for: {{ dig "KubeAPIDown" "for" "15m" .Values.customRules }} {{- with .Values.defaultRules.keepFiringFor }} keep_firing_for: "{{ . }}" @@ -184,7 +185,7 @@ spec: description: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests on cluster {{`{{`}} $labels.cluster {{`}}`}}. runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapiterminatedrequests summary: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests. - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver"}[10m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20 + expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_terminations_total{job="{{ $kubeApiserverJob }}"}[10m])) / ( sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}"}[10m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_terminations_total{job="{{ $kubeApiserverJob }}"}[10m])) ) > 0.20 for: {{ dig "KubeAPITerminatedRequests" "for" "5m" .Values.customRules }} {{- with .Values.defaultRules.keepFiringFor }} keep_firing_for: "{{ . }}" diff --git a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system.yaml b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system.yaml index e46fda9e76..e047089601 100644 --- a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system.yaml +++ b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system.yaml @@ -5,6 +5,7 @@ https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-promet */ -}} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} {{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesSystem }} +{{- $kubeApiserverJob := include "kube-prometheus-stack-kube-apiserver.name" . }} apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: @@ -65,9 +66,9 @@ spec: runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeclienterrors summary: Kubernetes API server client is experiencing errors. expr: |- - (sum(rate(rest_client_requests_total{job="apiserver",code=~"5.."}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, job, namespace) + (sum(rate(rest_client_requests_total{job="{{ $kubeApiserverJob }}",code=~"5.."}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, job, namespace) / - sum(rate(rest_client_requests_total{job="apiserver"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, job, namespace)) + sum(rate(rest_client_requests_total{job="{{ $kubeApiserverJob }}"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, job, namespace)) > 0.01 for: {{ dig "KubeClientErrors" "for" "15m" .Values.customRules }} {{- with .Values.defaultRules.keepFiringFor }} diff --git a/charts/kube-prometheus-stack/values.yaml b/charts/kube-prometheus-stack/values.yaml index 9325322dad..741365afe1 100644 --- a/charts/kube-prometheus-stack/values.yaml +++ b/charts/kube-prometheus-stack/values.yaml @@ -956,7 +956,7 @@ alertmanager: image: registry: quay.io repository: prometheus/alertmanager - tag: v0.32.0 + tag: v0.32.1 sha: "" pullPolicy: IfNotPresent @@ -1683,6 +1683,11 @@ kubeApiServer: ## https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#servicemonitor targetLabels: [] + ## Override the job label used for the apiserver. + ## This allows users who scrape apiserver metrics under a different job name (e.g. k3s-server via PushProx) + ## to align the recording rules and alerts with their actual job label. + jobNameOverride: "" + ## Component scraping the kubelet and kubelet-hosted cAdvisor ## kubelet: @@ -2999,7 +3004,7 @@ prometheusOperator: image: registry: ghcr.io repository: jkroepke/kube-webhook-certgen - tag: 1.8.1 + tag: 1.8.2 sha: "" pullPolicy: IfNotPresent resources: {} @@ -4154,7 +4159,7 @@ prometheus: image: registry: quay.io repository: prometheus/prometheus - tag: v3.11.2 + tag: v3.11.3 sha: "" pullPolicy: IfNotPresent @@ -4645,6 +4650,7 @@ prometheus: ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#thanosspec ## thanos: {} + # image: quay.io/thanos/thanos # secretProviderClass: # provider: gcp # parameters: