Skip to content

Instantly share code, notes, and snippets.

@anadimisra
Last active September 17, 2023 05:09
Show Gist options
  • Save anadimisra/e7cd377bca32deaf6ecb906fd06d63bf to your computer and use it in GitHub Desktop.
Save anadimisra/e7cd377bca32deaf6ecb906fd06d63bf to your computer and use it in GitHub Desktop.
Values YAML overrides for deploying Prometheus, Grafana on EKS and dashboard for spring-boot microservices.
replicas: 1
image:
repository: docker.io/grafana/grafana
tag: "9.5.2"
sha: ""
pullPolicy: IfNotPresent
testFramework:
enabled: true
image: docker.io/bats/bats
tag: "v1.4.1"
imagePullPolicy: IfNotPresent
securityContext: {}
securityContext:
runAsNonRoot: true
runAsUser: 472
runAsGroup: 472
fsGroup: 472
containerSecurityContext:
capabilities:
drop:
- ALL
seccompProfile:
type: RuntimeDefault
createConfigmap: true
downloadDashboardsImage:
repository: docker.io/curlimages/curl
tag: 7.85.0
sha: ""
pullPolicy: IfNotPresent
service:
enabled: true
type: ClusterIP
port: 80
targetPort: 3000
portName: service
persistence:
type: pvc
enabled: false
storageClassName: eks-efs-storage-class
accessModes:
- ReadWriteOnce
size: 10Gi
# annotations: {}
finalizers:
- kubernetes.io/pvc-protection
# selectorLabels: {}
## Sub-directory of the PV to mount. Can be templated.
# subPath: ""
## Name of an existing PVC. Can be templated.
existingClaim: grafana-volume-0
# Administrator credentials when not using an existing secret (see below)
adminUser: admin
# Use an existing secret for the admin user.
admin:
## Name of the secret. Can be templated.
existingSecret: ""
userKey: admin-user
passwordKey: admin-password
datasources:
datasources.yaml:
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
version: 1
url: http://prometheus-server:80
access: proxy
dashboardProviders:
dashboardproviders.yaml:
apiVersion: 1
providers:
- name: 'default'
orgId: 1
folder: ''
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/default
grafana.ini:
paths:
data: /var/lib/grafana/
logs: /var/log/grafana
plugins: /var/lib/grafana/plugins
provisioning: /etc/grafana/provisioning
analytics:
check_for_updates: true
log:
mode: console
grafana_net:
url: https://grafana.net
server:
domain: "{{ if (and .Values.ingress.enabled .Values.ingress.hosts) }}{{ .Values.ingress.hosts | first }}{{ else }}''{{ end }}"
## Sidecars that collect the configmaps with specified label and stores the included files them into the respective folders
## Requires at least Grafana 5 to work and can't be used together with parameters dashboardProviders, datasources and dashboards
sidecar:
image:
repository: quay.io/kiwigrid/k8s-sidecar
tag: 1.22.0
sha: ""
imagePullPolicy: IfNotPresent
resources: {}
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
seccompProfile:
type: RuntimeDefault
revisionHistoryLimit: 10
imageRenderer:
deploymentStrategy: {}
# Enable the image-renderer deployment & service
enabled: false
replicas: 1
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 5
targetCPU: "60"
targetMemory: ""
behavior: {}
image:
repository: docker.io/grafana/grafana-image-renderer
tag: latest
sha: ""
pullPolicy: Always
env:
HTTP_HOST: "0.0.0.0"
containerSecurityContext:
seccompProfile:
type: RuntimeDefault
capabilities:
drop: ['ALL']
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
service:
enabled: true
portName: 'http'
port: 8081
targetPort: 8081
grafanaProtocol: http
podPortName: http
revisionHistoryLimit: 10
networkPolicy:
limitIngress: true
limitEgress: false
extraIngressSelectors: []
server:
## Prometheus server container image
##
image:
repository: quay.io/prometheus/prometheus
# if not set appVersion field from Chart.yaml is used
tag: ""
# When digest is set to a non-empty value, images will be pulled by digest (regardless of tag value).
digest: ""
pullPolicy: IfNotPresent
extraFlags:
- web.enable-lifecycle
## storage.tsdb.wal-compression flag enables compression of the write-ahead log (WAL)
- storage.tsdb.wal-compression
## Path to a configuration file on prometheus server container FS
configPath: /etc/config/prometheus.yml
### The data directory used by prometheus to set --storage.tsdb.path
### When empty server.persistentVolume.mountPath is used instead
storagePath: ""
global:
## How frequently to scrape targets by default
##
scrape_interval: 1m
## How long until a scrape request times out
##
scrape_timeout: 10s
## How frequently to evaluate rules
##
evaluation_interval: 1m
## Server Deployment Strategy type
strategy:
type: Recreate
## Node labels for Prometheus server pod assignment
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
## enable this only if using karpenter for auto scaling worker nodes
nodeSelector:
karpenter.sh/provisioner-name: on-demand
## PodDisruptionBudget settings
## ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions/
##
podDisruptionBudget:
enabled: false
maxUnavailable: 1
persistentVolume:
## If true, Prometheus server will create/use a Persistent Volume Claim
## If false, use emptyDir
##
enabled: true
## Prometheus server data Persistent Volume access modes
## Must match those of existing PV or dynamic provisioner
## Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
##
accessModes:
- ReadWriteOnce
## Prometheus server data Persistent Volume mount root path
##
mountPath: /data
## Prometheus server data Persistent Volume size
##
size: 20Gi
## Prometheus server data Persistent Volume Storage Class
## If defined, storageClassName: <storageClass>
## If set to "-", storageClassName: "", which disables dynamic provisioning
## If undefined (the default) or set to null, no storageClassName spec is
## set, choosing the default provisioner. (gp2 on AWS, standard on
## GKE, AWS & OpenStack)
##
storageClass: "eks-efs-storage-class"
## Use a StatefulSet if replicaCount needs to be greater than 1 (see below)
##
replicaCount: 2
statefulSet:
## If true, use a statefulset instead of a deployment for pod management.
## This allows to scale replicas to more than 1 pod
##
enabled: true
annotations: {}
labels: {}
podManagementPolicy: OrderedReady
## Alertmanager headless service to use for the statefulset
##
headless:
annotations: {}
labels: {}
servicePort: 80
## Enable gRPC port on service to allow auto discovery with thanos-querier
gRPC:
enabled: false
servicePort: 10901
## Prometheus server readiness and liveness probe initial delay and timeout
## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
##
tcpSocketProbeEnabled: false
probeScheme: HTTP
readinessProbeInitialDelay: 30
readinessProbePeriodSeconds: 5
readinessProbeTimeout: 4
readinessProbeFailureThreshold: 3
readinessProbeSuccessThreshold: 1
livenessProbeInitialDelay: 30
livenessProbePeriodSeconds: 15
livenessProbeTimeout: 10
livenessProbeFailureThreshold: 3
livenessProbeSuccessThreshold: 1
startupProbe:
enabled: false
periodSeconds: 5
failureThreshold: 30
timeoutSeconds: 10
## Prometheus server resource requests and limits
## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
##
resources:
limits:
cpu: 500m
memory: 512Mi
requests:
cpu: 500m
memory: 512Mi
# Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico),
# because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working
##
hostNetwork: false
# When hostNetwork is enabled, this will set to ClusterFirstWithHostNet automatically
dnsPolicy: ClusterFirst
## Vertical Pod Autoscaler config
## Ref: https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler
verticalAutoscaler:
## If true a VPA object will be created for the controller (either StatefulSet or Deployemnt, based on above configs)
enabled: false
# updateMode: "Auto"
# containerPolicies:
# - containerName: 'prometheus-server'
## Security context to be added to server pods
##
securityContext:
runAsUser: 65534
runAsNonRoot: true
runAsGroup: 65534
fsGroup: 65534
service:
## If false, no Service will be created for the Prometheus server
##
enabled: true
servicePort: 80
sessionAffinity: None
type: ClusterIP
## Enable gRPC port on service to allow auto discovery with thanos-querier
gRPC:
enabled: false
servicePort: 10901
statefulsetReplica:
enabled: false
replica: 1
## Prometheus server pod termination grace period
##
terminationGracePeriodSeconds: 300
## Prometheus data retention period (default if not specified is 15 days)
##
retention: "30d"
## Records configuration
## Ref: https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/
recording_rules.yml: {}
## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use recording_rules.yml
rules: {}
prometheus.yml:
rule_files:
- /etc/config/recording_rules.yml
- /etc/config/alerting_rules.yml
## Below two files are DEPRECATED will be removed from this default values file
- /etc/config/rules
- /etc/config/alerts
scrape_configs:
- job_name: prometheus
static_configs:
- targets:
- localhost:9090
# A scrape configuration for running Prometheus on a Kubernetes cluster.
# This uses separate scrape configs for cluster components (i.e. API server, node)
# and services to allow each to use different authentication configs.
#
# Kubernetes labels will be added as Prometheus labels on metrics via the
# `labelmap` relabeling action.
# Scrape config for API servers.
#
# Kubernetes exposes API servers as endpoints to the default/kubernetes
# service so this uses `endpoints` role and uses relabelling to only keep
# the endpoints associated with the default/kubernetes service using the
# default named port `https`. This works for single API server deployments as
# well as HA API server deployments.
- job_name: 'kubernetes-apiservers'
kubernetes_sd_configs:
- role: endpoints
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# If your node certificates are self-signed or use a different CA to the
# master CA, then disable certificate verification below. Note that
# certificate verification is an integral part of a secure infrastructure
# so this should only be disabled in a controlled environment. You can
# disable certificate verification by uncommenting the line below.
#
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
# Keep only the default/kubernetes service endpoints for the https port. This
# will add targets for each API server which Kubernetes adds an endpoint to
# the default/kubernetes service.
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: default;kubernetes;https
- job_name: 'kubernetes-nodes'
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# If your node certificates are self-signed or use a different CA to the
# master CA, then disable certificate verification below. Note that
# certificate verification is an integral part of a secure infrastructure
# so this should only be disabled in a controlled environment. You can
# disable certificate verification by uncommenting the line below.
#
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/$1/proxy/metrics
- job_name: 'kubernetes-nodes-cadvisor'
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# If your node certificates are self-signed or use a different CA to the
# master CA, then disable certificate verification below. Note that
# certificate verification is an integral part of a secure infrastructure
# so this should only be disabled in a controlled environment. You can
# disable certificate verification by uncommenting the line below.
#
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
# This configuration will work only on kubelet 1.7.3+
# As the scrape endpoints for cAdvisor have changed
# if you are using older version you need to change the replacement to
# replacement: /api/v1/nodes/$1:4194/proxy/metrics
# more info here https://github.com/coreos/prometheus-operator/issues/633
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor
# Metric relabel configs to apply to samples before ingestion.
# [Metric Relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs)
# metric_relabel_configs:
# - action: labeldrop
# regex: (kubernetes_io_hostname|failure_domain_beta_kubernetes_io_region|beta_kubernetes_io_os|beta_kubernetes_io_arch|beta_kubernetes_io_instance_type|failure_domain_beta_kubernetes_io_zone)
# Scrape config for service endpoints.
#
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
#
# * `prometheus.io/scrape`: Only scrape services that have a value of
# `true`, except if `prometheus.io/scrape-slow` is set to `true` as well.
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
# service then set this appropriately.
# * `prometheus.io/param_<parameter>`: If the metrics endpoint uses parameters
# then you can set any parameter
- job_name: 'kubernetes-service-endpoints'
honor_labels: true
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow]
action: drop
regex: true
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: (.+?)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+)
replacement: __param_$1
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- source_labels: [__meta_kubernetes_pod_node_name]
action: replace
target_label: node
# Scrape config for slow service endpoints; same as above, but with a larger
# timeout and a larger interval
#
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
#
# * `prometheus.io/scrape-slow`: Only scrape services that have a value of `true`
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
# service then set this appropriately.
# * `prometheus.io/param_<parameter>`: If the metrics endpoint uses parameters
# then you can set any parameter
- job_name: 'kubernetes-service-endpoints-slow'
honor_labels: true
scrape_interval: 5m
scrape_timeout: 30s
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow]
action: keep
regex: true
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: (.+?)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+)
replacement: __param_$1
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: service
- source_labels: [__meta_kubernetes_pod_node_name]
action: replace
target_label: node
- job_name: 'prometheus-pushgateway'
honor_labels: true
kubernetes_sd_configs:
- role: service
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
action: keep
regex: pushgateway
# Example scrape config for probing services via the Blackbox Exporter.
#
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
#
# * `prometheus.io/probe`: Only probe services that have a value of `true`
- job_name: 'kubernetes-services'
honor_labels: true
metrics_path: /probe
params:
module: [http_2xx]
kubernetes_sd_configs:
- role: service
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
action: keep
regex: true
- source_labels: [__address__]
target_label: __param_target
- target_label: __address__
replacement: blackbox
- source_labels: [__param_target]
target_label: instance
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
target_label: namespace
- source_labels: [__meta_kubernetes_service_name]
target_label: service
# Example scrape config for pods
#
# The relabeling allows the actual pod scrape endpoint to be configured via the
# following annotations:
#
# * `prometheus.io/scrape`: Only scrape pods that have a value of `true`,
# except if `prometheus.io/scrape-slow` is set to `true` as well.
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`.
- job_name: 'kubernetes-pods'
honor_labels: true
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
action: drop
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
action: replace
regex: (https?)
target_label: __scheme__
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
action: replace
regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})
replacement: '[$2]:$1'
target_label: __address__
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
action: replace
regex: (\d+);((([0-9]+?)(\.|$)){4})
replacement: $2:$1
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
replacement: __param_$1
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_pod_phase]
regex: Pending|Succeeded|Failed|Completed
action: drop
# Example Scrape config for pods which should be scraped slower. An useful example
# would be stackriver-exporter which queries an API on every scrape of the pod
#
# The relabeling allows the actual pod scrape endpoint to be configured via the
# following annotations:
#
# * `prometheus.io/scrape-slow`: Only scrape pods that have a value of `true`
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`.
- job_name: 'kubernetes-pods-slow'
honor_labels: true
scrape_interval: 5m
scrape_timeout: 30s
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
action: replace
regex: (https?)
target_label: __scheme__
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
action: replace
regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})
replacement: '[$2]:$1'
target_label: __address__
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
action: replace
regex: (\d+);((([0-9]+?)(\.|$)){4})
replacement: $2:$1
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
replacement: __param_$1
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_pod_phase]
regex: Pending|Succeeded|Failed|Completed
action: drop
# Add your other pods scrape jobs here
networkPolicy:
## Enable creation of NetworkPolicy resources.
##
enabled: false
## kube-state-metrics sub-chart configurable values
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics
##
kube-state-metrics:
## If false, kube-state-metrics sub-chart will not be installed
##
enabled: true
## promtheus-node-exporter sub-chart configurable values
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter
##
prometheus-node-exporter:
## If false, node-exporter will not be installed
##
enabled: true
rbac:
pspEnabled: false
containerSecurityContext:
allowPrivilegeEscalation: false
## pprometheus-pushgateway sub-chart configurable values
## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-pushgateway
##
prometheus-pushgateway:
## If false, pushgateway will not be installed
##
enabled: true
# Optional service annotations
serviceAnnotations:
prometheus.io/probe: pushgateway
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 5,
"iteration": 1694882549461,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 4,
"panels": [],
"title": "Quick Facts",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "Select pod to see uptime.",
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 6,
"x": 0,
"y": 1
},
"id": 6,
"links": [],
"maxDataPoints": 100,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.4.3",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "process_uptime_seconds{pod=~\"$pod\", namespace=~\"$namespace\"} ",
"interval": "",
"legendFormat": "{{pod}}",
"refId": "A"
}
],
"title": "Uptime",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "Total permissible and current allocated memory per pd.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 15,
"w": 9,
"x": 6,
"y": 1
},
"id": 247,
"options": {
"legend": {
"calcs": [
"min",
"max"
],
"displayMode": "table",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "kube_pod_container_resource_limits{namespace=~\"$namespace\",pod=~\"$pod\"}",
"interval": "",
"legendFormat": "{{pod}}",
"refId": "A"
}
],
"title": "Current Memory Allocation",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "Average memory consumption by app containers in each pod.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 15,
"w": 9,
"x": 15,
"y": 1
},
"id": 196,
"options": {
"legend": {
"calcs": [
"min",
"max"
],
"displayMode": "table",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "avg(container_memory_usage_bytes{namespace=~\"$namespace\",pod=~\"$pod\",container=~\"$container\",container!=\"\",container!=\"POD\"}) by (container)",
"interval": "",
"legendFormat": "{{container}}",
"refId": "A"
}
],
"title": "Current Memory Usage",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "ms"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 3,
"x": 0,
"y": 11
},
"id": 210,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.4.3",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "kube_pod_container_status_last_terminated_reason{exported_namespace=\"uat\",pod=\"fabric-ppm-service-86bc4bdc46-4bsqt\", reason=\"OOMKilled\"}",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "OOM Restart",
"transformations": [],
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "ms"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 3,
"x": 3,
"y": 11
},
"id": 248,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.4.3",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "kube_pod_container_status_last_terminated_reason{exported_namespace=~\"$namespace\",pod=\"$pod\", reason=\"Error\"}",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Error Restart",
"transformations": [],
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 12,
"x": 0,
"y": 16
},
"id": 38,
"links": [],
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.4.3",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": false,
"expr": "system_load_average_1m{pod=~\"$pod\", namespace=~\"$namespace\"}",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": false,
"expr": "system_cpu_count{pod=~\"$pod\", namespace=~\"$namespace\"}",
"hide": false,
"interval": "",
"intervalFactor": 2,
"legendFormat": "cpus",
"refId": "B"
}
],
"title": "Load",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 1,
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 12,
"x": 12,
"y": 16
},
"id": 36,
"links": [],
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max",
"min"
],
"displayMode": "table",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.4.3",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "system_cpu_usage{pod=~\"$pod\", namespace=~\"$namespace\"}",
"interval": "",
"legendFormat": "{{pod}} | system",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "process_cpu_usage{pod=~\"$pod\", namespace=~\"$namespace\"}",
"hide": false,
"interval": "",
"legendFormat": "{{pod}} | process",
"refId": "B"
}
],
"title": "CPU Usage",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "continuous-GrYlRd"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "smooth",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 11,
"w": 24,
"x": 0,
"y": 26
},
"id": 106,
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "right"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "sum(increase(log4j2_events_total{pod=~\"$pod\", namespace=~\"$namespace\", level=~\"error|fatal|warn\"}[$__rate_interval])) by (pod,level)",
"interval": "",
"legendFormat": "{{pod}} - {{ level }}",
"refId": "A"
}
],
"title": "Fatal, Error and Warn logs count",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 37
},
"id": 118,
"panels": [],
"title": "Mongo",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "Total time taken per method per repository, a high value indicates perf issue.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 38
},
"id": 218,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"min",
"max",
"mean"
],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "increase(spring_data_repository_invocations_seconds_sum{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])",
"interval": "",
"legendFormat": "{{pod}} - {{repository}} - {{method}}",
"refId": "A"
}
],
"title": "Repository Method Execution Time",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "Number of methods executed against each of repositories, gives an idea of traffic on each collection",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 38
},
"id": 219,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "increase(spring_data_repository_invocations_seconds_count{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])",
"interval": "",
"legendFormat": "{{pod}} - {{repository}} - {{method}}",
"refId": "A"
}
],
"title": "Repository Method Execution Count",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "Total time taken per method per repository, a high value indicates perf issue.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}
},
"mappings": [],
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 46
},
"id": 222,
"options": {
"displayLabels": [
"value"
],
"legend": {
"displayMode": "table",
"placement": "bottom",
"values": [
"value"
]
},
"pieType": "pie",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "increase(spring_data_repository_invocations_seconds_sum{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])/increase(spring_data_repository_invocations_seconds_count{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])",
"interval": "",
"legendFormat": "{{pod}} - {{repository}} - {{method}}",
"refId": "A"
}
],
"title": "Repository Method Avg Execution Time",
"type": "piechart"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "Max recorded time of each mongo operation",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 46
},
"id": 223,
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "spring_data_repository_invocations_seconds_max{namespace=~\"$namespace\",pod=~\"$pod\"}",
"interval": "",
"legendFormat": "{{pod}} - {{repository}} - {{method}}",
"refId": "A"
}
],
"title": "Repository Method Execution Max Time",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "The maximum time in seconds it took to execute a particular command on a MongoDB collection. This is the worst case unit. You can use it to identify the slowest operations per collection in your app.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "smooth",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 54
},
"id": 122,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"min",
"max",
"mean"
],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "increase(mongodb_driver_commands_seconds_sum{pod=~\"$pod\", namespace=~\"$namespace\"}[$__rate_interval])",
"interval": "",
"legendFormat": "{{pod}} | {{command}} | {{collection}}",
"refId": "A"
}
],
"title": "Time in Seconds of Operations Per Collection",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "ops"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 54
},
"id": 234,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"max"
],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "increase(mongodb_driver_commands_seconds_count{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])",
"interval": "",
"legendFormat": "{{pod}} - {{collection}} - {{command}}",
"refId": "A"
}
],
"title": "Number of Commands Per collection Per second",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}
},
"mappings": [],
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 63
},
"id": 225,
"options": {
"displayLabels": [
"value"
],
"legend": {
"displayMode": "table",
"placement": "right",
"values": [
"value"
]
},
"pieType": "pie",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.4.3",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "increase(mongodb_driver_commands_seconds_sum{pod=~\"$pod\", namespace=~\"$namespace\"}[$__rate_interval])/increase(mongodb_driver_commands_seconds_count{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])",
"interval": "",
"legendFormat": "{{pod}} - {{ collection }} - {{command}}",
"refId": "A"
}
],
"title": "Operations Average Time Per Collection",
"type": "piechart"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "smooth",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 63
},
"id": 226,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "mongodb_driver_commands_seconds_max{pod=~\"$pod\", namespace=~\"$namespace\"}",
"interval": "",
"legendFormat": "{{pod}} - {{collection}} - {{command}}",
"refId": "A"
}
],
"title": "Mongo Operation Max Time",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "The current size of the wait queue for a connection from the pool",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 12,
"x": 0,
"y": 72
},
"id": 132,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "right"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.4.3",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "mongodb_driver_pool_waitqueuesize{pod=~\"$pod\", namespace=~\"$namespace\"}",
"interval": "",
"legendFormat": "{{pod}} | {{namespace}}",
"refId": "A",
"visualQuery": {
"labels": [
{
"label": "pod",
"op": "=",
"value": "domain-data-api-9857d678d-56ssd"
},
{
"label": "namespace",
"op": "=",
"value": "dev"
}
],
"metric": "mongodb_driver_commands_seconds_max",
"operations": []
}
}
],
"title": "Mongodb Driver Pool Waitqueuesize",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "Count of connections that are currently in use",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 12,
"x": 12,
"y": 72
},
"id": 129,
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "list",
"placement": "right"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.4.3",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "mongodb_driver_pool_checkedout{pod=~\"$pod\", namespace=~\"$namespace\"}",
"interval": "",
"legendFormat": "{{pod}}",
"refId": "A",
"visualQuery": {
"labels": [
{
"label": "pod",
"op": "=",
"value": "domain-data-api-9857d678d-56ssd"
},
{
"label": "namespace",
"op": "=",
"value": "dev"
}
],
"metric": "mongodb_driver_commands_seconds_max",
"operations": []
}
}
],
"title": "Mongodb Driver Pool Checkedout",
"transformations": [],
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 82
},
"id": 24,
"panels": [],
"title": "JVM Memory",
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "",
"editable": true,
"error": false,
"fill": 1,
"fillGradient": 0,
"grid": {
"leftLogBase": 1,
"rightLogBase": 1
},
"gridPos": {
"h": 7,
"w": 7,
"x": 0,
"y": 83
},
"hiddenSeries": false,
"id": 26,
"legend": {
"avg": false,
"current": true,
"max": true,
"min": false,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.4.3",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"exemplar": true,
"expr": "sum(jvm_memory_used_bytes{pod=~\"$pod\", namespace=~\"$namespace\", area=~\"heap\"})",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "used",
"metric": "",
"refId": "A",
"step": 2400
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"exemplar": true,
"expr": "sum(jvm_memory_committed_bytes{pod=~\"$pod\", namespace=~\"$namespace\", area=~\"heap\"})",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "committed",
"refId": "B",
"step": 2400
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"exemplar": true,
"expr": "sum(jvm_memory_max_bytes{pod=~\"$pod\", namespace=~\"$namespace\", area=~\"heap\"})",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "max",
"refId": "C",
"step": 2400
}
],
"thresholds": [],
"timeRegions": [],
"title": "JVM Heap",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"x-axis": true,
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"y-axis": true,
"y_formats": [
"mbytes",
"short"
],
"yaxes": [
{
"format": "bytes",
"logBase": 1,
"min": 0,
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "",
"editable": true,
"error": false,
"fill": 1,
"fillGradient": 0,
"grid": {
"leftLogBase": 1,
"rightLogBase": 1
},
"gridPos": {
"h": 7,
"w": 9,
"x": 7,
"y": 83
},
"hiddenSeries": false,
"id": 28,
"legend": {
"avg": false,
"current": true,
"max": true,
"min": false,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.4.3",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"exemplar": true,
"expr": "sum(jvm_memory_used_bytes{pod=~\"$pod\", namespace=~\"$namespace\", area=~\"nonheap\"})",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "used",
"metric": "",
"refId": "A",
"step": 2400
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"exemplar": true,
"expr": "sum(jvm_memory_committed_bytes{pod=~\"$pod\", namespace=~\"$namespace\", area=~\"nonheap\"})",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "committed",
"refId": "B",
"step": 2400
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"exemplar": true,
"expr": "sum(jvm_memory_max_bytes{pod=~\"$pod\", namespace=~\"$namespace\", area=~\"nonheap\"})",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "max",
"refId": "C",
"step": 2400
}
],
"thresholds": [],
"timeRegions": [],
"title": "JVM Non-Heap",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"x-axis": true,
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"y-axis": true,
"y_formats": [
"mbytes",
"short"
],
"yaxes": [
{
"format": "bytes",
"logBase": 1,
"min": 0,
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "",
"editable": true,
"error": false,
"fill": 1,
"fillGradient": 0,
"grid": {
"leftLogBase": 1,
"rightLogBase": 1
},
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 83
},
"hiddenSeries": false,
"id": 30,
"legend": {
"alignAsTable": false,
"avg": false,
"current": true,
"max": true,
"min": false,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.4.3",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"exemplar": true,
"expr": "sum(jvm_memory_used_bytes{pod=~\"$pod\", namespace=~\"$namespace\"})",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "used",
"metric": "",
"refId": "A",
"step": 2400
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"exemplar": true,
"expr": "sum(jvm_memory_committed_bytes{pod=~\"$pod\", namespace=~\"$namespace\"})",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "committed",
"refId": "B",
"step": 2400
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"exemplar": true,
"expr": "sum(jvm_memory_max_bytes{pod=\"$pod\", namespace=\"$namespace\"})",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "max",
"refId": "C",
"step": 2400
}
],
"thresholds": [],
"timeRegions": [],
"title": "JVM Total",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"x-axis": true,
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"y-axis": true,
"y_formats": [
"mbytes",
"short"
],
"yaxes": [
{
"format": "bytes",
"label": "",
"logBase": 1,
"min": 0,
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 90
},
"id": 172,
"panels": [],
"title": "Garbage Collection",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "An approximation of the percent of CPU time used by GC activities over the last lookback period or since monitoring began, whichever is shorter, in the range [0..1]",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 4,
"x": 0,
"y": 91
},
"id": 174,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.4.3",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "jvm_gc_overhead_percent{namespace=~\"$namespace\", pod=~\"$pod\"}",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "CPU time used by GC",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "Count of positive increases in the size of the old generation memory pool before GC to after GC",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 4,
"x": 4,
"y": 91
},
"id": 180,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.4.3",
"repeatDirection": "h",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "jvm_gc_memory_promoted_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Heap Gain Byte Size",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "Max size of long-lived heap memory pool",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 4,
"x": 8,
"y": 91
},
"id": 179,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.4.3",
"repeatDirection": "h",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": 1,
"exemplar": true,
"expr": "jvm_gc_max_data_size_bytes{namespace=~\"$namespace\", pod=~\"$pod\"}",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "JVM GC Max Data Size Bytes",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "Size of long-lived heap memory pool after reclamation",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [