Skip to content

Instantly share code, notes, and snippets.

@aiwantaozi
Last active November 24, 2022 17:39
Show Gist options
  • Save aiwantaozi/45511575d47101b49a8883acf19ab394 to your computer and use it in GitHub Desktop.
Save aiwantaozi/45511575d47101b49a8883acf19ab394 to your computer and use it in GitHub Desktop.
all metrics

Cluster Metrics

cluster_memory_usage_percent

1 - sum(node_memory_MemAvailable_bytes{instance=~".*"}) by () / sum(node_memory_MemTotal_bytes{instance=~".*"}) by ()

cluster_network_transmit_packets_dropped_sum_rate

sum(rate(node_network_transmit_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~".*"}[5m])) by ()

cluster_cpu_load_5

sum(node_load5{instance=~".*"}) by () / count(node_cpu_seconds_total{mode="system",instance=~".*"}) by ()

cluster_cpu_user_seconds_sum_rate

sum(rate(node_cpu_seconds_total{mode="user"}[5m])) by ()

cluster_cpu_system_seconds_sum_rate

sum(rate(node_cpu_seconds_total{mode="system"}[5m])) by ()

cluster_memory_page_in_bytes_sum_rate

1e3 * sum(rate(node_vmstat_pgpgin{instance=~".*"}[5m])) by ()

cluster_disk_io_writes_bytes_sum_rate

sum(rate(node_disk_written_bytes_total{instance=~".*"}[5m])) by ()

cluster_fs_usage_percent

(sum(node_filesystem_size_bytes{device!="rootfs",instance=~".*"}) by (instance) - sum(node_filesystem_free_bytes{device!="rootfs",instance=~".*"}) by (instance)) / sum(node_filesystem_size_bytes{device!="rootfs",instance=~".*"}) by (instance)

cluster_network_receive_bytes_sum_rate

sum(rate(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~".*"}[5m])) by ()

cluster_cpu_load_1

sum(node_load1{instance=~".*"}) by () / count(node_cpu_seconds_total{mode="system",instance=~".*"}) by ()

cluster_cpu_usage_seconds_sum_rate

sum(rate(node_cpu_seconds_total{mode!="idle", mode!="iowait", mode!~"^(?:guest.*)$"}[5m])) by ()

cluster_network_receive_errors_sum_rate

sum(rate(node_network_receive_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~".*"}[5m])) by ()

cluster_network_receive_packets_sum_rate

sum(rate(node_network_receive_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~".*"}[5m])) by ()

cluster_network_transmit_errors_sum_rate

sum(rate(node_network_transmit_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~".*"}[5m])) by ()

cluster_cpu_load_15

sum(node_load15{instance=~".*"}) by () / count(node_cpu_seconds_total{mode="system",instance=~".*"}) by ()

cluster_disk_io_reads_bytes_sum_rate

sum(rate(node_disk_read_bytes_total{instance=~".*"}[5m])) by ()

cluster_network_transmit_bytes_sum_rate

sum(rate(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~".*"}[5m])) by ()

cluster_network_transmit_packets_sum_rate

sum(rate(node_network_transmit_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~".*"}[5m])) by ()

cluster_memory_page_out_bytes_sum_rate

1e3 * sum(rate(node_vmstat_pgpgout{instance=~".*"}[5m])) by ()

cluster_network_receive_packets_dropped_sum_rate

sum(rate(node_network_receive_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~".*"}[5m])) by ()

Node Metrics

node_cpu_load_1

sum(node_load1{instance=~"10.0.2.15.*"}) by (instance) / count(node_cpu_seconds_total{mode="system",instance=~"10.0.2.15.*"}) by (instance)

node_disk_io_reads_bytes_sum_rate

sum(rate(node_disk_read_bytes_total{instance=~"10.0.2.15.*"}[5m])) by (instance)

node_network_transmit_bytes_sum_rate

sum(rate(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"10.0.2.15.*"}[5m])) by (instance)

node_network_transmit_errors_sum_rate

sum(rate(node_network_transmit_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"10.0.2.15.*"}[5m])) by (instance)

node_cpu_load_5

sum(node_load5{instance=~"10.0.2.15.*"}) by (instance) / count(node_cpu_seconds_total{mode="system",instance=~"10.0.2.15.*"}) by (instance)

node_cpu_usage_seconds_sum_rate

sum(rate(node_cpu_seconds_total{mode!="idle", mode!="iowait", mode!~"^(?:guest.*)$"}[5m])) by (instance)

node_cpu_system_seconds_sum_rate

sum(rate(node_cpu_seconds_total{mode="system"}[5m])) by (instance)

node_memory_page_out_bytes_sum_rate

1e3 * sum(rate(node_vmstat_pgpgout{instance=~"10.0.2.15.*"}[5m])) by (instance)

node_network_receive_bytes_sum_rate

sum(rate(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"10.0.2.15.*"}[5m])) by (instance)

node_network_transmit_packets_dropped_sum_rate

sum(rate(node_network_transmit_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"10.0.2.15.*"}[5m])) by (instance)

node_cpu_load_15

sum(node_load15{instance=~"10.0.2.15.*"}) by (instance) / count(node_cpu_seconds_total{mode="system",instance=~"10.0.2.15.*"}) by (instance)

node_disk_io_writes_bytes_sum_rate

sum(rate(node_disk_written_bytes_total{instance=~"10.0.2.15.*"}[5m])) by (instance)

node_fs_usage_percent

(sum(node_filesystem_size_bytes{device!="rootfs",instance=~"10.0.2.15.*"}) by (instance) - sum(node_filesystem_free_bytes{device!="rootfs",instance=~"10.0.2.15.*"}) by (instance)) / sum(node_filesystem_size_bytes{device!="rootfs",instance=~"10.0.2.15.*"}) by (instance)

node_network_receive_errors_sum_rate

sum(rate(node_network_receive_errs_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"10.0.2.15.*"}[5m])) by (instance)

node_cpu_user_seconds_sum_rate

sum(rate(node_cpu_seconds_total{mode="user"}[5m])) by (instance)

node_memory_usage_percent

1 - sum(node_memory_MemAvailable_bytes{instance=~"10.0.2.15.*"}) by (instance) / sum(node_memory_MemTotal_bytes{instance=~"10.0.2.15.*"}) by (instance)

node_memory_page_in_bytes_sum_rate

1e3 * sum(rate(node_vmstat_pgpgin{instance=~"10.0.2.15.*"}[5m])) by (instance)

node_network_receive_packets_dropped_sum_rate

sum(rate(node_network_receive_drop_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"10.0.2.15.*"}[5m])) by (instance)

node_network_receive_packets_sum_rate

sum(rate(node_network_receive_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"10.0.2.15.*"}[5m])) by (instance)

node_network_transmit_packets_sum_rate

sum(rate(node_network_transmit_packets_total{device!~"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*",instance=~"10.0.2.15.*"}[5m])) by (instance)

Workload

workload_cpu_usage_seconds_sum_rate

sum(rate(container_cpu_usage_seconds_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring.*"}[5m])) by (pod_name)

workload_cpu_cfs_throttled_seconds_sum_rate

sum(rate(container_cpu_cfs_throttled_seconds_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring.*"}[5m])) by (pod_name)

workload_network_transmit_bytes_sum_rate

sum(rate(container_network_transmit_bytes_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring.*"}[5m])) by (pod_name)

workload_network_transmit_packets_dropped_sum_rate

sum(rate(container_network_transmit_packets_dropped_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring.*"}[5m])) by (pod_name)

workload_cpu_system_seconds_sum_rate

sum(rate(container_cpu_system_seconds_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring.*"}[5m])) by (pod_name)

workload_disk_io_writes_bytes_sum_rate

sum(rate(container_fs_writes_bytes_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring.*"}[5m])) by (pod_name)

workload_fs_bytes_sum

sum(container_fs_usage_bytes{namespace="cattle-prometheus", pod_name=~"prometheus-system-monitoring.*"}) by (pod_name)

workload_network_receive_bytes_sum_rate

sum(rate(container_network_receive_bytes_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring.*"}[5m])) by (pod_name)

workload_network_transmit_packets_sum_rate

sum(rate(container_network_transmit_packets_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring.*"}[5m])) by (pod_name)

workload_memory_usage_bytes_sum

sum(container_memory_working_set_bytes{name!~"POD", namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring.*"}) by (pod_name)

workload_memory_usage_percent

sum(container_memory_working_set_bytes{namespace="cattle-prometheus", pod_name=~"prometheus-system-monitoring.*"}) by (pod_name) / sum(label_join(kube_pod_container_resource_limits_memory_bytes{namespace="cattle-prometheus", pod=~"prometheus-system-monitoring.*"},"pod_name", "", "pod")) by (pod_name)

workload_disk_io_reads_bytes_sum_rate

sum(rate(container_fs_reads_bytes_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring.*"}[5m])) by (pod_name)

workload_network_receive_errors_sum_rate

sum(rate(container_network_receive_errors_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring.*"}[5m])) by (pod_name)

workload_network_receive_packets_sum_rate

sum(rate(container_network_receive_packets_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring.*"}[5m])) by (pod_name)

workload_cpu_user_seconds_sum_rate

sum(rate(container_cpu_user_seconds_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring.*"}[5m])) by (pod_name)

workload_network_transmit_errors_sum_rate

sum(rate(container_network_transmit_errors_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring.*"}[5m])) by (pod_name)

workload_network_receive_packets_dropped_sum_rate

sum(rate(container_network_receive_packets_dropped_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring.*"}[5m])) by (pod_name)

Pod Metrics

pod_fs_bytes_sum

sum(container_fs_usage_bytes{namespace="cattle-prometheus", pod_name=~"prometheus-system-monitoring-0.*"}) by (pod_name)

pod_network_receive_errors_sum_rate

sum(rate(container_network_receive_errors_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring-0.*"}[5m])) by (pod_name)

pod_network_transmit_errors_sum_rate

sum(rate(container_network_transmit_errors_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring-0.*"}[5m])) by (pod_name)

pod_cpu_usage_seconds_sum_rate

sum(rate(container_cpu_usage_seconds_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring-0.*"}[5m])) by (pod_name)

pod_cpu_user_seconds_sum_rate

sum(rate(container_cpu_user_seconds_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring-0.*"}[5m])) by (pod_name)

pod_memory_usage_bytes_sum

sum(container_memory_working_set_bytes{name!~"POD", namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring-0.*"}) by (pod_name)

pod_network_receive_packets_sum_rate

sum(rate(container_network_receive_packets_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring-0.*"}[5m])) by (pod_name)

pod_cpu_system_seconds_sum_rate

sum(rate(container_cpu_system_seconds_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring-0.*"}[5m])) by (pod_name)

pod_memory_usage_percent

sum(container_memory_working_set_bytes{namespace="cattle-prometheus", pod_name=~"prometheus-system-monitoring-0.*"}) by (pod_name) / sum(label_join(kube_pod_container_resource_limits_memory_bytes{namespace="cattle-prometheus", pod=~"prometheus-system-monitoring-0.*"},"pod_name", "", "pod")) by (pod_name)

pod_disk_io_reads_bytes_sum_rate

sum(rate(container_fs_reads_bytes_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring-0.*"}[5m])) by (pod_name)

pod_network_transmit_packets_dropped_sum_rate

sum(rate(container_network_transmit_packets_dropped_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring-0.*"}[5m])) by (pod_name)

pod_cpu_cfs_throttled_seconds_sum_rate

sum(rate(container_cpu_cfs_throttled_seconds_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring-0.*"}[5m])) by (pod_name)

pod_disk_io_writes_bytes_sum_rate

sum(rate(container_fs_writes_bytes_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring-0.*"}[5m])) by (pod_name)

pod_network_receive_bytes_sum_rate

sum(rate(container_network_receive_bytes_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring-0.*"}[5m])) by (pod_name)

pod_network_transmit_bytes_sum_rate

sum(rate(container_network_transmit_bytes_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring-0.*"}[5m])) by (pod_name)

pod_network_transmit_packets_sum_rate

sum(rate(container_network_transmit_packets_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring-0.*"}[5m])) by (pod_name)

pod_network_receive_packets_dropped_sum_rate

sum(rate(container_network_receive_packets_dropped_total{namespace="cattle-prometheus",pod_name=~"prometheus-system-monitoring-0.*"}[5m])) by (pod_name)

ETCD Metrics

etcd_server_leader_sum

sum(etcd_server_has_leader{instance=~"10.0.2.15.*"}) by (instance)

etcd_server_leader_changes_seen_sum_increase

sum(increase(etcd_server_leader_changes_seen_total{instance=~"10.0.2.15.*"}[5m])) by (instance)

etcd_server_insufficient_members_count

count(up{job="exporter-kube-etcd-system-monitoring", instance=~"10.0.2.15.*"}) by (instance)

etcd_server_proposals_applied_sum_increase

sum(increase(etcd_server_proposals_applied_total{instance=~"10.0.2.15.*"}[5m])) by (instance)

etcd_server_proposals_committed_sum_increase

sum(increase(etcd_server_proposals_committed_total{instance=~"10.0.2.15.*"}[5m])) by (instance)

etcd_server_proposals_pending_sum_increase

sum(increase(etcd_server_proposals_pending_total{instance=~"10.0.2.15.*"}[5m])) by (instance)

etcd_server_proposals_failed_sum_increase

sum(increase(etcd_server_proposals_failed_total{instance=~"10.0.2.15.*"}[5m])) by (instance)

etcd_db_bytes_sum

sum(etcd_debugging_mvcc_db_total_size_in_bytes{instance=~"10.0.2.15.*"}) by (instance)

etcd_disk_snapshot_duration_seconds_sum_quantile

sum(histogram_quantile(0.99, rate(etcd_disk_backend_snapshot_duration_seconds_bucket{instance=~"10.0.2.15.*"}[5m]))) by (instance)

etcd_disk_wal_fsync_duration_seconds_sum_quantile

sum(histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{instance=~"10.0.2.15.*"}[5m]))) by (instance)

etcd_disk_commit_duration_seconds_sum_quantile

sum(histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{instance=~"10.0.2.15.*"}[5m]))) by (instance)

etcd_grpc_request_error_percent

sum(rate(grpc_server_handled_total{grpc_code!="OK", instance=~"10.0.2.15.*"}[5m])) / sum(rate(grpc_server_handled_total{instance=~"10.0.2.15.*"}[5m]))

etcd_grpc_request_slow_quantile

sum(histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{grpc_type="unary", instance=~"10.0.2.15.*"}[5m])) by (le, instance))) by (instance)

etcd_grpc_client_transmit_bytes_sum_rate

sum(rate(etcd_network_client_grpc_sent_bytes_total{instance=~"10.0.2.15.*"}[5m])) by (instance)

etcd_grpc_client_receive_bytes_sum_rate

sum(rate(etcd_network_client_grpc_received_bytes_total{instance=~"10.0.2.15.*"}[5m])) by (instance)

API Server Metrics

apiserver_request_latency_milliseconds_avg

avg(apiserver_request_latencies_sum / apiserver_request_latencies_count) / 1e+03

apiserver_request_error_percent

sum(rate(apiserver_request_count{code=~"^(?:5..)$", instance=~"10.0.2.15.*"}[5m])) by (instance) / sum(rate(apiserver_request_count[5m])) by (instance)

apiserver_request_count_sum_rate

sum(rate(apiserver_request_count{instance=~"10.0.2.15.*"}[5m])) by (instance)

apiserver_etcd_helper_cache_hit_count_sum_rate

sum(rate(etcd_helper_cache_hit_count{instance=~"10.0.2.15.*"}[5m])) by (instance)

apiserver_etcd_helper_cache_miss_count_sum_rate

sum(rate(etcd_helper_cache_miss_count{instance=~"10.0.2.15.*"}[5m])) by (instance)

apiserver_etcd_request_cache_add_latencies_summary_sum_rate

sum(rate(request_cache_add_latencies_summary_sum{instance=~"10.0.2.15.*"}[5m])) by (instance)

apiserver_etcd_request_cache_get_latencies_summary

sum(rate(request_cache_get_latencies_summary{instance=~"10.0.2.15.*"}[5m])) by (instance)

apiserver_etcd_request_cache_get_latencies_summary_sum_rate

sum(rate(request_cache_get_latencies_summary_sum{instance=~"10.0.2.15.*"}[5m])) by (instance)

apiserver_etcd_request_cache_get_latencies_summary_count_rate

sum(rate(request_cache_get_latencies_summary_count{instance=~"10.0.2.15.*"}[5m])) by (instance)

apiserver_etcd_request_cache_add_latencies_summary

sum(etcd_request_cache_add_latencies_summary{instance=~"10.0.2.15.*"}) by (instance)

apiserver_etcd_request_cache_add_latencies_summary_count_rate

sum(rate(request_cache_add_latencies_summary_count{instance=~"10.0.2.15.*"}[5m])) by (instance)

apiserver_etcd_helper_cache_entry_count_sum_rate

sum(rate(etcd_helper_cache_entry_count{instance=~"10.0.2.15.*"}[5m])) by (instance)

apiserver_api_service_registration_controller_queue_duration_sum

sum(api_service_registration_work_duration{instance=~"10.0.2.15.*"}) by (instance)

apiserver_api_service_registration_controller_adds_sum_rate

sum(rate(api_service_registration_adds{instance=~"10.0.2.15.*"}[5m])) by (instance)

apiserver_api_service_registration_controller_queue_latency_sum

sum(api_service_registration_queue_latency{instance=~"10.0.2.15.*"}) by (instance)

apiserver_api_service_registration_controller_queue_retries_sum

sum(api_service_registration_retries{instance=~"10.0.2.15.*"}) by (instance)

apiserver_namespace_container_cpu_usage_sum_rate

sum(rate(container_cpu_usage_seconds_total{container_name!="POD"}[5m])) by (namespace)

apiserver_namespace_container_memory_usage_bytes_sum

sum(container_memory_usage_bytes{container_name!=""}) by (namespace)

apiserver_namespace_container_spec_cpu_shares_sum

sum(container_spec_cpu_shares{container_name!=""}) by (namespace)

Scheduler

scheduler_scheduling_algorithm_latency_seconds_quantile

histogram_quantile(0.99, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket{instance=~"10.0.2.15.*"}) by (le, instance)) / 1e+06

scheduler_binding_latency_seconds_quantile

histogram_quantile(0.99, sum(scheduler_binding_latency_microseconds_bucket{instance=~"10.0.2.15.*"}) by (le, instance)) / 1e+06

scheduler_scheduling_latency_seconds_quantile

sum(scheduler_scheduling_latency_seconds{instance=~"10.0.2.15.*"}) by (instance, operation)

scheduler_e_2_e_scheduling_latency_seconds_quantile

histogram_quantile(0.99, sum(scheduler_e2e_scheduling_latency_microseconds_bucket{instance=~"10.0.2.15.*"}) by (le, instance)) / 1e+06

Controller Manager

controllermanager_deployment_adds_rate

sum(rate(deployment_adds{instance=~"10.0.2.15.*"}[5m])) by (instance)

controllermanager_deployment_queue_latency

sum(deployment_queue_latency{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_deployment_adds

sum(deployment_adds{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_deployment_retries

sum(deployment_retries{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_deployment_work_duration

sum(deployment_work_duration{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_replicaset_adds

sum(replicaset_adds{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_replicaset_retries

sum(replicaset_retries{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_statefulset_adds

sum(statefulset_adds{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_daemonset_adds

sum(daemonset_adds{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_daemonset_retries

sum(daemonset_retries{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_statefulset_queue_latency

sum(statefulset_queue_latency{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_endpoint_queue_latency

sum(endpoint_queue_latency{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_service_queue_latency

sum(service_queue_latency{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_serviceaccount_adds

sum(serviceaccount_adds{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_endpoint_adds

sum(endpoint_adds{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_endpoint_work_duration

sum(endpoint_work_duration{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_service_work_duration

sum(service_work_duration{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_serviceaccount_adds_rate

sum(rate(serviceaccount_adds{instance=~"10.0.2.15.*"}[5m])) by (instance)

controllermanager_serviceaccount_retries

sum(serviceaccount_retries{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_volumes_work_duration

sum(volumes_work_duration{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_daemonset_queue_latency

sum(daemonset_queue_latency{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_replicaset_work_duration

sum(replicaset_work_duration{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_endpoint_adds_rate

sum(rate(endpoint_adds{instance=~"10.0.2.15.*"}[5m])) by (instance)

controllermanager_service_adds_rate

sum(rate(service_adds{instance=~"10.0.2.15.*"}[5m])) by (instance)

controllermanager_service_adds

sum(service_adds{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_volumes_queue_latency

sum(volumes_queue_latency{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_replicaset_adds_rate

sum(rate(replicaset_adds{instance=~"10.0.2.15.*"}[5m])) by (instance)

controllermanager_service_retries

sum(service_retries{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_serviceaccount_queue_latency

sum(serviceaccount_queue_latency{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_statefulset_adds_rate

sum(rate(statefulset_adds{instance=~"10.0.2.15.*"}[5m])) by (instance)

controllermanager_statefulset_work_duration

sum(statefulset_work_duration{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_volumes_retries

sum(volumes_retries{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_endpoint_retries

sum(endpoint_retries{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_replicaset_queue_latency

sum(replicaset_queue_latency{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_serviceaccount_work_duration

sum(serviceaccount_work_duration{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_statefulset_retries

sum(statefulset_retries{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_volumes_adds_rate

sum(rate(volumes_adds{instance=~"10.0.2.15.*"}[5m])) by (instance)

controllermanager_volumes_adds

sum(volumes_adds{instance=~"10.0.2.15.*"}) by (instance)

controllermanager_daemonset_adds_rate

sum(rate(daemonset_adds{instance=~"10.0.2.15.*"}[5m])) by (instance)

controllermanager_daemonset_work_duration

sum(daemonset_work_duration{instance=~"10.0.2.15.*"}) by (instance)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment