Skip to content

Instantly share code, notes, and snippets.

@arapulido
Last active April 30, 2024 14:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arapulido/d3c532ab5eeb53ccb6b561627f0a0c43 to your computer and use it in GitHub Desktop.
Save arapulido/d3c532ab5eeb53ccb6b561627f0a0c43 to your computer and use it in GitHub Desktop.
Dashboard for right-sizing workshop
{"title":"K8s right-sizing - Workshop","description":"[[suggested_dashboards]]","widgets":[{"id":8514541746519518,"definition":{"title":"Service and cluster metrics and events","background_color":"vivid_blue","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":2150930641154234,"definition":{"title":"P95 latency per service","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"name":"query1","data_source":"metrics","query":"p95:trace.flask.request{$service}"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}},{"id":8746766598521700,"definition":{"title":"Relevant Events in our Cluster","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"alias":"stress containers started","formula":"query1"}],"queries":[{"data_source":"events","name":"query1","indexes":["*"],"compute":{"aggregation":"count"},"group_by":[],"search":{"query":"pod_name:(stress1 OR stress2 OR stress3) message:\"Started container stress*\""}}],"response_format":"timeseries","style":{"palette":"cool","line_type":"solid","line_width":"normal"},"display_type":"bars"},{"formulas":[{"formula":"query0"}],"queries":[{"data_source":"events","name":"query0","indexes":["*"],"compute":{"aggregation":"count"},"group_by":[],"search":{"query":"pod_name:(stress1 OR stress2 OR stress3) message:\"Stopping container stress*\""}}],"response_format":"timeseries","style":{"palette":"warm","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":4,"y":0,"width":4,"height":2}},{"id":1346086513702222,"definition":{"title":"CPU usage per service","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:container.cpu.usage{$service}"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"style":{"palette":"warm","palette_index":0},"formula":"query1"},{"style":{"palette":"warm","palette_index":6},"formula":"query2"}],"queries":[{"name":"query1","data_source":"metrics","query":"min:kubernetes.cpu.requests{$service}"},{"name":"query2","data_source":"metrics","query":"min:kubernetes.cpu.limits{$service}"}],"response_format":"timeseries","style":{"palette":"warm","line_type":"dashed","line_width":"thin"},"display_type":"line"}],"markers":[{"value":"y = 0","display_type":"error dashed"}]},"layout":{"x":8,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":12,"height":3}},{"id":2732823546502230,"definition":{"title":"Lab 4 - CPU Throttling","background_color":"vivid_purple","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":5209896565681020,"definition":{"title":"CPU Throttling","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"name":"query1","data_source":"metrics","query":"sum:container.cpu.throttled.periods{$service} by {container_name}"}],"response_format":"timeseries","style":{"palette":"warm","line_type":"solid","line_width":"normal"},"display_type":"bars"},{"on_right_yaxis":true,"formulas":[{"formula":"query1"}],"queries":[{"name":"query1","data_source":"metrics","query":"sum:container.cpu.throttled{$service} by {container_name}"}],"response_format":"timeseries","style":{"palette":"blue","line_type":"solid","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":4,"height":2}}]},"layout":{"x":0,"y":0,"width":4,"height":3}},{"id":8662264146432054,"definition":{"title":"Lab 5 - CPUManager static","background_color":"vivid_green","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":689192170197526,"definition":{"title":"Container CPU Usage and Limit","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"horizontal","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"avg:container.cpu.usage{$service}"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"line"},{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"min:container.cpu.limit{$service}"}],"response_format":"timeseries","style":{"palette":"warm","line_type":"dashed","line_width":"normal"},"display_type":"line"}]},"layout":{"x":0,"y":0,"width":8,"height":2}}]},"layout":{"x":4,"y":0,"width":8,"height":3}},{"id":6265344783632570,"definition":{"title":"Lab 6 - Memory","background_color":"vivid_pink","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":5446443117562526,"definition":{"title":"Sum of OOM kills by process_name","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:oom_kill.oom_process.count{*} by {process_name}.as_count()"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":0,"y":0,"width":5,"height":3}},{"id":6516811199248888,"definition":{"title":"Kubelet Evictions by (hostname, signal)","title_size":"16","title_align":"left","show_legend":false,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:kubernetes.kubelet.evictions{*} by {eviction_signal,hostname}.as_count()"}],"response_format":"timeseries","style":{"palette":"dog_classic","line_type":"solid","line_width":"normal"},"display_type":"bars"}]},"layout":{"x":5,"y":0,"width":5,"height":3}}]},"layout":{"x":0,"y":0,"width":10,"height":4,"is_column_break":true}}],"template_variables":[{"name":"service","prefix":"service","available_values":[],"default":"*"}],"layout_type":"ordered","notify_list":[],"reflow_type":"fixed"}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment