Last active
April 13, 2021 13:25
-
-
Save fritshoogland-yugabyte/d787c5c668487f37d708213e715a88ac to your computer and use it in GitHub Desktop.
Yugabyte server metrics diff
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
from timeit import default_timer as timer
import urllib.request as request
import json, time, sys, os, re

# Tool settings. NOTE(review): none of these three are referenced by the
# visible polling loop; they are kept so existing users of the names still work.
time_between_updates = 5
skip_tablets = False
group_tablet_stats = False

# ---------------------------------------------------------------------------
# Snapshot state.
#
# Every dict maps a fully qualified statistic name (built by the polling loop
# as "<server_type>:<entity type>:...:<metric name>") to the value last read.
# The polling loop moves each "current_*" dict to a "previous_*" name and
# starts a fresh one on every pass, so these only have to exist (empty)
# before the first pass.
# ---------------------------------------------------------------------------

# Metrics that expose a single 'value' field.
current_statistics_values = {}

# Fields shared by both histogram flavours (entries with 'std_dev' and
# entries with 'percentile_75').
current_statistics_total_count = {}
current_statistics_total_sum = {}
current_statistics_min = {}
current_statistics_max = {}
current_statistics_mean = {}
current_statistics_percentile_95 = {}
current_statistics_percentile_99 = {}

# Fields only present on the 'std_dev' flavour.
current_statistics_median = {}
current_statistics_std_dev = {}

# Fields only present on the 'percentile_75' flavour.
current_statistics_percentile_75 = {}
current_statistics_percentile_99_9 = {}
current_statistics_percentile_99_99 = {}

# Metrics that expose 'count' / 'sum' / 'rows'.
current_statistics_count = {}
current_statistics_sum = {}
current_statistics_rows = {}

# Processed-call counters per rpc connection, read from the /rpcz endpoints.
current_rpcs = {}

# Time of the most recent successful fetch; seeds the first snapshot interval.
current_time = timer()

# Endpoints polled on every pass. Each server has a '/metrics' entry and an
# 'R'-suffixed '/rpcz' entry on the same port. The master rpcz entry ('MSR')
# must use the master port (7000), not the tablet server port it previously
# duplicated from 'TSR'.
url_to_read = [
    {'server_type': "MS", 'url': "http://localhost:7000/metrics"},
    {'server_type': "MSR", 'url': "http://localhost:7000/rpcz"},
    {'server_type': "TS", 'url': "http://localhost:9000/metrics"},
    {'server_type': "TSR", 'url': "http://localhost:9000/rpcz"},
    {'server_type': "TR", 'url': "http://localhost:11000/metrics"},
    {'server_type': "TRR", 'url': "http://localhost:11000/rpcz"},
    {'server_type': "TC", 'url': "http://localhost:12000/metrics"},
    {'server_type': "TCR", 'url': "http://localhost:12000/rpcz"},
    {'server_type': "TP", 'url': "http://localhost:13000/metrics"},
    {'server_type': "TPR", 'url': "http://localhost:13000/rpcz"},
]
# Value-type metrics that are left out of the report. The names are compared
# against the bare metric name (the part after the last ':'), because the
# dictionary keys carry the "<server>:<type>:...:" prefix.
hidden_value_metrics = (
    'cpu_utime',
    'cpu_stime',
    'involuntary_context_switches',
    'voluntary_context_switches',
    'hybrid_clock_hybrid_time',
    'tcmalloc_current_total_thread_cache_bytes',
)

# Main loop: take a snapshot of every endpoint in url_to_read, diff it against
# the previous snapshot, print the statistics that changed, then wait for the
# user to press enter before taking the next snapshot.
while True:
    # ------------------------------------------------------------------
    # 1. Roll the snapshot: what was "current" becomes "previous".
    # ------------------------------------------------------------------
    previous_statistics_values = current_statistics_values
    previous_statistics_total_count = current_statistics_total_count
    previous_statistics_total_sum = current_statistics_total_sum
    previous_statistics_min = current_statistics_min
    previous_statistics_max = current_statistics_max
    previous_statistics_mean = current_statistics_mean
    previous_statistics_median = current_statistics_median
    previous_statistics_std_dev = current_statistics_std_dev
    previous_statistics_percentile_75 = current_statistics_percentile_75
    previous_statistics_percentile_95 = current_statistics_percentile_95
    previous_statistics_percentile_99 = current_statistics_percentile_99
    previous_statistics_percentile_99_9 = current_statistics_percentile_99_9
    previous_statistics_percentile_99_99 = current_statistics_percentile_99_99
    previous_statistics_count = current_statistics_count
    previous_statistics_sum = current_statistics_sum
    previous_statistics_rows = current_statistics_rows
    previous_rpcs = current_rpcs
    previous_time = current_time

    current_statistics_values = {}
    current_statistics_total_count = {}
    current_statistics_total_sum = {}
    current_statistics_min = {}
    current_statistics_max = {}
    current_statistics_mean = {}
    current_statistics_median = {}
    current_statistics_std_dev = {}
    current_statistics_percentile_75 = {}
    current_statistics_percentile_95 = {}
    current_statistics_percentile_99 = {}
    current_statistics_percentile_99_9 = {}
    current_statistics_percentile_99_99 = {}
    current_statistics_count = {}
    current_statistics_sum = {}
    current_statistics_rows = {}
    current_rpcs = {}

    # ------------------------------------------------------------------
    # 2. Read every endpoint and file its numbers into the current_* dicts.
    # ------------------------------------------------------------------
    for current_url in url_to_read:
        server_label = current_url['server_type']
        try:
            with request.urlopen(current_url['url']) as response:
                http_response = response.read()
            parsed_json = json.loads(http_response)
            current_time = timer()
        except Exception:
            # Unreachable endpoint or unparsable payload: report and move on.
            # KeyboardInterrupt is deliberately not caught here so Ctrl-C
            # still stops the tool while a fetch is in progress.
            print("server: %s (%s) not found." % (server_label, current_url['url']))
            continue

        if isinstance(parsed_json, dict):
            # A dict payload is an rpcz document. Only the master/tserver
            # format (inbound_connections / outbound_connections) is read;
            # either list may be absent, which is treated as empty.
            for connection in parsed_json.get('inbound_connections') or []:
                current_rpcs['in'+' '+server_label+' '+connection['remote_ip']+' '+connection['state']] = connection['processed_call_count']
            # outbound connections are not unique with remote_ip + state combination !!
            for connection in parsed_json.get('outbound_connections') or []:
                current_rpcs['out'+' '+server_label+' '+connection['remote_ip']+' '+connection['state']] = connection['processed_call_count']
            continue

        # Otherwise the payload is a list of metric entities.
        for raw_metrics in parsed_json:
            if 'type' not in raw_metrics:
                continue
            entity_type = raw_metrics['type']
            entity_id = raw_metrics['id']
            if entity_type == 'tablet':
                # id is the tablet id; the name is built from the attributes.
                attributes = raw_metrics['attributes']
                partition = attributes['partition']
                namespace = attributes['namespace_name']
                table_name = attributes['table_name']
                prefix = server_label+':'+entity_type+':'+namespace+':'+table_name+':'+partition+':'
            elif entity_type == 'cluster':
                # attributes is empty
                prefix = server_label+':'+entity_type+':'+entity_id+':'
            elif entity_type == 'server':
                # attributes is empty
                # types of servers (id): yb.tabletserver, yb.master, yb.cqlserver, yb.redisserver and yb.ysqlserver
                prefix = server_label+':'+entity_type+':'+entity_id+':'
            else:
                # Unknown entity type: report it and skip its metrics, so
                # they are not stored under the previous entity's prefix.
                print( "ERROR - type:%s" % (entity_type) )
                continue

            for metric in raw_metrics['metrics']:
                if 'value' in metric:
                    current_statistics_values[prefix+metric['name']] = metric['value']
                elif 'std_dev' in metric:
                    key = prefix+metric['name']
                    current_statistics_total_count[key] = metric['total_count']
                    current_statistics_total_sum[key] = metric['total_sum']
                    current_statistics_min[key] = metric['min']
                    current_statistics_max[key] = metric['max']
                    current_statistics_mean[key] = metric['mean']
                    current_statistics_median[key] = metric['median']
                    current_statistics_std_dev[key] = metric['std_dev']
                    current_statistics_percentile_95[key] = metric['percentile_95']
                    current_statistics_percentile_99[key] = metric['percentile_99']
                elif 'percentile_75' in metric:
                    key = prefix+metric['name']
                    current_statistics_total_count[key] = metric['total_count']
                    current_statistics_total_sum[key] = metric['total_sum']
                    current_statistics_min[key] = metric['min']
                    current_statistics_max[key] = metric['max']
                    current_statistics_mean[key] = metric['mean']
                    current_statistics_percentile_75[key] = metric['percentile_75']
                    current_statistics_percentile_95[key] = metric['percentile_95']
                    current_statistics_percentile_99[key] = metric['percentile_99']
                    current_statistics_percentile_99_9[key] = metric['percentile_99_9']
                    current_statistics_percentile_99_99[key] = metric['percentile_99_99']
                elif 'count' in metric:
                    key = prefix+metric['name']
                    current_statistics_count[key] = metric['count']
                    current_statistics_sum[key] = metric['sum']
                    current_statistics_rows[key] = metric['rows']
                else:
                    print( "ERROR: - metric%s" % (metric) )

    # ------------------------------------------------------------------
    # 3. Diff the current snapshot against the previous one.
    #    Counters are subtracted; if a counter is new or went backwards the
    #    current value is used as-is. Point-in-time fields (min, max, mean,
    #    percentiles, ...) are copied from the current snapshot unchanged.
    # ------------------------------------------------------------------
    change_statistics_values = {}
    change_statistics_total_count = {}
    change_statistics_total_sum = {}
    change_statistics_min = {}
    change_statistics_max = {}
    change_statistics_mean = {}
    change_statistics_median = {}
    change_statistics_std_dev = {}
    change_statistics_percentile_75 = {}
    change_statistics_percentile_95 = {}
    change_statistics_percentile_99 = {}
    change_statistics_percentile_99_9 = {}
    change_statistics_percentile_99_99 = {}
    change_statistics_count = {}
    change_statistics_sum = {}
    change_statistics_rows = {}
    change_rpcs = {}
    change_statistics_time = current_time-previous_time

    # value type (no went-backwards check: the plain difference is stored)
    for statistic_name in current_statistics_values:
        if statistic_name in previous_statistics_values:
            change_statistics_values[statistic_name] = current_statistics_values[statistic_name]-previous_statistics_values[statistic_name]
        else:
            change_statistics_values[statistic_name] = current_statistics_values[statistic_name]

    # std_dev type
    for statistic_name in current_statistics_std_dev:
        if statistic_name in previous_statistics_total_count and current_statistics_total_count[statistic_name] >= previous_statistics_total_count[statistic_name]:
            change_statistics_total_count[statistic_name] = current_statistics_total_count[statistic_name]-previous_statistics_total_count[statistic_name]
            change_statistics_total_sum[statistic_name] = current_statistics_total_sum[statistic_name]-previous_statistics_total_sum[statistic_name]
        else:
            change_statistics_total_count[statistic_name] = current_statistics_total_count[statistic_name]
            change_statistics_total_sum[statistic_name] = current_statistics_total_sum[statistic_name]
        change_statistics_min[statistic_name] = current_statistics_min[statistic_name]
        change_statistics_max[statistic_name] = current_statistics_max[statistic_name]
        change_statistics_mean[statistic_name] = current_statistics_mean[statistic_name]
        change_statistics_median[statistic_name] = current_statistics_median[statistic_name]
        change_statistics_std_dev[statistic_name] = current_statistics_std_dev[statistic_name]
        change_statistics_percentile_95[statistic_name] = current_statistics_percentile_95[statistic_name]
        change_statistics_percentile_99[statistic_name] = current_statistics_percentile_99[statistic_name]

    # percentile_75 type
    for statistic_name in current_statistics_percentile_75:
        if statistic_name in previous_statistics_total_count and current_statistics_total_count[statistic_name] >= previous_statistics_total_count[statistic_name]:
            change_statistics_total_count[statistic_name] = current_statistics_total_count[statistic_name]-previous_statistics_total_count[statistic_name]
            change_statistics_total_sum[statistic_name] = current_statistics_total_sum[statistic_name]-previous_statistics_total_sum[statistic_name]
        else:
            change_statistics_total_count[statistic_name] = current_statistics_total_count[statistic_name]
            change_statistics_total_sum[statistic_name] = current_statistics_total_sum[statistic_name]
        change_statistics_min[statistic_name] = current_statistics_min[statistic_name]
        change_statistics_max[statistic_name] = current_statistics_max[statistic_name]
        change_statistics_mean[statistic_name] = current_statistics_mean[statistic_name]
        change_statistics_percentile_75[statistic_name] = current_statistics_percentile_75[statistic_name]
        change_statistics_percentile_95[statistic_name] = current_statistics_percentile_95[statistic_name]
        change_statistics_percentile_99[statistic_name] = current_statistics_percentile_99[statistic_name]
        change_statistics_percentile_99_9[statistic_name] = current_statistics_percentile_99_9[statistic_name]
        change_statistics_percentile_99_99[statistic_name] = current_statistics_percentile_99_99[statistic_name]

    # count type
    for statistic_name in current_statistics_count:
        if statistic_name in previous_statistics_count and current_statistics_count[statistic_name] >= previous_statistics_count[statistic_name]:
            change_statistics_count[statistic_name] = current_statistics_count[statistic_name]-previous_statistics_count[statistic_name]
            change_statistics_sum[statistic_name] = current_statistics_sum[statistic_name]-previous_statistics_sum[statistic_name]
            change_statistics_rows[statistic_name] = current_statistics_rows[statistic_name]-previous_statistics_rows[statistic_name]
        else:
            change_statistics_count[statistic_name] = current_statistics_count[statistic_name]
            change_statistics_sum[statistic_name] = current_statistics_sum[statistic_name]
            change_statistics_rows[statistic_name] = current_statistics_rows[statistic_name]

    # rpcs
    for connection in current_rpcs:
        if connection in previous_rpcs and current_rpcs[connection] >= previous_rpcs[connection]:
            change_rpcs[connection] = current_rpcs[connection]-previous_rpcs[connection]
        else:
            change_rpcs[connection] = current_rpcs[connection]

    # ------------------------------------------------------------------
    # 4. Report everything that changed, grouped by server and entity type.
    #    Median, std_dev and the percentiles are collected above but are not
    #    part of the printed line.
    # ------------------------------------------------------------------
    for server in [ 'MS', 'TS', 'TR', 'TC', 'TP' ]:
        if server in [ 'MS', 'TS' ]:
            print("--server: %s, time in snapshot: %5.5f-------------------------" % (server, change_statistics_time))
        # Value-type metrics are not printed for the 'TR' and 'TC' servers.
        show_values = server not in [ 'TR', 'TC' ]
        for server_type in [ 'cluster', 'server', 'tablet' ]:
            scope = server+':'+server_type
            if show_values:
                for statistic_name in change_statistics_values:
                    # Match the exclusion list on the bare metric name.
                    if statistic_name.rsplit(':', 1)[-1] in hidden_value_metrics:
                        continue
                    if statistic_name.startswith(scope) and change_statistics_values[statistic_name] > 0:
                        print("%-90s (values): %15d, %15.3f p/sec" % (statistic_name[:90], change_statistics_values[statistic_name], change_statistics_values[statistic_name]/change_statistics_time))
            for statistic_name in change_statistics_std_dev:
                if statistic_name.startswith(scope) and change_statistics_total_count[statistic_name] > 0:
                    print("%-90s (lat) : %15d, %15.3f p/sec, min: %12.2f, mean: %12.2f, max: %12.2f" % (statistic_name[:90], change_statistics_total_count[statistic_name], change_statistics_total_count[statistic_name]/change_statistics_time, change_statistics_min[statistic_name], change_statistics_mean[statistic_name], change_statistics_max[statistic_name]))
            for statistic_name in change_statistics_percentile_75:
                if statistic_name.startswith(scope) and change_statistics_total_count[statistic_name] > 0:
                    print("%-90s (lat) : %15d, %15.3f p/sec, min: %12.2f, mean: %12.2f, max: %12.2f" % (statistic_name[:90], change_statistics_total_count[statistic_name], change_statistics_total_count[statistic_name]/change_statistics_time, change_statistics_min[statistic_name], change_statistics_mean[statistic_name], change_statistics_max[statistic_name]))
            for statistic_name in change_statistics_count:
                if statistic_name.startswith(scope) and change_statistics_count[statistic_name] > 0:
                    print("%-90s : %15d, %15.3f p/sec, sum: %12.2f, rows: %12.2f" % (statistic_name[:90], change_statistics_count[statistic_name], change_statistics_count[statistic_name]/change_statistics_time, change_statistics_sum[statistic_name], change_statistics_rows[statistic_name]))

    print("--rpc-----------------")
    for connection in change_rpcs:
        print("%-90s (calls) : %15d, %15.3f p/sec" % (connection[:90], change_rpcs[connection], change_rpcs[connection]/change_statistics_time))

    # ------------------------------------------------------------------
    # 5. Wait for the user; Ctrl-C or end-of-input ends the tool cleanly.
    # ------------------------------------------------------------------
    try:
        input("Press enter...")
    except (KeyboardInterrupt, EOFError):
        print('Cancelled.')
        sys.exit(0)
    os.system('clear')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment