Skip to content

Instantly share code, notes, and snippets.

@fritshoogland-yugabyte
Last active April 13, 2021 13:25
Show Gist options
  • Save fritshoogland-yugabyte/d787c5c668487f37d708213e715a88ac to your computer and use it in GitHub Desktop.
Save fritshoogland-yugabyte/d787c5c668487f37d708213e715a88ac to your computer and use it in GitHub Desktop.
Yugabyte server metrics diff
#!/usr/bin/env python
from timeit import default_timer as timer
import urllib.request as request
import json, time, sys, os, re
# Settings.
# NOTE(review): none of these three names is referenced anywhere else in the
# visible script; they look like placeholders for options - confirm before
# relying on them.
time_between_updates = 5
skip_tablets = False
group_tablet_stats = False

# "Current" snapshot of everything read from the servers. There is one dict
# per recorded field, each keyed by the full statistic name
# ("<server_type>:<entity type>:...:<metric name>"). The main loop moves these
# into the matching previous_* names at the start of every pass and then
# rebinds them to fresh dicts, so they start out empty here.
current_statistics_values = {}
current_statistics_total_count = {}
current_statistics_total_sum = {}
current_statistics_min = {}
current_statistics_max = {}
current_statistics_mean = {}
current_statistics_median = {}
current_statistics_std_dev = {}
current_statistics_percentile_75 = {}
current_statistics_percentile_95 = {}
current_statistics_percentile_99 = {}
current_statistics_percentile_99_9 = {}
current_statistics_percentile_99_99 = {}
current_statistics_count = {}
current_statistics_sum = {}
current_statistics_rows = {}
# processed_call_count per RPC connection, keyed by
# "<in|out> <server_type> <remote_ip> <state>".
current_rpcs = {}
# Start-up timestamp; the first pass reports its interval relative to this.
current_time = timer()

# Endpoints polled on every pass. Each server has a /metrics endpoint and an
# /rpcz endpoint on the same port; the /rpcz entry reuses the server code with
# an 'R' suffix. The codes presumably stand for master (MS), tablet server
# (TS), redis (TR), cql (TC) and postgres/ysql (TP) - TODO confirm.
url_to_read = [
    {'server_type': "MS", 'url': "http://localhost:7000/metrics"},
    # Fixed: this entry used port 9000, which made it an exact duplicate of
    # the TSR endpoint instead of reading from the same server as MS.
    {'server_type': "MSR", 'url': "http://localhost:7000/rpcz"},
    {'server_type': "TS", 'url': "http://localhost:9000/metrics"},
    {'server_type': "TSR", 'url': "http://localhost:9000/rpcz"},
    {'server_type': "TR", 'url': "http://localhost:11000/metrics"},
    {'server_type': "TRR", 'url': "http://localhost:11000/rpcz"},
    {'server_type': "TC", 'url': "http://localhost:12000/metrics"},
    {'server_type': "TCR", 'url': "http://localhost:12000/rpcz"},
    {'server_type': "TP", 'url': "http://localhost:13000/metrics"},
    {'server_type': "TPR", 'url': "http://localhost:13000/rpcz"},
]
# Metric names that are left out of the "(values)" output. They are matched
# against the bare metric name, i.e. the part of the statistic key after the
# last ':'. (Comparing against the full prefixed key can never match.)
excluded_value_metrics = frozenset((
    'cpu_utime',
    'cpu_stime',
    'involuntary_context_switches',
    'voluntary_context_switches',
    'hybrid_clock_hybrid_time',
    'tcmalloc_current_total_thread_cache_bytes',
))

# Main loop. Every pass:
#   1. moves the last snapshot into previous_* and starts an empty current_*,
#   2. reads every endpoint in url_to_read and fills the current_* dicts,
#   3. computes the change between both snapshots,
#   4. prints every statistic that changed, then waits for <enter>.
while True:
    # ---- 1. rotate snapshots ------------------------------------------------
    previous_statistics_values = current_statistics_values
    previous_statistics_total_count = current_statistics_total_count
    previous_statistics_total_sum = current_statistics_total_sum
    previous_statistics_min = current_statistics_min
    previous_statistics_max = current_statistics_max
    previous_statistics_mean = current_statistics_mean
    previous_statistics_median = current_statistics_median
    previous_statistics_std_dev = current_statistics_std_dev
    previous_statistics_percentile_75 = current_statistics_percentile_75
    previous_statistics_percentile_95 = current_statistics_percentile_95
    previous_statistics_percentile_99 = current_statistics_percentile_99
    previous_statistics_percentile_99_9 = current_statistics_percentile_99_9
    previous_statistics_percentile_99_99 = current_statistics_percentile_99_99
    previous_statistics_count = current_statistics_count
    previous_statistics_sum = current_statistics_sum
    previous_statistics_rows = current_statistics_rows
    previous_rpcs = current_rpcs
    previous_time = current_time

    current_statistics_values = {}
    current_statistics_total_count = {}
    current_statistics_total_sum = {}
    current_statistics_min = {}
    current_statistics_max = {}
    current_statistics_mean = {}
    current_statistics_median = {}
    current_statistics_std_dev = {}
    current_statistics_percentile_75 = {}
    current_statistics_percentile_95 = {}
    current_statistics_percentile_99 = {}
    current_statistics_percentile_99_9 = {}
    current_statistics_percentile_99_99 = {}
    current_statistics_count = {}
    current_statistics_sum = {}
    current_statistics_rows = {}
    current_rpcs = {}

    # ---- 2. read all endpoints ----------------------------------------------
    for current_url in url_to_read:
        server_code = current_url['server_type']
        try:
            with request.urlopen(current_url['url']) as response:
                http_response = response.read()
            parsed_json = json.loads(http_response)
            current_time = timer()
        except Exception:
            # Best effort: an unreachable server or an unparsable reply is
            # reported and skipped. Catching Exception (not a bare except)
            # keeps Ctrl-C from being reported as a missing server.
            print("server: %s (%s) not found." % (current_url['server_type'], current_url['url']))
            continue

        if isinstance(parsed_json, dict):
            # A JSON object is an /rpcz reply. Only replies carrying
            # inbound_connections / outbound_connections are used (master and
            # tserver); ysql/cql connections have a different JSON format and
            # contribute nothing. Either list may be absent, hence .get().
            # NOTE: outbound connections are not unique with the
            # remote_ip + state combination, so later entries overwrite
            # earlier ones with the same key.
            for direction, section in (('in', 'inbound_connections'), ('out', 'outbound_connections')):
                for connection in parsed_json.get(section, []):
                    rpc_key = direction + ' ' + server_code + ' ' + connection['remote_ip'] + ' ' + connection['state']
                    current_rpcs[rpc_key] = connection['processed_call_count']
            continue

        # Otherwise this is a /metrics reply: a list of entities, each with a
        # 'type', an 'id' and a list of 'metrics'.
        for raw_metrics in parsed_json:
            if 'type' not in raw_metrics:
                continue
            entity_type = raw_metrics['type']
            entity_id = raw_metrics['id']
            if entity_type == 'tablet':
                # For tablets the id is the tablet id; the key is built from
                # the tablet attributes instead.
                attributes = raw_metrics['attributes']
                prefix = (server_code + ':' + entity_type + ':' + attributes['namespace_name'] + ':'
                          + attributes['table_name'] + ':' + attributes['partition'] + ':')
            elif entity_type in ('cluster', 'server'):
                # attributes is empty for these types.
                # types of servers (id): yb.tabletserver, yb.master,
                # yb.cqlserver, yb.redisserver and yb.ysqlserver
                prefix = server_code + ':' + entity_type + ':' + entity_id + ':'
            else:
                print("ERROR - type:%s" % (entity_type))
                # Skip the entity: without a prefix of its own its metrics
                # would be stored under the previous entity's prefix.
                continue

            for metric in raw_metrics['metrics']:
                if not ('value' in metric or 'std_dev' in metric
                        or 'percentile_75' in metric or 'count' in metric):
                    print("ERROR: - metric%s" % (metric))
                    continue
                statistic_key = prefix + metric['name']
                if 'value' in metric:
                    current_statistics_values[statistic_key] = metric['value']
                elif 'std_dev' in metric:
                    current_statistics_total_count[statistic_key] = metric['total_count']
                    current_statistics_total_sum[statistic_key] = metric['total_sum']
                    current_statistics_min[statistic_key] = metric['min']
                    current_statistics_max[statistic_key] = metric['max']
                    current_statistics_mean[statistic_key] = metric['mean']
                    current_statistics_median[statistic_key] = metric['median']
                    current_statistics_std_dev[statistic_key] = metric['std_dev']
                    current_statistics_percentile_95[statistic_key] = metric['percentile_95']
                    current_statistics_percentile_99[statistic_key] = metric['percentile_99']
                elif 'percentile_75' in metric:
                    current_statistics_total_count[statistic_key] = metric['total_count']
                    current_statistics_total_sum[statistic_key] = metric['total_sum']
                    current_statistics_min[statistic_key] = metric['min']
                    current_statistics_max[statistic_key] = metric['max']
                    current_statistics_mean[statistic_key] = metric['mean']
                    current_statistics_percentile_75[statistic_key] = metric['percentile_75']
                    current_statistics_percentile_95[statistic_key] = metric['percentile_95']
                    current_statistics_percentile_99[statistic_key] = metric['percentile_99']
                    current_statistics_percentile_99_9[statistic_key] = metric['percentile_99_9']
                    current_statistics_percentile_99_99[statistic_key] = metric['percentile_99_99']
                else:
                    # 'count' type
                    current_statistics_count[statistic_key] = metric['count']
                    current_statistics_sum[statistic_key] = metric['sum']
                    current_statistics_rows[statistic_key] = metric['rows']

    # ---- 3. compute the change since the previous snapshot ------------------
    change_statistics_values = {}
    change_statistics_total_count = {}
    change_statistics_total_sum = {}
    change_statistics_min = {}
    change_statistics_max = {}
    change_statistics_mean = {}
    change_statistics_median = {}
    change_statistics_std_dev = {}
    change_statistics_percentile_75 = {}
    change_statistics_percentile_95 = {}
    change_statistics_percentile_99 = {}
    change_statistics_percentile_99_9 = {}
    change_statistics_percentile_99_99 = {}
    change_statistics_count = {}
    change_statistics_sum = {}
    change_statistics_rows = {}
    change_rpcs = {}
    change_statistics_time = current_time - previous_time

    # value type: plain difference; a statistic seen for the first time
    # reports its absolute value.
    for statistic_name, value in current_statistics_values.items():
        if statistic_name in previous_statistics_values:
            change_statistics_values[statistic_name] = value - previous_statistics_values[statistic_name]
        else:
            change_statistics_values[statistic_name] = value

    # std_dev type: only total_count/total_sum are differenced. When there is
    # no previous sample, or total_count went down, the absolute values are
    # used. min/max/mean/... are copied from the current snapshot as-is.
    for statistic_name in current_statistics_std_dev:
        if (statistic_name in previous_statistics_total_count
                and current_statistics_total_count[statistic_name] >= previous_statistics_total_count[statistic_name]):
            change_statistics_total_count[statistic_name] = current_statistics_total_count[statistic_name] - previous_statistics_total_count[statistic_name]
            change_statistics_total_sum[statistic_name] = current_statistics_total_sum[statistic_name] - previous_statistics_total_sum[statistic_name]
        else:
            change_statistics_total_count[statistic_name] = current_statistics_total_count[statistic_name]
            change_statistics_total_sum[statistic_name] = current_statistics_total_sum[statistic_name]
        change_statistics_min[statistic_name] = current_statistics_min[statistic_name]
        change_statistics_max[statistic_name] = current_statistics_max[statistic_name]
        change_statistics_mean[statistic_name] = current_statistics_mean[statistic_name]
        change_statistics_median[statistic_name] = current_statistics_median[statistic_name]
        change_statistics_std_dev[statistic_name] = current_statistics_std_dev[statistic_name]
        change_statistics_percentile_95[statistic_name] = current_statistics_percentile_95[statistic_name]
        change_statistics_percentile_99[statistic_name] = current_statistics_percentile_99[statistic_name]

    # percentile_75 type: same rules as the std_dev type.
    for statistic_name in current_statistics_percentile_75:
        if (statistic_name in previous_statistics_total_count
                and current_statistics_total_count[statistic_name] >= previous_statistics_total_count[statistic_name]):
            change_statistics_total_count[statistic_name] = current_statistics_total_count[statistic_name] - previous_statistics_total_count[statistic_name]
            change_statistics_total_sum[statistic_name] = current_statistics_total_sum[statistic_name] - previous_statistics_total_sum[statistic_name]
        else:
            change_statistics_total_count[statistic_name] = current_statistics_total_count[statistic_name]
            change_statistics_total_sum[statistic_name] = current_statistics_total_sum[statistic_name]
        change_statistics_min[statistic_name] = current_statistics_min[statistic_name]
        change_statistics_max[statistic_name] = current_statistics_max[statistic_name]
        change_statistics_mean[statistic_name] = current_statistics_mean[statistic_name]
        change_statistics_percentile_75[statistic_name] = current_statistics_percentile_75[statistic_name]
        change_statistics_percentile_95[statistic_name] = current_statistics_percentile_95[statistic_name]
        change_statistics_percentile_99[statistic_name] = current_statistics_percentile_99[statistic_name]
        change_statistics_percentile_99_9[statistic_name] = current_statistics_percentile_99_9[statistic_name]
        change_statistics_percentile_99_99[statistic_name] = current_statistics_percentile_99_99[statistic_name]

    # count type: count, sum and rows are differenced together, decided by
    # whether count is known and did not go down.
    for statistic_name in current_statistics_count:
        if (statistic_name in previous_statistics_count
                and current_statistics_count[statistic_name] >= previous_statistics_count[statistic_name]):
            change_statistics_count[statistic_name] = current_statistics_count[statistic_name] - previous_statistics_count[statistic_name]
            change_statistics_sum[statistic_name] = current_statistics_sum[statistic_name] - previous_statistics_sum[statistic_name]
            change_statistics_rows[statistic_name] = current_statistics_rows[statistic_name] - previous_statistics_rows[statistic_name]
        else:
            change_statistics_count[statistic_name] = current_statistics_count[statistic_name]
            change_statistics_sum[statistic_name] = current_statistics_sum[statistic_name]
            change_statistics_rows[statistic_name] = current_statistics_rows[statistic_name]

    # rpcs: same rule applied to processed_call_count.
    for connection in current_rpcs:
        if connection in previous_rpcs and current_rpcs[connection] >= previous_rpcs[connection]:
            change_rpcs[connection] = current_rpcs[connection] - previous_rpcs[connection]
        else:
            change_rpcs[connection] = current_rpcs[connection]

    # ---- 4. report ----------------------------------------------------------
    for server in ['MS', 'TS', 'TR', 'TC', 'TP']:
        # Only MS and TS get a header line; TR/TC/TP output follows the TS
        # section without a separator.
        if server in ['MS', 'TS']:
            print("--server: %s, time in snapshot: %5.5f-------------------------" % (server, change_statistics_time))
        for server_type in ['cluster', 'server', 'tablet']:
            key_prefix = server + ':' + server_type

            # Plain values are not printed for TR and TC.
            if server not in ['TR', 'TC']:
                for statistic_name, delta in change_statistics_values.items():
                    if statistic_name.rsplit(':', 1)[-1] in excluded_value_metrics:
                        continue
                    if statistic_name.startswith(key_prefix) and delta > 0:
                        print("%-90s (values): %15d, %15.3f p/sec" % (statistic_name[:90], delta, delta/change_statistics_time))

            # Both latency flavours print the same columns. median, std_dev
            # and the percentiles are collected above but not printed.
            for latency_statistics in (change_statistics_std_dev, change_statistics_percentile_75):
                for statistic_name in latency_statistics:
                    if statistic_name.startswith(key_prefix) and change_statistics_total_count[statistic_name] > 0:
                        print("%-90s (lat) : %15d, %15.3f p/sec, min: %12.2f, mean: %12.2f, max: %12.2f" % (statistic_name[:90], change_statistics_total_count[statistic_name], change_statistics_total_count[statistic_name]/change_statistics_time, change_statistics_min[statistic_name], change_statistics_mean[statistic_name], change_statistics_max[statistic_name]))

            for statistic_name in change_statistics_count:
                if statistic_name.startswith(key_prefix) and change_statistics_count[statistic_name] > 0:
                    print("%-90s : %15d, %15.3f p/sec, sum: %12.2f, rows: %12.2f" % (statistic_name[:90], change_statistics_count[statistic_name], change_statistics_count[statistic_name]/change_statistics_time, change_statistics_sum[statistic_name], change_statistics_rows[statistic_name]))

    print("--rpc-----------------")
    for connection in change_rpcs:
        print("%-90s (calls) : %15d, %15.3f p/sec" % (connection[:90], change_rpcs[connection], change_rpcs[connection]/change_statistics_time))

    # Wait for the user; Ctrl-C or end of input ends the program cleanly.
    try:
        input("Press enter...")
    except (KeyboardInterrupt, EOFError):
        print('Cancelled.')
        sys.exit(0)
    os.system('clear')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment