filipecosta90/merge_histograms.py

## merge_histograms.py

# To find the percentile of a combined dataset of latency values from two Redis servers,
# you would first need to retrieve the latency data from each server and combine it into
# a single dataset.
# Once you have the combined dataset, you can calculate the desired percentile.

import redis
from hdrh.histogram import HdrHistogram

# connect to shard 1
shard1_conn = redis.StrictRedis(port=12000)

# connect to shard 2
shard2_conn = redis.StrictRedis(port=12001)

shard_conns = [shard1_conn, shard2_conn]

# Length of the 'cmdstat_' prefix carried by the keys of INFO commandstats
# (e.g. 'cmdstat_ping' -> 'ping').
CMDSTAT_PREFIX_LEN = len("cmdstat_")

# Gather the distinct command names seen on any shard, in first-seen order.
# A dict is used as an ordered set so the membership test does not rescan a
# list for every key.
seen_commands = {}
for shard_conn in shard_conns:
    cmdstats = shard_conn.info("commandstats")
    for cmd_with_prefix in cmdstats:
        cmd = cmd_with_prefix[CMDSTAT_PREFIX_LEN:]
        # skip config and any command name containing '|'
        if "|" in cmd or "config" in cmd:
            continue
        seen_commands[cmd] = None
commands = list(seen_commands)

print("Will provide the merged metrics for {} disticnt commands".format(len(commands)))

# histogram __init__ values
LOWEST = 1
HIGHEST = 3600 * 1000 * 1000
SIGNIFICANT = 3

# For every command, fold the per-shard latency histograms into one
# HdrHistogram and report the merged median.
for cmd in commands:
    # A fresh histogram per command, so nothing carries over between commands.
    accumulated_histogram = HdrHistogram(LOWEST, HIGHEST, SIGNIFICANT)
    for shard_conn in shard_conns:
        # sample reply:
        # [b'get', [b'calls', 100002, b'histogram_usec',
        #           [1, 99805, 2, 99821, 4, 99915, 8, 99969, 16, 99999, 33, 100002]]]
        histogram_detail = shard_conn.execute_command(
            "latency", "histogram", cmd)
        if not histogram_detail:
            # this shard has no histogram for the command
            continue
        # flat list of alternating (latency_us, cumulative_count) values, e.g.
        # [1, 99805, 2, 99821, 4, 99915, 8, 99969, 16, 99999, 33, 100002]
        histogram_usec = histogram_detail[1][3]
        previous_count = 0
        # Walk the list pairwise. Unlike an index loop bounded by len / 2, this
        # cannot run past the end when the list has an odd number of items.
        for latency_us, cumulative_observations in zip(
                histogram_usec[0::2], histogram_usec[1::2]):
            # The reply is a cumulative histogram, so the count that belongs to
            # this bucket alone is the increase over the previous bucket.
            observations = cumulative_observations - previous_count
            accumulated_histogram.record_value(latency_us, observations)
            previous_count = cumulative_observations

    accumulated_p50 = accumulated_histogram.get_value_at_percentile(50.0)
    print("Merged p50(us) for command {}: {} us. Total observations: {}".format(
        cmd, accumulated_p50, accumulated_histogram.total_count))

	# To find the percentile of a combined dataset of latency values from two Redis servers,
	# you would first need to retrieve the latency data from each server and combine it into
	# a single dataset.
	# Once you have the combined dataset, you can calculate the desired percentile.

	import redis
	from hdrh.histogram import HdrHistogram

	# connect to shard 1
	shard1_conn = redis.StrictRedis(port=12000)

	# connect to shard 2
	shard2_conn = redis.StrictRedis(port=12001)

	shard_conns = [shard1_conn, shard2_conn]

	# Length of the 'cmdstat_' prefix carried by the keys of INFO commandstats
	# (e.g. 'cmdstat_ping' -> 'ping').
	CMDSTAT_PREFIX_LEN = len("cmdstat_")

	# Gather the distinct command names seen on any shard, in first-seen order.
	# A dict is used as an ordered set so the membership test does not rescan a
	# list for every key.
	seen_commands = {}
	for shard_conn in shard_conns:
	    cmdstats = shard_conn.info("commandstats")
	    for cmd_with_prefix in cmdstats:
	        cmd = cmd_with_prefix[CMDSTAT_PREFIX_LEN:]
	        # skip config and any command name containing '|'
	        if "|" in cmd or "config" in cmd:
	            continue
	        seen_commands[cmd] = None
	commands = list(seen_commands)

	print("Will provide the merged metrics for {} disticnt commands".format(len(commands)))

	# histogram __init__ values
	LOWEST = 1
	HIGHEST = 3600 * 1000 * 1000
	SIGNIFICANT = 3

	# For every command, fold the per-shard latency histograms into one
	# HdrHistogram and report the merged median.
	for cmd in commands:
	    # A fresh histogram per command, so nothing carries over between commands.
	    accumulated_histogram = HdrHistogram(LOWEST, HIGHEST, SIGNIFICANT)
	    for shard_conn in shard_conns:
	        # sample reply:
	        # [b'get', [b'calls', 100002, b'histogram_usec',
	        #           [1, 99805, 2, 99821, 4, 99915, 8, 99969, 16, 99999, 33, 100002]]]
	        histogram_detail = shard_conn.execute_command(
	            "latency", "histogram", cmd)
	        if not histogram_detail:
	            # this shard has no histogram for the command
	            continue
	        # flat list of alternating (latency_us, cumulative_count) values, e.g.
	        # [1, 99805, 2, 99821, 4, 99915, 8, 99969, 16, 99999, 33, 100002]
	        histogram_usec = histogram_detail[1][3]
	        previous_count = 0
	        # Walk the list pairwise. Unlike an index loop bounded by len / 2, this
	        # cannot run past the end when the list has an odd number of items.
	        for latency_us, cumulative_observations in zip(
	                histogram_usec[0::2], histogram_usec[1::2]):
	            # The reply is a cumulative histogram, so the count that belongs to
	            # this bucket alone is the increase over the previous bucket.
	            observations = cumulative_observations - previous_count
	            accumulated_histogram.record_value(latency_us, observations)
	            previous_count = cumulative_observations

	    accumulated_p50 = accumulated_histogram.get_value_at_percentile(50.0)
	    print("Merged p50(us) for command {}: {} us. Total observations: {}".format(
	        cmd, accumulated_p50, accumulated_histogram.total_count))