Last active
October 2, 2015 15:45
-
-
Save Dexterp37/6012d3e715095e2e369a to your computer and use it in GitHub Desktop.
Analyse gzip'd ping files to find where the bulk of the data is.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/c/mozilla-build/python/python.exe | |
# | |
# Analyse gzip'd ping files to find where the bulk of the data is. | |
# | |
# Usage: analyse.py PATH_WITH_PINGS [threshold_percentage] | |
# | |
# Default threshold percentage is 60%, meaning that ping sections containing more than | |
# 60% of the ping data are shown. | |
# | |
import argparse | |
import gzip | |
import json | |
import os | |
# Read a gzip ping file and return a (content-size, json object) tuple.
def read_gzip_file_as_json(filename):
    """Load a gzip-compressed JSON ping file.

    :param filename: path to the gzipped ping file.
    :return: a (size, data) tuple, where |size| is the length in bytes of the
             re-serialised JSON text and |data| is the decoded JSON object.
    """
    # Use a context manager so the file handle is closed even if the JSON
    # decoding raises (the original leaked the handle on error).
    with gzip.open(filename, 'rb') as unzipped_file:
        data = json.load(unzipped_file)
    # Measure the uncompressed payload by re-serialising it.
    size = len(json.dumps(data))
    return size, data
# Traverse the JSON ping dictionary structure, and compute the size for
# each entry.
def traverse_and_summarise_ping(json_root, size_report_dict, path=""):
    """Recursively record the serialised size of every dictionary entry.

    :param json_root: the current JSON node; only dict nodes are traversed.
    :param size_report_dict: output dict mapping "/a/b/c" leaf paths to the
                             size in bytes of the serialised leaf value.
    :param path: the path prefix accumulated so far (used by the recursion).
    """
    # Only descend into dictionaries. The original relied on catching
    # TypeError around the whole loop, which silently aborted mid-loop and
    # could even crash with an uncaught IndexError on lists of integers
    # (json_root[entry] with an int element performs list indexing).
    if not isinstance(json_root, dict):
        return
    for entry in json_root:
        # Build the path for this entry and compute its size in bytes.
        leaf_path = path + "/" + entry
        leaf_size = len(json.dumps(json_root[entry]))
        # Save the size of this leaf in the dictionary.
        size_report_dict[leaf_path] = leaf_size
        # Keep traversing.
        traverse_and_summarise_ping(json_root[entry], size_report_dict, leaf_path)
# Analyse threadHangStats, in particular.
def analyse_threadHangStats(json_root, size_report_dict):
    """Summarise the size of the threadHangStats section of a "main" ping.

    Records per-thread and per-hang leaf sizes into |size_report_dict| and
    prints min/max/average hang sizes for each thread that has hangs.

    :param json_root: the decoded ping; must have "type" and, for "main"
                      pings, payload/threadHangStats.
    :param size_report_dict: output dict mapping paths to sizes in bytes.
    """
    # Only "main" pings carry threadHangStats.
    if json_root["type"] != "main":
        print("Skipping threadHangStats analysis for " + json_root["type"])
        return
    thread_hangs = json_root["payload"]["threadHangStats"]
    for thread in thread_hangs:
        # Encode the thread name into the path, to help with the analysis.
        thread_path = "/payload/threadHangStats/" + thread["name"]
        # Analyse each thread and report the size in the dictionary.
        traverse_and_summarise_ping(thread, size_report_dict, thread_path)
        # If this thread contains no hangs, skip the stats part.
        hangs = thread["hangs"]
        if len(hangs) <= 0:
            continue
        print(thread["name"] + " hangs: " + str(len(hangs)))
        # Analyse each hang and collect its serialised size.
        hang_sizes = []
        for hang_index, hang in enumerate(hangs, start=1):
            # Encode the hang index into the path, to help with the analysis.
            hang_path = thread_path + "/" + str(hang_index)
            traverse_and_summarise_ping(hang, size_report_dict, hang_path)
            hang_sizes.append(len(json.dumps(hang)))
        # BUG FIX: the original updated min/max with an if/elif pair, so the
        # minimum was never updated on an iteration that also raised the
        # maximum (the first hang always did), leaving min at a bogus
        # 9999999999999999 sentinel whenever sizes arrived in ascending order.
        print("Hang stats: min " + str(min(hang_sizes)) + " max " +
              str(max(hang_sizes)) + " avg " + str(sum(hang_sizes) / len(hang_sizes)))
# Analyse a ping file.
def analyse_ping(filename):
    """Analyse one gzipped ping: summarise the leaf sizes and print every
    leaf whose size exceeds the configured percentage of the whole ping.

    :param filename: path to the gzipped ping file to analyse.
    """
    # Load the ping from disk.
    print("Analysing " + filename)
    ping_size, ping_json = read_gzip_file_as_json(filename)
    print("Ping type " + ping_json["type"] + " - size " + str(ping_size))
    # Walk the JSON structure and record the size in bytes of each leaf.
    size_dict = {}
    traverse_and_summarise_ping(ping_json, size_dict)
    # Dive deeper into the threadHangStats section.
    analyse_threadHangStats(ping_json, size_dict)
    # Only leaves bigger than this fraction of the ping get reported
    # (default threshold is 60% of the ping size).
    global gArgs
    alarming_threshold = ping_size * (gArgs.percentage * 0.01)
    # Walk the leaves from biggest to smallest; once a leaf falls below the
    # threshold, all the remaining ones do too, so stop there.
    ranked_leaves = sorted(size_dict.items(), key=lambda pair: pair[1], reverse=True)
    for leaf_path, leaf_size in ranked_leaves:
        if leaf_size < alarming_threshold:
            break
        percentage_size = (leaf_size * 100) / ping_size
        print(leaf_path + " - " + str(leaf_size) + "(" + str(percentage_size) + "%)")
# Parse the command line arguments.
parser = argparse.ArgumentParser(description="Ping bloat analysis script.")
parser.add_argument('path', help='the path containing the gzipped pings')
parser.add_argument("-P", "--percentage", dest="percentage", default=60, type=int,
                    help="Notify the bloat bigger than this percentage of the ping size.")
gArgs = parser.parse_args()
ping_path = gArgs.path
print("Starting analysis in " + ping_path + " with a " + str(gArgs.percentage) + "% threshold")
# Enumerate all the regular files in the provided path.
ping_files = [os.path.join(ping_path, f)
              for f in os.listdir(ping_path)
              if os.path.isfile(os.path.join(ping_path, f))]
# Analyse each ping file. The loop variable is named ping_file rather than
# "file", which shadows a builtin.
for ping_file in ping_files:
    analyse_ping(ping_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment