@Dexterp37
Last active October 2, 2015 15:45
Analyse gzip'd ping files to find where the bulk of the data is.
#!/c/mozilla-build/python/python.exe
#
# Analyse gzip'd ping files to find where the bulk of the data is.
#
# Usage: analyse.py PATH_WITH_PINGS [-P THRESHOLD_PERCENTAGE]
#
# Default threshold percentage is 60%, meaning that ping sections containing more than
# 60% of the ping data are shown.
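#
# As an example (the path is hypothetical), running
#   python analyse.py C:/telemetry/pings -P 40
# reports every ping section whose serialised size is at least 40% of its
# ping's total size; for a 200000 byte ping that is 80000 bytes or more.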
#
import argparse
import gzip
import json
import os

# Read a gzip'd ping file and return a (content-size, json object) tuple.
def read_gzip_file_as_json(filename):
    unzippedFile = gzip.open(filename, 'rb')
    data = json.load(unzippedFile)
    unzippedFile.close()
    # The size is measured on the decompressed, re-serialised JSON content.
    size = len(json.dumps(data))
    return size, data
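
# As a usage sketch (the file name is hypothetical):
#   size, ping = read_gzip_file_as_json("main-ping.json.gz")
#   print("Ping of type " + ping["type"] + ", " + str(size) + " bytes")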

# Traverse the JSON ping dictionary structure, and compute the size for
# each entry.
def traverse_and_summarise_ping(json_root, size_report_dict, path=""):
    try:
        for entry in json_root:
            # Build the path for this entry and compute its size in bytes.
            leaf_path = path + "/" + entry
            entry_as_string = json.dumps(json_root[entry])
            leaf_size = len(entry_as_string)
            # Save the size of this leaf in the dictionary.
            size_report_dict[leaf_path] = leaf_size
            # Keep traversing.
            traverse_and_summarise_ping(json_root[entry], size_report_dict, leaf_path)
    except TypeError:
        # The entry is not a traversable container. Skip it.
        pass
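
# As a sketch of what the traversal produces (sizes are hypothetical),
# a ping like
#   {"type": "main", "payload": {"threadHangStats": [...]}}
# fills the report dictionary with entries such as
#   {"/type": 6, "/payload": 1024, "/payload/threadHangStats": 512}
# i.e. every nested entry is keyed by its slash-separated path.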

# Analyse threadHangStats, in particular.
def analyse_threadHangStats(json_root, size_report_dict):
    if json_root["type"] != "main":
        print("Skipping threadHangStats analysis for " + json_root["type"])
        return

    thread_hangs = json_root["payload"]["threadHangStats"]
    for thread in thread_hangs:
        # Encode the thread name into the path, to help with the analysis.
        thread_path = "/payload/threadHangStats/" + thread["name"]
        # Analyse each thread and report the sizes in the dictionary.
        traverse_and_summarise_ping(thread, size_report_dict, thread_path)

        # If this thread contains no hangs, skip the stats below.
        hang_number = len(thread["hangs"])
        if hang_number <= 0:
            continue

        hang_index = 0
        hang_size_average = 0
        hang_size_min = 9999999999999999
        hang_size_max = -1
        print(thread["name"] + " hangs: " + str(hang_number))

        # Loop through each of the thread's hangs.
        for hang in thread["hangs"]:
            hang_index = hang_index + 1
            # Encode the hang index into the path, to help with the analysis.
            hang_path = thread_path + "/" + str(hang_index)
            # Analyse each hang and report the sizes in the dictionary.
            traverse_and_summarise_ping(hang, size_report_dict, hang_path)
            # Gather some stats. Use two independent checks so that both the
            # minimum and the maximum get updated on the first hang.
            hang_size = len(json.dumps(hang))
            hang_size_average = hang_size_average + hang_size
            if hang_size > hang_size_max:
                hang_size_max = hang_size
            if hang_size < hang_size_min:
                hang_size_min = hang_size

        # Compute the average as a float (so Python 2 doesn't truncate it)
        # and print the stats.
        hang_size_average = float(hang_size_average) / hang_number
        print("Hang stats: min " + str(hang_size_min) + " max " +
              str(hang_size_max) + " avg " + str(hang_size_average))

# Analyse a ping file.
def analyse_ping(filename):
    # Read the ping from file.
    print("Analysing " + filename)
    ping_size, ping_json = read_gzip_file_as_json(filename)

    # Traverse the JSON structure to find the size, in bytes, of each leaf.
    print("Ping type " + ping_json["type"] + " - size " + str(ping_size))
    size_dict = {}
    traverse_and_summarise_ping(ping_json, size_dict)

    # Let's dive deeper into the threadHangStats.
    analyse_threadHangStats(ping_json, size_dict)

    # We only want to report leaves that take more than the requested
    # percentage of the ping size (60% by default).
    global gArgs
    alarming_threshold = ping_size * (gArgs.percentage * 0.01)

    # Sort the size dictionary by its values (the sizes), then keep only the
    # elements at or above the alarm threshold.
    heavy_weights = [leaf for leaf in sorted(size_dict, key=size_dict.get, reverse=True)
                     if size_dict[leaf] >= alarming_threshold]

    # Finally, print the biggest leaves.
    for leaf in heavy_weights:
        percentage_size = (size_dict[leaf] * 100.0) / ping_size
        print(leaf + " - " + str(size_dict[leaf]) + " (" + str(percentage_size) + "%)")
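
# For a hypothetical 150000 byte main ping analysed with the default 60%
# threshold (alarming_threshold = 90000 bytes), the report could look like:
#   /payload - 141000 (94.0%)
#   /payload/threadHangStats - 120000 (80.0%)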

# Parse the command line arguments.
parser = argparse.ArgumentParser(description="Ping bloat analysis script.")
parser.add_argument('path', help='the path containing the gzipped pings')
parser.add_argument("-P", "--percentage", dest="percentage", default=60, type=int,
                    help="report the sections bigger than this percentage of the ping size")
gArgs = parser.parse_args()

ping_path = gArgs.path
print("Starting analysis in " + ping_path + " with a " + str(gArgs.percentage) + "% threshold")

# Enumerate all the files in the provided path.
onlyfiles = [os.path.join(ping_path, f) for f in os.listdir(ping_path)
             if os.path.isfile(os.path.join(ping_path, f))]

# Analyse each ping file. Avoid shadowing the built-in "file" name.
for ping_file in onlyfiles:
    analyse_ping(ping_file)