-
-
Save mikeconley/1888194c341e3d4da87207540603e708 to your computer and use it in GitHub Desktop.
Spinner hang stacks — a Jupyter-notebook analysis of Firefox tab-switch (ForcePaintInProgress) hang stacks extracted from Nightly telemetry pings.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# In[92]: | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
import numpy as np | |
import plotly.plotly as py | |
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records | |
# Notebook setup: enable inline matplotlib rendering.
get_ipython().magic(u'pylab inline')

# In[93]:

# Sanity check: the Spark context's default partition count.
sc.defaultParallelism

# In[94]:

# Pull a 50% sample of Firefox Nightly pings submitted on 2016-10-15.
pings = get_pings(sc, app="Firefox", channel="nightly", submission_date="20161015", fraction=0.5)

# Let's only look at pings with child payloads...

# In[95]:
def has_child_payloads(ping):
    """Return True when this ping's payload carries per-child-process payloads."""
    payload = ping["payload"]
    return "childPayloads" in payload
# Restrict the analysis to pings that actually contain child payloads.
child_payload_pings = pings.filter(has_child_payloads)

# This is the big, complicated filter. We walk each childPayload of each ping,
# looking for a `threadHangStats` entry.
#
# Within `threadHangStats`, we only consider `Gecko_Child` threads, and only
# hangs carrying the `ForcePaintInProgress` annotation (i.e. hangs that
# occurred while a tab-switch paint was being forced).
#
# We yield stacks for hangs that last longer than 100ms for now.

# In[96]:
def get_stacks(subset, min_ms=100):
    """Aggregate tab-switch (ForcePaintInProgress) hang stacks across pings.

    For every child payload of every ping in `subset`, find the hangs on the
    `Gecko_Child` thread that carry a `ForcePaintInProgress` annotation,
    count how many histogram samples fall in buckets strictly above `min_ms`
    milliseconds, and sum those counts per unique stack.

    Parameters
    ----------
    subset : RDD of ping dicts; each ping must have payload.childPayloads.
    min_ms : int, duration threshold in milliseconds. Defaults to 100,
        matching the previously hard-coded value (backward compatible).

    Returns
    -------
    dict mapping stack (as a tuple of frames) -> total count of hangs
    longer than `min_ms` across all pings.
    """
    def yield_ping_stacks(ping):
        for child_payload in ping["payload"]["childPayloads"]:
            if "threadHangStats" not in child_payload:
                continue
            for thread in child_payload["threadHangStats"]:
                # Only the content (child) process main thread is relevant.
                if thread["name"] != "Gecko_Child":
                    continue
                for hang in thread["hangs"]:
                    # Skip hangs with an empty stack or no annotations.
                    if not hang["stack"] or "annotations" not in hang:
                        continue
                    # Keep only hangs annotated as occurring during a
                    # tab-switch forced paint.
                    if not any("ForcePaintInProgress" in annotation
                               for annotation in hang["annotations"]):
                        continue
                    values = hang["histogram"]["values"]
                    # Bucket labels arrive as strings; index by their int
                    # value. Materialize with list() so this works with
                    # dict views on Python 3 as well as lists on Python 2.
                    histogram = pd.Series(list(values.values()),
                                          index=[int(k) for k in values.keys()]).sort_index()
                    over_min_ms_count = histogram[histogram.index > min_ms].sum()
                    yield (tuple(hang["stack"]), over_min_ms_count)
    # Sum the per-hang counts for each unique stack across the whole RDD.
    return subset.flatMap(yield_ping_stacks).reduceByKey(lambda a, b: a + b).collectAsMap()
# Run the aggregation over the filtered pings.
child_hang_stacks = get_stacks(child_payload_pings)

# Now that we have all of these stacks, let's group them by the top-most frame
# to give us some sense of order.
#
# We'll be outputting a structure like this:
#
# ```
# [
#     {
#         'frame': (top-most-frame),
#         'hits': (number of hits),
#         'stacks': [
#             (big ol' list of stacks)
#         ]
#     },
#     ...
# ]
# ```
#

# In[97]:
def group_by_top_frame(stacks):
    """Group hang stacks by their top-most frame.

    Parameters
    ----------
    stacks : dict mapping stack (tuple of frames) -> hit count. The
        top-most frame is the LAST element of each stack tuple.

    Returns
    -------
    (top_frames, total_hits) where `top_frames` maps each top frame to
    {"frame": frame, "stacks": [(stack, hits), ...] sorted by hits
    descending, "hits": summed hits for that frame}, and `total_hits`
    is the grand total across all stacks.
    """
    total_hits = 0
    top_frames = {}
    for stack, hits in stacks.items():  # .items() works on Python 2 and 3
        stack_top_frame = stack[-1]
        if stack_top_frame not in top_frames:
            top_frames[stack_top_frame] = {"frame": stack_top_frame,
                                           "stacks": [],
                                           "hits": 0}
        top_frame = top_frames[stack_top_frame]
        top_frame["stacks"].append((stack, hits))
        top_frame["hits"] += hits
        total_hits += hits
    # Keep stacks sorted by hits. Sorting once after the loop (instead of
    # after every append, as before) produces the same order -- Python's
    # sort is stable -- while avoiding O(n^2 log n) work.
    for top_frame in top_frames.values():
        top_frame["stacks"].sort(key=lambda entry: entry[1], reverse=True)
    return top_frames, total_hits
def get_stack_hits(stacks, stack):
    """Return the hit count paired with `stack` in a list of
    (stack, hits) tuples, or 0 when the stack is not present."""
    return next((hits for candidate, hits in stacks if candidate == stack), 0)
# Group the aggregated stacks by top frame and compute the grand total.
top_frames, total_hits = group_by_top_frame(child_hang_stacks)

# Now let's sort by the number of hits on that top-frame to produce our final list.

# In[98]:

sorted_groups = sorted(top_frames.values(), key=lambda d: d["hits"], reverse=True)

# In[99]:

# Display the result (notebook cell output).
sorted_groups

# In[ ]:
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment