-
-
Save mikeconley/1888194c341e3d4da87207540603e708 to your computer and use it in GitHub Desktop.
Spinner hang stacks — a Jupyter-notebook analysis of Firefox tab-switch (ForcePaintInProgress) hang stacks extracted from Nightly telemetry pings.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# In[92]: | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
import numpy as np | |
import plotly.plotly as py | |
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records | |
# Notebook setup: enable inline matplotlib rendering.
get_ipython().magic(u'pylab inline')

# In[93]:

# Sanity check: the Spark context's default partition count.
sc.defaultParallelism

# In[94]:

# Pull a 50% sample of Firefox Nightly pings submitted on 2016-10-15.
pings = get_pings(sc, app="Firefox", channel="nightly", submission_date="20161015", fraction=0.5)

# Let's only look at pings with child payloads...

# In[95]:
def has_child_payloads(ping):
    """Return True when this ping's payload carries per-child-process payloads."""
    payload = ping["payload"]
    return "childPayloads" in payload
# Restrict the analysis to pings that actually contain child payloads.
child_payload_pings = pings.filter(has_child_payloads)

# This is the big, complicated filter. We walk each childPayload of each ping,
# looking for a `threadHangStats` entry.
#
# Within `threadHangStats`, we only consider `Gecko_Child` threads, and only
# hangs carrying the `ForcePaintInProgress` annotation (i.e. hangs that
# occurred while a tab-switch paint was being forced).
#
# We yield stacks for hangs that last longer than 100ms for now.

# In[96]:
def get_stacks(subset, min_ms=100):
    """Aggregate tab-switch (ForcePaintInProgress) hang stacks across pings.

    For every child payload of every ping in `subset`, find the hangs on the
    `Gecko_Child` thread that carry a `ForcePaintInProgress` annotation,
    count how many histogram samples fall in buckets strictly above `min_ms`
    milliseconds, and sum those counts per unique stack.

    Parameters
    ----------
    subset : RDD of ping dicts; each ping must have payload.childPayloads.
    min_ms : int, duration threshold in milliseconds. Defaults to 100,
        matching the previously hard-coded value (backward compatible).

    Returns
    -------
    dict mapping stack (as a tuple of frames) -> total count of hangs
    longer than `min_ms` across all pings.
    """
    def yield_ping_stacks(ping):
        for child_payload in ping["payload"]["childPayloads"]:
            if "threadHangStats" not in child_payload:
                continue
            for thread in child_payload["threadHangStats"]:
                # Only the content (child) process main thread is relevant.
                if thread["name"] != "Gecko_Child":
                    continue
                for hang in thread["hangs"]:
                    # Skip hangs with an empty stack or no annotations.
                    if not hang["stack"] or "annotations" not in hang:
                        continue
                    # Keep only hangs annotated as occurring during a
                    # tab-switch forced paint.
                    if not any("ForcePaintInProgress" in annotation
                               for annotation in hang["annotations"]):
                        continue
                    values = hang["histogram"]["values"]
                    # Bucket labels arrive as strings; index by their int
                    # value. Materialize with list() so this works with
                    # dict views on Python 3 as well as lists on Python 2.
                    histogram = pd.Series(list(values.values()),
                                          index=[int(k) for k in values.keys()]).sort_index()
                    over_min_ms_count = histogram[histogram.index > min_ms].sum()
                    yield (tuple(hang["stack"]), over_min_ms_count)
    # Sum the per-hang counts for each unique stack across the whole RDD.
    return subset.flatMap(yield_ping_stacks).reduceByKey(lambda a, b: a + b).collectAsMap()
# Run the aggregation over the filtered pings.
child_hang_stacks = get_stacks(child_payload_pings)

# Now that we have all of these stacks, let's group them by the top-most frame
# to give us some sense of order.
#
# We'll be outputting a structure like this:
#
# ```
# [
#     {
#         'frame': (top-most-frame),
#         'hits': (number of hits),
#         'stacks': [
#             (big ol' list of stacks)
#         ]
#     },
#     ...
# ]
# ```
#

# In[97]:
def group_by_top_frame(stacks):
    """Group hang stacks by their top-most frame.

    Parameters
    ----------
    stacks : dict mapping stack (tuple of frames) -> hit count. The
        top-most frame is the LAST element of each stack tuple.

    Returns
    -------
    (top_frames, total_hits) where `top_frames` maps each top frame to
    {"frame": frame, "stacks": [(stack, hits), ...] sorted by hits
    descending, "hits": summed hits for that frame}, and `total_hits`
    is the grand total across all stacks.
    """
    total_hits = 0
    top_frames = {}
    for stack, hits in stacks.items():  # .items() works on Python 2 and 3
        stack_top_frame = stack[-1]
        if stack_top_frame not in top_frames:
            top_frames[stack_top_frame] = {"frame": stack_top_frame,
                                           "stacks": [],
                                           "hits": 0}
        top_frame = top_frames[stack_top_frame]
        top_frame["stacks"].append((stack, hits))
        top_frame["hits"] += hits
        total_hits += hits
    # Keep stacks sorted by hits. Sorting once after the loop (instead of
    # after every append, as before) produces the same order -- Python's
    # sort is stable -- while avoiding O(n^2 log n) work.
    for top_frame in top_frames.values():
        top_frame["stacks"].sort(key=lambda entry: entry[1], reverse=True)
    return top_frames, total_hits
def get_stack_hits(stacks, stack):
    """Return the hit count paired with `stack` in a list of
    (stack, hits) tuples, or 0 when the stack is not present."""
    return next((hits for candidate, hits in stacks if candidate == stack), 0)
# Group the aggregated stacks by top frame and compute the grand total.
top_frames, total_hits = group_by_top_frame(child_hang_stacks)

# Now let's sort by the number of hits on that top-frame to produce our final list.

# In[98]:

sorted_groups = sorted(top_frames.values(), key=lambda d: d["hits"], reverse=True)

# In[99]:

# Display the result (notebook cell output).
sorted_groups

# In[ ]:
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment