Create a gist now

Instantly share code, notes, and snippets.

@chutten /first stacks.ipynb Secret
Last active Jan 26, 2017

What would you like to do?
first stacks
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# coding: utf-8
# ### When a user crashes for the first time, what is that crash?
# In[1]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
import requests
from plotly.graph_objs import *
from moztelemetry import get_pings_properties, get_one_ping_per_client
from moztelemetry.dataset import Dataset
get_ipython().magic(u'matplotlib inline')
# In[2]:
# Show the default parallelism of the Spark context (notebook cell output).
# NOTE(review): `sc` is not defined in this file -- presumably the SparkContext
# injected by the notebook environment; confirm.
sc.defaultParallelism
# In[3]:
# Crash pings from the nightly channel submitted between 2017-01-01 and
# 2017-01-12 (inclusive, compared as YYYYMMDD strings), unsampled.
#
# The channel filter must be an equality test: `c in ('nightly')` is NOT a
# tuple membership test (the parentheses do not create a tuple), it is a
# substring test against the string 'nightly' and would also accept values
# such as '', 'n' or 'night'.
pings = (
    Dataset.from_source("telemetry")
    .where(docType='crash')
    .where(appUpdateChannel=lambda c: c == 'nightly')
    .where(submissionDate=lambda d: "20170101" <= d <= "20170112")
    .records(sc, sample=1)
)
# In[4]:
# Project each ping down to the four properties used by this analysis.
subset = get_pings_properties(
    pings,
    [
        "clientId",
        "submissionDate",
        "payload/stackTraces",
        "environment/profile/creationDate",
    ],
)
# Only pings that actually carry stack trace information are useful here.
subset = subset.filter(lambda ping: ping["payload/stackTraces"] is not None)
# In[5]:
subset.count()
# First we need to filter out any profile older than our survey period.
# In[6]:
import datetime
# Start of the survey period expressed as whole days since 1970-01-01.
# NOTE(review): this assumes environment/profile/creationDate is also a count
# of days since the Unix epoch -- confirm against the ping schema.
survey_begin = (datetime.datetime(2017, 1, 1) - datetime.datetime(1970, 1, 1)).days
# Keep only profiles created on or after the start of the survey period.
# Pings without a creation date are dropped explicitly rather than relying on
# Python 2's arbitrary ordering of None against ints (which raises TypeError
# on Python 3).
subset = subset.filter(
    lambda p: p["environment/profile/creationDate"] is not None
    and p["environment/profile/creationDate"] >= survey_begin
)
# In[7]:
subset.count()
# So now we have a survey subset of pings with stack information. We want to get the first crash for each client reporting a crash, so...
# In[8]:
# Key every ping by its client, keep the ping with the strictly smaller
# submissionDate for each client (when neither is strictly earlier, the
# second operand is kept), then discard the key again.
firsts = (
    subset
    .map(lambda ping: (ping["clientId"], ping))
    .reduceByKey(
        lambda earliest, candidate:
            earliest
            if earliest["submissionDate"] < candidate["submissionDate"]
            else candidate
    )
    .map(lambda keyed: keyed[1])
)
# In[9]:
firsts.count()
# In[10]:
def symbolicate(s):
    """Symbolicate the crashing thread of a stack traces payload.

    Builds a version-4 symbolication request from the frames of the crashing
    thread and the module list, POSTs it to http://symbolapi.mozilla.org/ and
    returns the ``symbolicatedStacks`` entry of the JSON response.

    Args:
        s: mapping with the keys ``modules``, ``threads`` and ``crash_info``
            (the latter providing ``crashing_thread``, the index into
            ``threads``).

    Returns:
        The value of ``symbolicatedStacks`` from the server's JSON response.

    Raises:
        KeyError, TypeError, ValueError: if the payload is missing expected
            fields (for example a frame without a usable ``module_index``).
        requests.exceptions.HTTPError: if the server answers with an error
            status code.
    """
    crashing_frames = s['threads'][s['crash_info']['crashing_thread']]['frames']
    modules = s['modules']

    # Each frame is sent as [module index, offset of the instruction pointer
    # from the module's base address]; both addresses are hex strings.
    stack = [
        [f['module_index'],
         int(f['ip'], 16) - int(modules[f['module_index']]['base_addr'], 16)]
        for f in crashing_frames
    ]

    # Spaces and parentheses are removed from debug file names; the table is
    # built once instead of once per module.
    strip_chars = dict((ord(char), None) for char in ' ()')
    memory_map = [
        [m['debug_file'].translate(strip_chars), m['debug_id']]
        for m in modules
    ]

    data = json.dumps({
        'stacks': [stack],
        'memoryMap': memory_map,
        'version': 4})
    result = requests.post('http://symbolapi.mozilla.org/', data=data)
    # Fail with the HTTP status instead of an opaque JSON decode error when
    # the server responds with an error page rather than a JSON document.
    result.raise_for_status()
    result_json = result.json()
    return result_json['symbolicatedStacks']
# In[11]:
def safe_symbolicate(s):
    """Symbolicate *s*, returning the exception instead of raising it.

    Args:
        s: stack traces payload, passed unchanged to ``symbolicate``.

    Returns:
        Whatever ``symbolicate(s)`` returns, or the ``Exception`` instance it
        raised, so a caller mapping this over many payloads is not aborted by
        a single failure.
    """
    try:
        return symbolicate(s)
    # ``as`` is valid on Python 2.6+ and Python 3; the comma form is
    # Python 2-only. The catch is deliberately broad: the error itself is the
    # result.
    except Exception as e:
        return e
# In[12]:
# Pair each first-crash ping's profile creation date with its symbolicated
# stack (or with the exception that symbolication produced).
symbolicated_firsts = firsts.map(
    lambda ping: (
        ping["environment/profile/creationDate"],
        safe_symbolicate(ping["payload/stackTraces"]),
    )
)
# In[13]:
symbolicated_firsts.take(3)
# In[18]:
# Count how often each distinct result occurs, keyed by the string form of
# the symbolicated stack (or of the exception).
stack_counts = (
    symbolicated_firsts
    .map(lambda date_and_stack: (str(date_and_stack[1]), 1))
    .countByKey()
)
# In[19]:
import operator
# Order the (stack, count) pairs from most to least frequent.
sorted_stack_counts = sorted(
    stack_counts.items(),
    key=operator.itemgetter(1),
    reverse=True,
)
# In[20]:
sorted_stack_counts[:10]
# Seeing a lot of shutdown crashes (RunWatchdog) which is exactly what I'd expect. Nice validation there.
#
# Things to do:
# * Figure out where `'Expecting value: line 1 column 1 (char 0)'` is coming from. It is raised by a JSON library somewhere, but is it triggered while building the request, while parsing the response, or does it come from the server somehow?
# * Determine if there's anything we can do with frames missing a module index (JIT frames, maybe?).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment