-
-
Save chutten/2063fe236a6ed46eb9b566dfa51ea755 to your computer and use it in GitHub Desktop.
first stacks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# ### When a user crashes for the first time, what is that crash?
# In[1]: | |
import ujson as json | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
import numpy as np | |
import plotly.plotly as py | |
import requests | |
from plotly.graph_objs import * | |
from moztelemetry import get_pings_properties, get_one_ping_per_client | |
from moztelemetry.dataset import Dataset | |
get_ipython().magic(u'matplotlib inline') | |
# In[2]: | |
# Display the SparkContext's default parallelism setting.
sc.defaultParallelism
# In[3]:
# Crash pings from the nightly channel submitted between 2017-01-01 and
# 2017-01-12 (both inclusive), unsampled.
#
# The channel is matched by equality: `c in ('nightly')` would be a substring
# test, because ('nightly') is a plain string rather than a one-element tuple,
# so values such as '' or 'night' would slip through.
pings = (
    Dataset.from_source("telemetry")
    .where(docType='crash')
    .where(appUpdateChannel='nightly')
    .where(submissionDate=lambda d: "20170101" <= d <= "20170112")
    .records(sc, sample=1)
)
# In[4]: | |
def _has_stack_traces(ping):
    """Return True when the ping carries a "payload/stackTraces" value."""
    return ping["payload/stackTraces"] is not None


# Reduce each ping to the properties used below, then drop pings that have
# no stack traces.
_wanted_properties = [
    "clientId",
    "submissionDate",
    "payload/stackTraces",
    "environment/profile/creationDate",
]
subset = get_pings_properties(pings, _wanted_properties).filter(_has_stack_traces)
# In[5]:
subset.count()
# First we need to filter out any profile created before our survey period began.
# In[6]: | |
import datetime

# The profile creation date is compared against a day count measured from
# 1970-01-01, so the start of the survey is expressed the same way.
survey_begin = (datetime.datetime(2017, 1, 1) - datetime.datetime(1970, 1, 1)).days


def _created_during_survey(ping):
    """Return True if the ping's profile was created on or after survey_begin.

    Pings without a creation date are rejected explicitly instead of relying
    on Python 2 ordering None below every integer (the bare comparison raises
    TypeError on Python 3).
    """
    created = ping["environment/profile/creationDate"]
    return created is not None and created >= survey_begin


subset = subset.filter(_created_during_survey)
# In[7]:
subset.count()
# So now we have a survey subset of pings with stack information. We want to get the first crash for each client reporting a crash, so...
# In[8]: | |
def _earlier_ping(a, b):
    """Return the ping with the smaller "submissionDate"; `b` wins ties."""
    if a["submissionDate"] < b["submissionDate"]:
        return a
    return b


# Key every ping by its client, keep a single ping per client, then throw
# the key away again so only the pings remain.
_pings_by_client = subset.map(lambda p: (p["clientId"], p))
firsts = _pings_by_client.reduceByKey(_earlier_ping).map(lambda pair: pair[1])
# In[9]:
firsts.count()
# In[10]: | |
def _symbolication_request(s):
    """Build the version-4 symbolication request body for `s`.

    Only the crashing thread (s['crash_info']['crashing_thread']) is included.
    """
    frames = s['threads'][s['crash_info']['crashing_thread']]['frames']
    # Each frame becomes [module index, offset of `ip` from the module base].
    # A frame without 'module_index' raises KeyError here.
    stack = [
        [f['module_index'],
         int(f['ip'], 16) - int(s['modules'][f['module_index']]['base_addr'], 16)]
        for f in frames
    ]
    # Debug file names are sent with spaces and parentheses removed. Chained
    # replace() works for both byte strings and unicode strings.
    memory_map = [
        [m['debug_file'].replace(' ', '').replace('(', '').replace(')', ''),
         m['debug_id']]
        for m in s['modules']
    ]
    return {'stacks': [stack], 'memoryMap': memory_map, 'version': 4}


def symbolicate(s, timeout=60):
    """Symbolicate the crashing thread of a stack-traces payload.

    Args:
        s: dict with 'modules', 'threads' and 'crash_info' entries, as read
            from a ping's "payload/stackTraces".
        timeout: seconds to wait for the symbolication server before giving
            up, so that one stalled connection cannot block a worker forever.

    Returns:
        The 'symbolicatedStacks' value of the server's JSON response.

    Raises:
        KeyError, IndexError, ValueError: if `s` lacks an expected field or
            holds a malformed hex address.
        requests.exceptions.RequestException: on connection errors, timeouts,
            or a non-success HTTP status.
    """
    data = json.dumps(_symbolication_request(s))
    result = requests.post('http://symbolapi.mozilla.org/', data=data,
                           timeout=timeout)
    # Fail with the HTTP status instead of trying to JSON-decode an error
    # page, which only produces an uninformative decoding error.
    result.raise_for_status()
    return result.json()['symbolicatedStacks']
# In[11]: | |
def safe_symbolicate(s):
    """Symbolicate `s`, returning the exception instead of raising it.

    This keeps one bad stack (or one failed request) from aborting the whole
    job; callers receive either the symbolicated stacks or the Exception
    instance that symbolicate() raised.
    """
    try:
        return symbolicate(s)
    except Exception as e:  # "as" form is valid on Python 2.6+ and Python 3
        return e
# In[12]: | |
def _creation_date_and_stack(p):
    """Pair a ping's profile creation date with its symbolication result."""
    return (p["environment/profile/creationDate"],
            safe_symbolicate(p["payload/stackTraces"]))


symbolicated_firsts = firsts.map(_creation_date_and_stack)
# In[13]:
symbolicated_firsts.take(3)
# In[18]: | |
def _stack_as_key(pair):
    """Turn a (creation date, result) pair into a (str(result), 1) pair."""
    return (str(pair[1]), 1)


# Count how many first crashes share each stringified symbolication result.
stack_counts = symbolicated_firsts.map(_stack_as_key).countByKey()
# In[19]:
import operator

# Order the (stack, count) pairs from most to least frequent.
_count_of = operator.itemgetter(1)
sorted_stack_counts = sorted(stack_counts.items(), key=_count_of, reverse=True)
# In[20]:
sorted_stack_counts[:10]
# Seeing a lot of shutdown crashes (RunWatchdog), which is exactly what I'd expect. Nice validation there.
#
# Things to do:
# * Figure out where `'Expecting value: line 1 column 1 (char 0)'` is coming from. It's raised by a JSON library somewhere, but is it triggered by the request, by the response, or by something on the server?
# * Determine if there's anything we can do with frames missing a module index (JIT frames, maybe?).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment