# coding: utf-8
# ### When a user crashes for the first time, what is that crash?
# In[1]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
import requests
from plotly.graph_objs import *
from moztelemetry import get_pings_properties, get_one_ping_per_client
from moztelemetry.dataset import Dataset
# Render matplotlib figures inline in the notebook output.
# `run_line_magic` replaces the deprecated `magic()` helper and takes the
# magic name and its argument string separately.
get_ipython().run_line_magic('matplotlib', 'inline')
# In[2]:
# In[3]:
# Crash pings from the Nightly channel submitted between 2017-01-01 and
# 2017-01-12 (both bounds inclusive; dates compare as YYYYMMDD strings).
# `sc` is not defined in this file — presumably the SparkContext supplied by
# the notebook environment.
pings = (
    Dataset.from_source("telemetry")
    .where(docType='crash')
    # One-element *tuple*: the earlier `c in ('nightly')` had no trailing
    # comma, so it tested membership in the string 'nightly' and accepted any
    # substring of it (e.g. 'night' or '').
    .where(appUpdateChannel=lambda c: c in ('nightly',))
    .where(submissionDate=lambda d: "20170101" <= d <= "20170112")
    .records(sc, sample=1)
)
# In[4]:
# Project every ping down to the properties the later cells read, then drop
# pings that carry no stack-trace payload.
# NOTE(review): the original property list was lost from this cell. The paths
# below are the minimal set that the visible downstream cells index by key;
# confirm against the original notebook, and confirm that "submissionDate"
# actually resolves to a value for these records.
subset = get_pings_properties(pings, [
    "clientId",
    "submissionDate",
    "environment/profile/creationDate",
    "payload/stackTraces",
]).filter(lambda p: p["payload/stackTraces"] is not None)
# In[5]:
# First we need to filter out any profile that was created before the start of our survey period.
# In[6]:
import datetime

# Start of the survey window (2017-01-01) expressed as whole days since the
# Unix epoch, so it can be compared with the profile creation date
# (presumably also days since epoch — confirm against the ping schema).
survey_begin = (datetime.datetime(2017, 1, 1) - datetime.datetime(1970, 1, 1)).days

# Keep only pings whose profile was created on or after the survey start.
# A missing creation date is excluded explicitly instead of relying on
# Python 2's implicit None-vs-int ordering (which raises TypeError on Python 3).
subset = subset.filter(
    lambda p: p["environment/profile/creationDate"] is not None
    and p["environment/profile/creationDate"] >= survey_begin
)
# In[7]:
# We now have a survey subset of pings with stack information. Next, keep only the first crash for each client that reported one.
# In[8]:
def _earlier_submission(a, b):
    """Return `a` when its "submissionDate" is strictly smaller, otherwise `b`."""
    if a["submissionDate"] < b["submissionDate"]:
        return a
    return b


# Key each ping by its client, collapse every client's pings to a single ping
# with the reducer above, then discard the key again.
firsts = (
    subset
    .map(lambda ping: (ping["clientId"], ping))
    .reduceByKey(_earlier_submission)
    .map(lambda keyed: keyed[1])
)
# In[9]:
# In[10]:
def symbolicate(s, server_url=''):
    """Symbolicate the crashing thread of one stack-trace payload.

    Builds a version-4 symbolication request from `s`, POSTs it to
    `server_url` and returns the "symbolicatedStacks" entry of the JSON reply.

    NOTE(review): the call expression and endpoint URL were lost from the
    original cell (only an empty string literal survived). `requests.post` is
    reconstructed from the surviving `data=data` argument and the later
    `result.json()` call; the real endpoint must be supplied via `server_url`.

    Args:
        s: dict providing s['crash_info']['crashing_thread'], s['threads']
            (each with 'frames' holding 'module_index' and a hex 'ip') and
            s['modules'] (each with hex 'base_addr', 'debug_file', 'debug_id').
        server_url: URL of the symbolication service to POST to.

    Returns:
        The value stored under 'symbolicatedStacks' in the server's JSON reply.

    Raises:
        ValueError: if no `server_url` is given.
        KeyError: if `s` or a frame lacks an expected key (for example a frame
            without 'module_index').
        requests.exceptions.HTTPError: if the server answers with an error
            status.
    """
    if not server_url:
        raise ValueError('symbolicate() requires the symbolication server URL')

    modules = s['modules']
    crashing_thread = s['threads'][s['crash_info']['crashing_thread']]

    # Each frame becomes [module index, offset of the instruction pointer from
    # the base address of that module].
    frames = [
        [f['module_index'],
         int(f['ip'], 16) - int(modules[f['module_index']]['base_addr'], 16)]
        for f in crashing_thread['frames']
    ]

    # Debug file names are sent with spaces and parentheses removed. Filtering
    # characters (rather than dict-based translate) works for both byte and
    # unicode strings.
    memory_map = [
        [''.join(ch for ch in m['debug_file'] if ch not in ' ()'), m['debug_id']]
        for m in modules
    ]

    data = json.dumps({'stacks': [frames], 'memoryMap': memory_map, 'version': 4})
    result = requests.post(server_url, data=data)
    # Surface HTTP failures as such instead of letting an error page reach the
    # JSON decoder and fail with an opaque parse error.
    result.raise_for_status()
    result_json = result.json()
    return result_json['symbolicatedStacks']
# In[11]:
def safe_symbolicate(s):
    """Symbolicate `s`, returning any raised exception instead of propagating it.

    Args:
        s: stack-trace payload, passed unchanged to `symbolicate`.

    Returns:
        Whatever `symbolicate(s)` returns, or the `Exception` instance it
        raised. Callers receive failures as ordinary values.
    """
    # The `try:` line was missing, and `except Exception, e` is Python-2-only
    # syntax; the `as` form works on Python 2.6+ and Python 3.
    try:
        return symbolicate(s)
    except Exception as e:
        return e
# In[12]:
# Pair each ping's profile creation date with the symbolication result for its
# stack traces (or with the exception, since safe_symbolicate returns errors
# as values).
# NOTE(review): the head of this statement was lost; `firsts.map(lambda ...)`
# is reconstructed from the variable name and the preceding cell — confirm
# that `firsts` is the intended source.
symbolicated_firsts = firsts.map(
    lambda p: (p["environment/profile/creationDate"],
               safe_symbolicate(p["payload/stackTraces"])))
# In[13]:
# In[18]:
# Count how often each distinct result occurs, keyed by the string form of the
# second element of every pair.
# NOTE(review): the head of this statement was lost, and the cells between
# In[13] and In[18] are absent from this file. `symbolicated_firsts` is the
# nearest visible collection of pairs — confirm it is the intended source.
stack_counts = symbolicated_firsts.map(lambda pair: (str(pair[1]), 1)).countByKey()
# In[19]:
import operator

# Order the (stack, count) pairs from most to least frequent; pairs with equal
# counts keep their existing relative order.
sorted_stack_counts = list(stack_counts.items())
sorted_stack_counts.sort(key=operator.itemgetter(1), reverse=True)
# In[20]:
# Seeing a lot of shutdown crashes (RunWatchdog) which is exactly what I'd expect. Nice validation there.
# Things to do:
# * figure out where `'Expecting value: line 1 column 1 (char 0)'` is coming from. It's from a json lib somewhere, but is it the request or the response or does it come from the server somehow?
# * Determine if there's anything we can do with frames missing a module index (JIT frames, maybe?).
