Skip to content

Instantly share code, notes, and snippets.

@chutten
Created November 15, 2016 21:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chutten/7a5bfc86f29a4292b9af45c48a542f41 to your computer and use it in GitHub Desktop.
Save chutten/7a5bfc86f29a4292b9af45c48a542f41 to your computer and use it in GitHub Desktop.
subprocess_crash_submit
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Subprocess Crash Submission Rates"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/histogram-whitelists.json). Assuming all histograms are acceptable.\n",
"Populating the interactive namespace from numpy and matplotlib\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hadoop/anaconda2/lib/python2.7/site-packages/IPython/core/magics/pylab.py:161: UserWarning:\n",
"\n",
"pylab import has clobbered these variables: ['Annotation', 'Figure']\n",
"`%matplotlib` prevents importing * from pylab and numpy\n",
"\n"
]
}
],
"source": [
"import ujson as json\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import numpy as np\n",
"import plotly.plotly as py\n",
"from plotly.graph_objs import *\n",
"\n",
"from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records\n",
"\n",
"%pylab inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"160"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sc.defaultParallelism"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pings = get_pings(sc, app=\"Firefox\", submission_date=\"20161101\", fraction=0.4)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"subset = get_pings_properties(pings, [\"clientId\",\n",
" \"application/channel\",\n",
" \"payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/content-crash\",\n",
" \"payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/plugin-crash\",\n",
" \"payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/content\",\n",
" \"payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/plugin\",\n",
" ])"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"subset = subset.filter(lambda p: p[\"application/channel\"] is not None and\n",
" (p[\"payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/content-crash\"] is not None or\n",
" p[\"payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/plugin-crash\"] is not None or\n",
" p[\"payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/content\"] is not None or\n",
" p[\"payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/plugin\"] is not None))"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"defaultdict(int,\n",
" {u'aurora-content': 2829,\n",
" u'aurora-plugin': 292,\n",
" u'beta-content': 24546,\n",
" u'beta-plugin': 9761,\n",
" u'default-content': 5,\n",
" u'default-plugin': 5,\n",
" u'esr-content': 28,\n",
" u'esr-plugin': 70,\n",
" u'esr45-content': 1,\n",
" u'nightly-content': 1848,\n",
" u'nightly-elm-content': 3,\n",
" u'nightly-esr10-content': 13,\n",
" u'nightly-esr10-plugin': 3,\n",
" u'nightly-plugin': 65,\n",
" u'release-content': 8675,\n",
" u'release-plugin': 7559})"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def pair_or_none(p, prefix, hgram, crash):\n",
" return (prefix + crash, 1) if p[hgram + \"/\" + crash] is not None else None\n",
"\n",
"\n",
"def gimme_count(p, hgram, crashes):\n",
" prefix = p[\"application/channel\"] + \"-\"\n",
" return [pair_or_none(p, prefix, hgram, crash) for crash in crashes]\n",
"\n",
"crash_counts = subset\\\n",
" .flatMap(lambda p: gimme_count(p, \"payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP\", [\"content\", \"plugin\"]))\\\n",
" .filter(lambda pair: pair is not None)\\\n",
" .countByKey()\n",
"crash_counts"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"defaultdict(int,\n",
" {u'aurora-content-crash': 139,\n",
" u'aurora-plugin-crash': 4,\n",
" u'beta-content-crash': 735,\n",
" u'beta-plugin-crash': 213,\n",
" u'default-content-crash': 1,\n",
" u'esr-plugin-crash': 1,\n",
" u'nightly-content-crash': 432,\n",
" u'nightly-esr10-content-crash': 3,\n",
" u'nightly-plugin-crash': 1,\n",
" u'release-content-crash': 377,\n",
" u'release-plugin-crash': 314})"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"crash_reports = subset\\\n",
" .flatMap(lambda p: gimme_count(p, \"payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS\", [\"content-crash\", \"plugin-crash\"]))\\\n",
" .filter(lambda pair: pair is not None)\\\n",
" .countByKey()\n",
"crash_reports"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"{u'aurora-content-crash': 0.04913396960056557,\n",
" u'aurora-plugin-crash': 0.0136986301369863,\n",
" u'beta-content-crash': 0.02994377902713273,\n",
" u'beta-plugin-crash': 0.02182153467882389,\n",
" u'default-content-crash': 0.2,\n",
" u'esr-plugin-crash': 0.014285714285714285,\n",
" u'nightly-content-crash': 0.23376623376623376,\n",
" u'nightly-esr10-content-crash': 0.23076923076923078,\n",
" u'nightly-plugin-crash': 0.015384615384615385,\n",
" u'release-content-crash': 0.04345821325648415,\n",
" u'release-plugin-crash': 0.04153988622833708}"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"{k: (1.0 * v / crash_counts.get(k[:-6], v)) for (k, v) in crash_reports.iteritems()}"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
# coding: utf-8
# ### Subprocess Crash Submission Rates
# In[1]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
from plotly.graph_objs import *
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records
get_ipython().magic(u'pylab inline')
# In[2]:
sc.defaultParallelism
# In[43]:
# Fetch Firefox pings for submission_date 20161101.
# NOTE(review): fraction=0.4 is presumably a sampling fraction -- confirm
# against the moztelemetry get_pings documentation.
pings = get_pings(sc, app="Firefox", fraction=0.4, submission_date="20161101")
# In[44]:
# Request only the properties the rest of the analysis indexes into: the
# client id, the channel, and the content/plugin entries of the two keyed
# histograms being compared.
subset = get_pings_properties(
    pings,
    [
        "clientId",
        "application/channel",
        "payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/content-crash",
        "payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/plugin-crash",
        "payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/content",
        "payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/plugin",
    ],
)
# In[45]:
# Keep a ping only when it has a channel and at least one of the four
# histogram entries is present (not None).
subset = subset.filter(
    lambda ping: ping["application/channel"] is not None
    and any(
        ping[path] is not None
        for path in (
            "payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/content-crash",
            "payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/plugin-crash",
            "payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/content",
            "payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/plugin",
        )
    )
)
# In[46]:
def pair_or_none(p, prefix, hgram, crash):
    """Build a countable ``(key, 1)`` pair for one histogram entry of a ping.

    Args:
        p: Mapping from property path to value for a single ping.
        prefix: String prepended to ``crash`` to form the key.
        hgram: Property path of the keyed histogram, without a trailing "/".
        crash: Key inside the histogram; also the suffix of the returned key.

    Returns:
        ``(prefix + crash, 1)`` when ``p[hgram + "/" + crash]`` is not None,
        otherwise None.

    Raises:
        KeyError: if ``p`` has no entry for ``hgram + "/" + crash``.
    """
    # Only presence is tested: any non-None value (including 0 or an empty
    # container) yields a pair, and the pair always carries the count 1.
    if p[hgram + "/" + crash] is None:
        return None
    return (prefix + crash, 1)
def gimme_count(p, hgram, crashes):
    """Return one ``pair_or_none`` result per requested histogram key.

    Args:
        p: Mapping from property path to value for a single ping; must
            contain a string under "application/channel".
        hgram: Property path of the keyed histogram, without a trailing "/".
        crashes: Iterable of keys to look up inside that histogram.

    Returns:
        A list with one element per entry of ``crashes``, in order: either
        ``("<channel>-<crash>", 1)`` or None when that entry is absent from
        the ping. Callers are expected to drop the None placeholders.
    """
    # A None channel would raise TypeError on the concatenation below; the
    # pings passed in have already been filtered to have a channel.
    prefix = p["application/channel"] + "-"
    return [pair_or_none(p, prefix, hgram, crash) for crash in crashes]
# For each "<channel>-<process type>" key, count the pings whose
# SUBPROCESS_CRASHES_WITH_DUMP histogram has an entry for that process type.
# Each ping contributes at most one record per key, so this is a count of
# pings, not a sum of the values stored in the histograms.
crash_counts = (
    subset
    .flatMap(
        lambda ping: gimme_count(
            ping,
            "payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP",
            ["content", "plugin"]
        )
    )
    .filter(lambda entry: entry is not None)  # drop placeholders for absent entries
    .countByKey()
)
crash_counts
# In[47]:
# For each "<channel>-<process type>-crash" key, count the pings whose
# PROCESS_CRASH_SUBMIT_SUCCESS histogram has an entry for that crash type.
# Each ping contributes at most one record per key, so this is a count of
# pings, not a sum of the values stored in the histograms.
crash_reports = (
    subset
    .flatMap(
        lambda ping: gimme_count(
            ping,
            "payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS",
            ["content-crash", "plugin-crash"]
        )
    )
    .filter(lambda entry: entry is not None)  # drop placeholders for absent entries
    .countByKey()
)
crash_reports
# In[48]:
# Ratio, per "<channel>-<type>-crash" key, of pings reporting a submission
# result to pings reporting a crash with dump. Both inputs are ping counts,
# so this is not a ratio of the crash totals stored inside the histograms.
#
# The crash_counts keys lack the "-crash" suffix, so it is stripped before the
# lookup. If no matching crash count exists the numerator is used as the
# denominator, which reports a rate of 1.0 for that key.
# NOTE(review): confirm 1.0 is the intended fallback rather than a missing value.
#
# .items() is used instead of .iteritems() so the cell runs on Python 2 and 3.
{
    key: float(submitted) / crash_counts.get(key[:-len("-crash")], submitted)
    for key, submitted in crash_reports.items()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment