chutten/subprocess_crash_submit.ipynb Secret

## subprocess_crash_submit.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Subprocess Crash Submission Rates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/histogram-whitelists.json). Assuming all histograms are acceptable.\n",
      "Populating the interactive namespace from numpy and matplotlib\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/hadoop/anaconda2/lib/python2.7/site-packages/IPython/core/magics/pylab.py:161: UserWarning:\n",
      "\n",
      "pylab import has clobbered these variables: ['Annotation', 'Figure']\n",
      "`%matplotlib` prevents importing * from pylab and numpy\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import ujson as json\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import plotly.plotly as py\n",
    "from plotly.graph_objs import *\n",
    "\n",
    "from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records\n",
    "\n",
    "%pylab inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "160"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sc.defaultParallelism"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "pings = get_pings(sc, app=\"Firefox\", submission_date=\"20161101\", fraction=0.4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "subset = get_pings_properties(pings, [\"clientId\",\n",
    "                                      \"application/channel\",\n",
    "                                      \"payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/content-crash\",\n",
    "                                      \"payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/plugin-crash\",\n",
    "                                      \"payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/content\",\n",
    "                                      \"payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/plugin\",\n",
    "                                     ])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "subset = subset.filter(lambda p: p[\"application/channel\"] is not None and\n",
    "                      (p[\"payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/content-crash\"] is not None or\n",
    "                      p[\"payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/plugin-crash\"] is not None or\n",
    "                      p[\"payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/content\"] is not None or\n",
    "                      p[\"payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/plugin\"] is not None))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "defaultdict(int,\n",
       "            {u'aurora-content': 2829,\n",
       "             u'aurora-plugin': 292,\n",
       "             u'beta-content': 24546,\n",
       "             u'beta-plugin': 9761,\n",
       "             u'default-content': 5,\n",
       "             u'default-plugin': 5,\n",
       "             u'esr-content': 28,\n",
       "             u'esr-plugin': 70,\n",
       "             u'esr45-content': 1,\n",
       "             u'nightly-content': 1848,\n",
       "             u'nightly-elm-content': 3,\n",
       "             u'nightly-esr10-content': 13,\n",
       "             u'nightly-esr10-plugin': 3,\n",
       "             u'nightly-plugin': 65,\n",
       "             u'release-content': 8675,\n",
       "             u'release-plugin': 7559})"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def pair_or_none(p, prefix, hgram, crash):\n",
    "    return (prefix + crash, 1) if p[hgram + \"/\" + crash] is not None else None\n",
    "\n",
    "\n",
    "def gimme_count(p, hgram, crashes):\n",
    "    prefix = p[\"application/channel\"] + \"-\"\n",
    "    return [pair_or_none(p, prefix, hgram, crash) for crash in crashes]\n",
    "\n",
    "crash_counts = subset\\\n",
    "                .flatMap(lambda p: gimme_count(p, \"payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP\", [\"content\", \"plugin\"]))\\\n",
    "                .filter(lambda pair: pair is not None)\\\n",
    "                .countByKey()\n",
    "crash_counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "defaultdict(int,\n",
       "            {u'aurora-content-crash': 139,\n",
       "             u'aurora-plugin-crash': 4,\n",
       "             u'beta-content-crash': 735,\n",
       "             u'beta-plugin-crash': 213,\n",
       "             u'default-content-crash': 1,\n",
       "             u'esr-plugin-crash': 1,\n",
       "             u'nightly-content-crash': 432,\n",
       "             u'nightly-esr10-content-crash': 3,\n",
       "             u'nightly-plugin-crash': 1,\n",
       "             u'release-content-crash': 377,\n",
       "             u'release-plugin-crash': 314})"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "crash_reports = subset\\\n",
    "                    .flatMap(lambda p: gimme_count(p, \"payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS\", [\"content-crash\", \"plugin-crash\"]))\\\n",
    "                    .filter(lambda pair: pair is not None)\\\n",
    "                    .countByKey()\n",
    "crash_reports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{u'aurora-content-crash': 0.04913396960056557,\n",
       " u'aurora-plugin-crash': 0.0136986301369863,\n",
       " u'beta-content-crash': 0.02994377902713273,\n",
       " u'beta-plugin-crash': 0.02182153467882389,\n",
       " u'default-content-crash': 0.2,\n",
       " u'esr-plugin-crash': 0.014285714285714285,\n",
       " u'nightly-content-crash': 0.23376623376623376,\n",
       " u'nightly-esr10-content-crash': 0.23076923076923078,\n",
       " u'nightly-plugin-crash': 0.015384615384615385,\n",
       " u'release-content-crash': 0.04345821325648415,\n",
       " u'release-plugin-crash': 0.04153988622833708}"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "{k: (1.0 * v / crash_counts.get(k[:-6], v)) for (k, v) in crash_reports.iteritems()}"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}

## subprocess_crash_submit.py

# coding: utf-8

# ### Subprocess Crash Submission Rates

# In[1]:

import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
from plotly.graph_objs import *

from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records

# Runs IPython's `%pylab inline` magic (this is nbconvert's export of the magic line).
# NOTE(review): the notebook's recorded stderr for this cell warns that the pylab
# import clobbered 'Annotation' and 'Figure' -- presumably names brought in by the
# `from plotly.graph_objs import *` above; confirm before relying on either name.
get_ipython().magic(u'pylab inline')


# In[2]:

# Bare expression: displays the default parallelism of `sc` when run as a notebook cell.
# `sc` is not defined in this file -- presumably the SparkContext supplied by the
# notebook environment; confirm.
sc.defaultParallelism


# In[43]:

# Fetch the pings to analyse: app "Firefox", submission date 20161101,
# requested with fraction=0.4.
pings = get_pings(
    sc,
    app="Firefox",
    submission_date="20161101",
    fraction=0.4,
)


# In[44]:

# Properties extracted from every ping: the client id, the channel, and the
# content/plugin entries of the two keyed crash histograms used below.
ping_properties = [
    "clientId",
    "application/channel",
    "payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/content-crash",
    "payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/plugin-crash",
    "payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/content",
    "payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/plugin",
]
subset = get_pings_properties(pings, ping_properties)


# In[45]:

def _has_channel_and_crash_data(p):
    """Return True when the ping has a channel and at least one crash histogram.

    The four histogram properties are checked in the same order as they were
    requested; the first one that is not None makes the ping qualify.
    """
    if p["application/channel"] is None:
        return False
    # Explicit loop instead of any(). NOTE(review): `%pylab inline` star-imports
    # numpy, which presumably rebinds `any` in this namespace -- confirm.
    for crash_property in (
        "payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/content-crash",
        "payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/plugin-crash",
        "payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/content",
        "payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/plugin",
    ):
        if p[crash_property] is not None:
            return True
    return False


# Keep only pings that can contribute to at least one of the counts below.
subset = subset.filter(_has_channel_and_crash_data)


# In[46]:

def pair_or_none(p, prefix, hgram, crash):
    """Build a countable ``(key, 1)`` pair for one crash type of one ping.

    Args:
        p: mapping of ping properties, indexed by property path.
        prefix: string prepended to ``crash`` to form the key.
        hgram: property path of the keyed histogram (without the crash key).
        crash: key inside the histogram; also the suffix of the result key.

    Returns:
        ``(prefix + crash, 1)`` when ``p[hgram + "/" + crash]`` is not None,
        otherwise None.
    """
    histogram_path = hgram + "/" + crash
    if p[histogram_path] is None:
        return None
    return (prefix + crash, 1)


def gimme_count(p, hgram, crashes):
    """Return one ``pair_or_none`` result per entry of ``crashes`` for ping ``p``.

    Keys are prefixed with the ping's channel followed by "-". The returned
    list has the same length and order as ``crashes`` and holds None for every
    crash type whose histogram is missing from the ping, so callers have to
    drop the None entries themselves.
    """
    channel_prefix = p["application/channel"] + "-"
    pairs = []
    for crash in crashes:
        pairs.append(pair_or_none(p, channel_prefix, hgram, crash))
    return pairs

# Per "<channel>-<process type>" key, count the pings in `subset` that carry a
# SUBPROCESS_CRASHES_WITH_DUMP entry for that process type. Every qualifying
# ping contributes exactly one (key, 1) pair, so the totals are ping counts,
# not sums of the histogram values.
crash_counts = (
    subset
    .flatMap(lambda ping: gimme_count(ping, "payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP", ["content", "plugin"]))
    .filter(lambda entry: entry is not None)  # drop the placeholders for missing histograms
    .countByKey()
)
crash_counts


# In[47]:

# Per "<channel>-<process type>-crash" key, count the pings in `subset` that
# carry a PROCESS_CRASH_SUBMIT_SUCCESS entry for that crash type. As with the
# crash counts, each qualifying ping contributes exactly one (key, 1) pair.
crash_reports = (
    subset
    .flatMap(lambda ping: gimme_count(ping, "payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS", ["content-crash", "plugin-crash"]))
    .filter(lambda entry: entry is not None)  # drop the placeholders for missing histograms
    .countByKey()
)
crash_reports


# In[48]:

# Ratio of the submit-success ping count to the crash ping count for each key.
# Report keys end in "-crash"; stripping that suffix gives the matching
# crash_counts key. A key missing from crash_counts falls back to the report
# count itself, which yields a ratio of 1.0.
{
    report_key: float(report_count) / crash_counts.get(report_key[:-len("-crash")], report_count)
    for report_key, report_count in crash_reports.iteritems()
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Subprocess Crash Submission Rates"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"collapsed": false,
	"scrolled": true
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/histogram-whitelists.json). Assuming all histograms are acceptable.\n",
	"Populating the interactive namespace from numpy and matplotlib\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/home/hadoop/anaconda2/lib/python2.7/site-packages/IPython/core/magics/pylab.py:161: UserWarning:\n",
	"\n",
	"pylab import has clobbered these variables: ['Annotation', 'Figure']\n",
	"`%matplotlib` prevents importing * from pylab and numpy\n",
	"\n"
	]
	}
	],
	"source": [
	"import ujson as json\n",
	"import matplotlib.pyplot as plt\n",
	"import pandas as pd\n",
	"import numpy as np\n",
	"import plotly.plotly as py\n",
	"from plotly.graph_objs import *\n",
	"\n",
	"from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records\n",
	"\n",
	"%pylab inline"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"160"
	]
	},
	"execution_count": 2,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sc.defaultParallelism"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 43,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"pings = get_pings(sc, app=\"Firefox\", submission_date=\"20161101\", fraction=0.4)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 44,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"subset = get_pings_properties(pings, [\"clientId\",\n",
	" \"application/channel\",\n",
	" \"payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/content-crash\",\n",
	" \"payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/plugin-crash\",\n",
	" \"payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/content\",\n",
	" \"payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/plugin\",\n",
	" ])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 45,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"subset = subset.filter(lambda p: p[\"application/channel\"] is not None and\n",
	" (p[\"payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/content-crash\"] is not None or\n",
	" p[\"payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/plugin-crash\"] is not None or\n",
	" p[\"payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/content\"] is not None or\n",
	" p[\"payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/plugin\"] is not None))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 46,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"defaultdict(int,\n",
	" {u'aurora-content': 2829,\n",
	" u'aurora-plugin': 292,\n",
	" u'beta-content': 24546,\n",
	" u'beta-plugin': 9761,\n",
	" u'default-content': 5,\n",
	" u'default-plugin': 5,\n",
	" u'esr-content': 28,\n",
	" u'esr-plugin': 70,\n",
	" u'esr45-content': 1,\n",
	" u'nightly-content': 1848,\n",
	" u'nightly-elm-content': 3,\n",
	" u'nightly-esr10-content': 13,\n",
	" u'nightly-esr10-plugin': 3,\n",
	" u'nightly-plugin': 65,\n",
	" u'release-content': 8675,\n",
	" u'release-plugin': 7559})"
	]
	},
	"execution_count": 46,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"def pair_or_none(p, prefix, hgram, crash):\n",
	" return (prefix + crash, 1) if p[hgram + \"/\" + crash] is not None else None\n",
	"\n",
	"\n",
	"def gimme_count(p, hgram, crashes):\n",
	" prefix = p[\"application/channel\"] + \"-\"\n",
	" return [pair_or_none(p, prefix, hgram, crash) for crash in crashes]\n",
	"\n",
	"crash_counts = subset\\\n",
	" .flatMap(lambda p: gimme_count(p, \"payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP\", [\"content\", \"plugin\"]))\\\n",
	" .filter(lambda pair: pair is not None)\\\n",
	" .countByKey()\n",
	"crash_counts"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 47,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"defaultdict(int,\n",
	" {u'aurora-content-crash': 139,\n",
	" u'aurora-plugin-crash': 4,\n",
	" u'beta-content-crash': 735,\n",
	" u'beta-plugin-crash': 213,\n",
	" u'default-content-crash': 1,\n",
	" u'esr-plugin-crash': 1,\n",
	" u'nightly-content-crash': 432,\n",
	" u'nightly-esr10-content-crash': 3,\n",
	" u'nightly-plugin-crash': 1,\n",
	" u'release-content-crash': 377,\n",
	" u'release-plugin-crash': 314})"
	]
	},
	"execution_count": 47,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"crash_reports = subset\\\n",
	" .flatMap(lambda p: gimme_count(p, \"payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS\", [\"content-crash\", \"plugin-crash\"]))\\\n",
	" .filter(lambda pair: pair is not None)\\\n",
	" .countByKey()\n",
	"crash_reports"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 48,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"{u'aurora-content-crash': 0.04913396960056557,\n",
	" u'aurora-plugin-crash': 0.0136986301369863,\n",
	" u'beta-content-crash': 0.02994377902713273,\n",
	" u'beta-plugin-crash': 0.02182153467882389,\n",
	" u'default-content-crash': 0.2,\n",
	" u'esr-plugin-crash': 0.014285714285714285,\n",
	" u'nightly-content-crash': 0.23376623376623376,\n",
	" u'nightly-esr10-content-crash': 0.23076923076923078,\n",
	" u'nightly-plugin-crash': 0.015384615384615385,\n",
	" u'release-content-crash': 0.04345821325648415,\n",
	" u'release-plugin-crash': 0.04153988622833708}"
	]
	},
	"execution_count": 48,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"{k: (1.0 * v / crash_counts.get(k[:-6], v)) for (k, v) in crash_reports.iteritems()}"
	]
	}
	],
	"metadata": {
	"anaconda-cloud": {},
	"kernelspec": {
	"display_name": "Python [default]",
	"language": "python",
	"name": "python2"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 2
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython2",
	"version": "2.7.12"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}

	# coding: utf-8

	# ### Subprocess Crash Submission Rates

	# In[1]:

	import ujson as json
	import matplotlib.pyplot as plt
	import pandas as pd
	import numpy as np
	import plotly.plotly as py
	from plotly.graph_objs import *

	from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records

	# Runs IPython's `%pylab inline` magic (this is nbconvert's export of the magic line).
	# NOTE(review): the notebook's recorded stderr for this cell warns that the pylab
	# import clobbered 'Annotation' and 'Figure' -- presumably names brought in by the
	# `from plotly.graph_objs import *` above; confirm before relying on either name.
	get_ipython().magic(u'pylab inline')


	# In[2]:

	# Bare expression: displays the default parallelism of `sc` when run as a notebook cell.
	# `sc` is not defined in this file -- presumably the SparkContext supplied by the
	# notebook environment; confirm.
	sc.defaultParallelism


	# In[43]:

	# Fetch the pings to analyse: app "Firefox", submission date 20161101,
	# requested with fraction=0.4.
	pings = get_pings(
	    sc,
	    app="Firefox",
	    submission_date="20161101",
	    fraction=0.4,
	)


	# In[44]:

	# Properties extracted from every ping: the client id, the channel, and the
	# content/plugin entries of the two keyed crash histograms used below.
	ping_properties = [
	    "clientId",
	    "application/channel",
	    "payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/content-crash",
	    "payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/plugin-crash",
	    "payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/content",
	    "payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/plugin",
	]
	subset = get_pings_properties(pings, ping_properties)


	# In[45]:

	def _has_channel_and_crash_data(p):
	    """Return True when the ping has a channel and at least one crash histogram.

	    The four histogram properties are checked in the same order as they were
	    requested; the first one that is not None makes the ping qualify.
	    """
	    if p["application/channel"] is None:
	        return False
	    # Explicit loop instead of any(). NOTE(review): `%pylab inline` star-imports
	    # numpy, which presumably rebinds `any` in this namespace -- confirm.
	    for crash_property in (
	        "payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/content-crash",
	        "payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS/plugin-crash",
	        "payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/content",
	        "payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP/plugin",
	    ):
	        if p[crash_property] is not None:
	            return True
	    return False


	# Keep only pings that can contribute to at least one of the counts below.
	subset = subset.filter(_has_channel_and_crash_data)


	# In[46]:

	def pair_or_none(p, prefix, hgram, crash):
	    """Build a countable ``(key, 1)`` pair for one crash type of one ping.

	    Args:
	        p: mapping of ping properties, indexed by property path.
	        prefix: string prepended to ``crash`` to form the key.
	        hgram: property path of the keyed histogram (without the crash key).
	        crash: key inside the histogram; also the suffix of the result key.

	    Returns:
	        ``(prefix + crash, 1)`` when ``p[hgram + "/" + crash]`` is not None,
	        otherwise None.
	    """
	    # The body is indented under the def again; at the same column as the
	    # def line it was a syntax error.
	    histogram_path = hgram + "/" + crash
	    if p[histogram_path] is None:
	        return None
	    return (prefix + crash, 1)


	def gimme_count(p, hgram, crashes):
	    """Return one ``pair_or_none`` result per entry of ``crashes`` for ping ``p``.

	    Keys are prefixed with the ping's channel followed by "-". The returned
	    list has the same length and order as ``crashes`` and holds None for every
	    crash type whose histogram is missing from the ping, so callers have to
	    drop the None entries themselves.
	    """
	    # The body is indented under the def again; at the same column as the
	    # def line it was a syntax error.
	    channel_prefix = p["application/channel"] + "-"
	    return [pair_or_none(p, channel_prefix, hgram, crash) for crash in crashes]

	# Per "<channel>-<process type>" key, count the pings in `subset` that carry a
	# SUBPROCESS_CRASHES_WITH_DUMP entry for that process type. Every qualifying
	# ping contributes exactly one (key, 1) pair, so the totals are ping counts,
	# not sums of the histogram values.
	crash_counts = (
	    subset
	    .flatMap(lambda ping: gimme_count(ping, "payload/keyedHistograms/SUBPROCESS_CRASHES_WITH_DUMP", ["content", "plugin"]))
	    .filter(lambda entry: entry is not None)  # drop the placeholders for missing histograms
	    .countByKey()
	)
	crash_counts


	# In[47]:

	# Per "<channel>-<process type>-crash" key, count the pings in `subset` that
	# carry a PROCESS_CRASH_SUBMIT_SUCCESS entry for that crash type. As with the
	# crash counts, each qualifying ping contributes exactly one (key, 1) pair.
	crash_reports = (
	    subset
	    .flatMap(lambda ping: gimme_count(ping, "payload/keyedHistograms/PROCESS_CRASH_SUBMIT_SUCCESS", ["content-crash", "plugin-crash"]))
	    .filter(lambda entry: entry is not None)  # drop the placeholders for missing histograms
	    .countByKey()
	)
	crash_reports


	# In[48]:

	# Ratio of the submit-success ping count to the crash ping count for each key.
	# Report keys end in "-crash"; stripping that suffix gives the matching
	# crash_counts key. A key missing from crash_counts falls back to the report
	# count itself, which yields a ratio of 1.0.
	{
	    report_key: float(report_count) / crash_counts.get(report_key[:-len("-crash")], report_count)
	    for report_key, report_count in crash_reports.iteritems()
	}