Skip to content

Instantly share code, notes, and snippets.

@vitillo
Last active January 28, 2016 11:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vitillo/ae0464d7f1dd39752ab6 to your computer and use it in GitHub Desktop.
Save vitillo/ae0464d7f1dd39752ab6 to your computer and use it in GitHub Desktop.
Activity Histogram Bucketing
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/bucket-whitelist.json). Assuming all histograms are acceptable.\n",
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"source": [
"import ujson as json\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import numpy as np\n",
"import plotly.plotly as py\n",
"import IPython\n",
"import functools\n",
"\n",
"from __future__ import division\n",
"from moztelemetry.spark import get_pings, get_one_ping_per_client, get_pings_properties\n",
"\n",
"%pylab inline\n",
"IPython.core.pylabtools.figsize(16, 7)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pings = get_pings(sc, channel=\"nightly\", submission_date=\"20160127\", build_id=(\"20160127000000\", \"20160127999999\"))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def gecko_activity(ping):\n",
" threads = ping[\"payload\"].get(\"threadHangStats\", {})\n",
" result = {}\n",
" \n",
" for thread in threads:\n",
" if thread[\"name\"] == \"Gecko\":\n",
" activity = thread[\"activity\"][\"values\"]\n",
" result[\"gecko_activity\"] = pd.Series(activity.values(), index=map(int, activity.keys())).sort_index()\n",
" \n",
" result[\"gecko_hangs\"] = pd.Series()\n",
" for hang in thread[\"hangs\"]:\n",
" histogram = hang[\"histogram\"][\"values\"]\n",
" hang_histogram = pd.Series(histogram.values(), index=map(int, histogram.keys()))\n",
" result[\"gecko_hangs\"] = result[\"gecko_hangs\"].add(hang_histogram, fill_value=0)\n",
" \n",
" result[\"gecko_hangs\"] = result[\"gecko_hangs\"].sort_index()\n",
" break\n",
" \n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"bhr_summary = pings.map(gecko_activity)\n",
"sample = bhr_summary.take(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's have a look at few individual submissions:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sample 0 activity:\n",
"0 0\n",
"1 825\n",
"3 2\n",
"7 1\n",
"15 3\n",
"31 13\n",
"63 58\n",
"127 10\n",
"255 2\n",
"511 3\n",
"1023 0\n",
"dtype: int64\n",
"Sample 0 hangs:\n",
"127 0\n",
"255 2\n",
"511 3\n",
"1023 0\n",
"dtype: float64\n",
"\n",
"Sample 1 activity:\n",
"0 0\n",
"1 8114\n",
"3 207\n",
"7 182\n",
"15 199\n",
"31 116\n",
"63 38\n",
"127 21\n",
"255 6\n",
"511 2\n",
"1023 4\n",
"2047 0\n",
"dtype: int64\n",
"Sample 1 hangs:\n",
"127 0\n",
"255 6\n",
"511 2\n",
"1023 4\n",
"2047 0\n",
"dtype: float64\n",
"\n",
"Sample 2 activity:\n",
"0 0\n",
"1 7390\n",
"3 207\n",
"7 181\n",
"15 199\n",
"31 114\n",
"63 35\n",
"127 21\n",
"255 6\n",
"511 2\n",
"1023 3\n",
"2047 0\n",
"dtype: int64\n",
"Sample 2 hangs:\n",
"127 0\n",
"255 6\n",
"511 2\n",
"1023 3\n",
"2047 0\n",
"dtype: float64\n",
"\n",
"Sample 3 activity:\n",
"0 0\n",
"1 422\n",
"3 5\n",
"7 3\n",
"15 5\n",
"31 11\n",
"63 9\n",
"127 6\n",
"511 1\n",
"1023 1\n",
"2047 0\n",
"dtype: int64\n",
"Sample 3 hangs:\n",
"255 0\n",
"511 1\n",
"1023 1\n",
"2047 0\n",
"dtype: float64\n",
"\n",
"Sample 4 activity:\n",
"0 0\n",
"1 5244\n",
"3 22\n",
"7 34\n",
"15 92\n",
"31 246\n",
"63 152\n",
"127 75\n",
"255 36\n",
"511 11\n",
"1023 10\n",
"2047 5\n",
"8191 1\n",
"16383 0\n",
"dtype: int64\n",
"Sample 4 hangs:\n",
"127 0\n",
"255 34\n",
"511 13\n",
"1023 10\n",
"2047 5\n",
"4095 0\n",
"8191 1\n",
"16383 1\n",
"32767 0\n",
"dtype: float64\n",
"\n"
]
}
],
"source": [
"for idx, x in enumerate(sample):\n",
" print \"Sample {} activity:\".format(idx)\n",
" print x[\"gecko_activity\"]\n",
" print \"Sample {} hangs:\".format(idx)\n",
" print x[\"gecko_hangs\"]\n",
" print"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"How many submissions have mismatching BHR reports?"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def compare(ping):\n",
" activity = ping[\"gecko_activity\"]\n",
" hangs = ping[\"gecko_hangs\"]\n",
" \n",
" index = sorted(set(hangs.index).union(set(activity.index))) \n",
" activity = pd.Series(activity[activity.index], index=index).fillna(0)\n",
" hangs = pd.Series(hangs[hangs.index], index=index)\n",
" \n",
" return np.all(activity[activity.index >= 255] == hangs[hangs.index >= 255])"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.527938517179\n"
]
}
],
"source": [
"total = bhr_summary.count()\n",
"equal = bhr_summary.filter(compare).count()\n",
"print equal/total"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's have a look at few individual submissions with mismatching BHR reports:"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"sample = bhr_summary.filter(lambda x: not compare(x)).take(5)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sample 0 activity:\n",
"0 0\n",
"1 5244\n",
"3 22\n",
"7 34\n",
"15 92\n",
"31 246\n",
"63 152\n",
"127 75\n",
"255 36\n",
"511 11\n",
"1023 10\n",
"2047 5\n",
"8191 1\n",
"16383 0\n",
"dtype: int64\n",
"Sample 0 hangs:\n",
"127 0\n",
"255 34\n",
"511 13\n",
"1023 10\n",
"2047 5\n",
"4095 0\n",
"8191 1\n",
"16383 1\n",
"32767 0\n",
"dtype: float64\n",
"\n",
"Sample 1 activity:\n",
"0 0\n",
"1 68162\n",
"3 2223\n",
"7 609\n",
"15 667\n",
"31 1620\n",
"63 3040\n",
"127 364\n",
"255 232\n",
"511 85\n",
"1023 70\n",
"2047 5\n",
"4095 4\n",
"8191 2\n",
"16383 0\n",
"dtype: int64\n",
"Sample 1 hangs:\n",
"127 0\n",
"255 229\n",
"511 87\n",
"1023 71\n",
"2047 5\n",
"4095 4\n",
"8191 2\n",
"16383 0\n",
"dtype: float64\n",
"\n",
"Sample 2 activity:\n",
"0 0\n",
"1 66004\n",
"3 2201\n",
"7 585\n",
"15 643\n",
"31 1595\n",
"63 3009\n",
"127 358\n",
"255 227\n",
"511 85\n",
"1023 69\n",
"2047 5\n",
"4095 3\n",
"8191 2\n",
"16383 0\n",
"dtype: int64\n",
"Sample 2 hangs:\n",
"127 0\n",
"255 224\n",
"511 87\n",
"1023 70\n",
"2047 5\n",
"4095 3\n",
"8191 2\n",
"16383 0\n",
"dtype: float64\n",
"\n",
"Sample 3 activity:\n",
"0 0\n",
"1 33775\n",
"3 945\n",
"7 691\n",
"15 1026\n",
"31 879\n",
"63 398\n",
"127 106\n",
"255 46\n",
"511 24\n",
"1023 8\n",
"2047 0\n",
"dtype: int64\n",
"Sample 3 hangs:\n",
"127 0\n",
"255 46\n",
"511 21\n",
"1023 9\n",
"2047 1\n",
"4095 0\n",
"dtype: float64\n",
"\n",
"Sample 4 activity:\n",
"0 0\n",
"1 68201\n",
"3 1648\n",
"7 1456\n",
"15 1707\n",
"31 2454\n",
"63 1016\n",
"127 232\n",
"255 83\n",
"511 46\n",
"1023 15\n",
"2047 1\n",
"4095 0\n",
"dtype: int64\n",
"Sample 4 hangs:\n",
"127 0\n",
"255 79\n",
"511 46\n",
"1023 16\n",
"2047 2\n",
"4095 0\n",
"dtype: float64\n",
"\n"
]
}
],
"source": [
"for idx, x in enumerate(sample):\n",
" print \"Sample {} activity:\".format(idx)\n",
" print x[\"gecko_activity\"]\n",
" print \"Sample {} hangs:\".format(idx)\n",
" print x[\"gecko_hangs\"]\n",
" print"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment