Last active
January 28, 2016 11:56
-
-
Save vitillo/ae0464d7f1dd39752ab6 to your computer and use it in GitHub Desktop.
Activity Histogram Bucketing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/bucket-whitelist.json). Assuming all histograms are acceptable.\n", | |
"Populating the interactive namespace from numpy and matplotlib\n" | |
] | |
} | |
], | |
"source": [ | |
"import ujson as json\n", | |
"import matplotlib.pyplot as plt\n", | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import plotly.plotly as py\n", | |
"import IPython\n", | |
"import functools\n", | |
"\n", | |
"from __future__ import division\n", | |
"from moztelemetry.spark import get_pings, get_one_ping_per_client, get_pings_properties\n", | |
"\n", | |
"%pylab inline\n", | |
"IPython.core.pylabtools.figsize(16, 7)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Load the pings to analyse: channel \"nightly\", submission date 20160127,\n", | |
"# build ids between 20160127000000 and 20160127999999.\n", | |
"# NOTE(review): `sc` is not defined in this notebook -- presumably the SparkContext\n", | |
"# injected by the hosting environment; confirm before running elsewhere.\n", | |
"pings = get_pings(sc, channel=\"nightly\", submission_date=\"20160127\", build_id=(\"20160127000000\", \"20160127999999\"))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def gecko_activity(ping):\n", | |
"    \"\"\"Extract the histograms of the thread named \"Gecko\" from one ping.\n", | |
"\n", | |
"    Looks through ping[\"payload\"][\"threadHangStats\"] and stops at the first\n", | |
"    thread whose name is \"Gecko\".\n", | |
"\n", | |
"    Returns a dict with two pandas Series, both sorted by integer bucket label:\n", | |
"      - \"gecko_activity\": the thread's activity histogram;\n", | |
"      - \"gecko_hangs\": the bucket-wise sum of all of the thread's hang histograms.\n", | |
"    Returns an empty dict when no thread named \"Gecko\" is present.\n", | |
"    \"\"\"\n", | |
"    def as_series(histogram):\n", | |
"        # Bucket labels are converted to ints so they sort numerically\n", | |
"        # (presumably they arrive as JSON string keys -- confirm against the ping schema).\n", | |
"        buckets = histogram[\"values\"]\n", | |
"        return pd.Series(buckets.values(), index=map(int, buckets.keys()))\n", | |
"\n", | |
"    for thread in ping[\"payload\"].get(\"threadHangStats\", {}):\n", | |
"        if thread[\"name\"] != \"Gecko\":\n", | |
"            continue\n", | |
"\n", | |
"        activity_hist = as_series(thread[\"activity\"]).sort_index()\n", | |
"\n", | |
"        # Accumulate every hang histogram; fill_value=0 keeps buckets that only\n", | |
"        # appear in some of the histograms instead of turning them into NaN.\n", | |
"        hang_total = pd.Series()\n", | |
"        for hang in thread[\"hangs\"]:\n", | |
"            hang_total = hang_total.add(as_series(hang[\"histogram\"]), fill_value=0)\n", | |
"\n", | |
"        # Only the first matching thread is reported.\n", | |
"        return {\"gecko_activity\": activity_hist, \"gecko_hangs\": hang_total.sort_index()}\n", | |
"\n", | |
"    return {}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Reduce every ping to the dict produced by gecko_activity, then fetch five of\n", | |
"# those summaries for manual inspection.\n", | |
"bhr_summary = pings.map(gecko_activity)\n", | |
"sample = bhr_summary.take(5)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Let's have a look at a few individual submissions:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Sample 0 activity:\n", | |
"0 0\n", | |
"1 825\n", | |
"3 2\n", | |
"7 1\n", | |
"15 3\n", | |
"31 13\n", | |
"63 58\n", | |
"127 10\n", | |
"255 2\n", | |
"511 3\n", | |
"1023 0\n", | |
"dtype: int64\n", | |
"Sample 0 hangs:\n", | |
"127 0\n", | |
"255 2\n", | |
"511 3\n", | |
"1023 0\n", | |
"dtype: float64\n", | |
"\n", | |
"Sample 1 activity:\n", | |
"0 0\n", | |
"1 8114\n", | |
"3 207\n", | |
"7 182\n", | |
"15 199\n", | |
"31 116\n", | |
"63 38\n", | |
"127 21\n", | |
"255 6\n", | |
"511 2\n", | |
"1023 4\n", | |
"2047 0\n", | |
"dtype: int64\n", | |
"Sample 1 hangs:\n", | |
"127 0\n", | |
"255 6\n", | |
"511 2\n", | |
"1023 4\n", | |
"2047 0\n", | |
"dtype: float64\n", | |
"\n", | |
"Sample 2 activity:\n", | |
"0 0\n", | |
"1 7390\n", | |
"3 207\n", | |
"7 181\n", | |
"15 199\n", | |
"31 114\n", | |
"63 35\n", | |
"127 21\n", | |
"255 6\n", | |
"511 2\n", | |
"1023 3\n", | |
"2047 0\n", | |
"dtype: int64\n", | |
"Sample 2 hangs:\n", | |
"127 0\n", | |
"255 6\n", | |
"511 2\n", | |
"1023 3\n", | |
"2047 0\n", | |
"dtype: float64\n", | |
"\n", | |
"Sample 3 activity:\n", | |
"0 0\n", | |
"1 422\n", | |
"3 5\n", | |
"7 3\n", | |
"15 5\n", | |
"31 11\n", | |
"63 9\n", | |
"127 6\n", | |
"511 1\n", | |
"1023 1\n", | |
"2047 0\n", | |
"dtype: int64\n", | |
"Sample 3 hangs:\n", | |
"255 0\n", | |
"511 1\n", | |
"1023 1\n", | |
"2047 0\n", | |
"dtype: float64\n", | |
"\n", | |
"Sample 4 activity:\n", | |
"0 0\n", | |
"1 5244\n", | |
"3 22\n", | |
"7 34\n", | |
"15 92\n", | |
"31 246\n", | |
"63 152\n", | |
"127 75\n", | |
"255 36\n", | |
"511 11\n", | |
"1023 10\n", | |
"2047 5\n", | |
"8191 1\n", | |
"16383 0\n", | |
"dtype: int64\n", | |
"Sample 4 hangs:\n", | |
"127 0\n", | |
"255 34\n", | |
"511 13\n", | |
"1023 10\n", | |
"2047 5\n", | |
"4095 0\n", | |
"8191 1\n", | |
"16383 1\n", | |
"32767 0\n", | |
"dtype: float64\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"# Print the activity histogram and the summed hang histogram of each sampled\n", | |
"# submission, separated by a blank line (Python 2 print statements).\n", | |
"for idx, x in enumerate(sample):\n", | |
" print \"Sample {} activity:\".format(idx)\n", | |
" print x[\"gecko_activity\"]\n", | |
" print \"Sample {} hangs:\".format(idx)\n", | |
" print x[\"gecko_hangs\"]\n", | |
" print" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"How many submissions have mismatching BHR reports?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def compare(ping, min_bucket=255):\n", | |
"    \"\"\"Tell whether a summary's activity and hang histograms agree.\n", | |
"\n", | |
"    Parameters:\n", | |
"      ping: dict holding the pandas Series \"gecko_activity\" and \"gecko_hangs\",\n", | |
"        both indexed by integer bucket label.\n", | |
"      min_bucket: only buckets with a label >= this value are compared\n", | |
"        (default 255, the threshold this analysis has always used).\n", | |
"\n", | |
"    Returns True when every compared bucket holds the same count in both\n", | |
"    histograms, False otherwise. Raises KeyError if either Series is missing.\n", | |
"    \"\"\"\n", | |
"    activity = ping[\"gecko_activity\"]\n", | |
"    hangs = ping[\"gecko_hangs\"]\n", | |
"\n", | |
"    # Align both histograms on the union of their buckets. A bucket absent from\n", | |
"    # one histogram holds no samples, so it is zero-filled on BOTH sides: leaving\n", | |
"    # NaN in `hangs` would make an empty bucket compare unequal to an activity\n", | |
"    # count of 0 and report a mismatch that is not there.\n", | |
"    buckets = sorted(set(hangs.index).union(activity.index))\n", | |
"    activity = activity.reindex(buckets).fillna(0)\n", | |
"    hangs = hangs.reindex(buckets).fillna(0)\n", | |
"\n", | |
"    compared = [bucket for bucket in buckets if bucket >= min_bucket]\n", | |
"    # bool() turns numpy's bool_ into a plain Python bool for callers.\n", | |
"    return bool(np.all(activity.loc[compared].values == hangs.loc[compared].values))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.527938517179\n" | |
] | |
} | |
], | |
"source": [ | |
"# Share of submissions for which compare() returns True, i.e. whose histograms\n", | |
"# MATCH; the mismatching share is 1 minus the printed value.\n", | |
"# `/` is true division here because the first cell runs\n", | |
"# `from __future__ import division`.\n", | |
"total = bhr_summary.count()\n", | |
"equal = bhr_summary.filter(compare).count()\n", | |
"print equal/total" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Let's have a look at a few individual submissions with mismatching BHR reports:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Replace `sample` with five summaries for which compare() returns False.\n", | |
"sample = bhr_summary.filter(lambda x: not compare(x)).take(5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Sample 0 activity:\n", | |
"0 0\n", | |
"1 5244\n", | |
"3 22\n", | |
"7 34\n", | |
"15 92\n", | |
"31 246\n", | |
"63 152\n", | |
"127 75\n", | |
"255 36\n", | |
"511 11\n", | |
"1023 10\n", | |
"2047 5\n", | |
"8191 1\n", | |
"16383 0\n", | |
"dtype: int64\n", | |
"Sample 0 hangs:\n", | |
"127 0\n", | |
"255 34\n", | |
"511 13\n", | |
"1023 10\n", | |
"2047 5\n", | |
"4095 0\n", | |
"8191 1\n", | |
"16383 1\n", | |
"32767 0\n", | |
"dtype: float64\n", | |
"\n", | |
"Sample 1 activity:\n", | |
"0 0\n", | |
"1 68162\n", | |
"3 2223\n", | |
"7 609\n", | |
"15 667\n", | |
"31 1620\n", | |
"63 3040\n", | |
"127 364\n", | |
"255 232\n", | |
"511 85\n", | |
"1023 70\n", | |
"2047 5\n", | |
"4095 4\n", | |
"8191 2\n", | |
"16383 0\n", | |
"dtype: int64\n", | |
"Sample 1 hangs:\n", | |
"127 0\n", | |
"255 229\n", | |
"511 87\n", | |
"1023 71\n", | |
"2047 5\n", | |
"4095 4\n", | |
"8191 2\n", | |
"16383 0\n", | |
"dtype: float64\n", | |
"\n", | |
"Sample 2 activity:\n", | |
"0 0\n", | |
"1 66004\n", | |
"3 2201\n", | |
"7 585\n", | |
"15 643\n", | |
"31 1595\n", | |
"63 3009\n", | |
"127 358\n", | |
"255 227\n", | |
"511 85\n", | |
"1023 69\n", | |
"2047 5\n", | |
"4095 3\n", | |
"8191 2\n", | |
"16383 0\n", | |
"dtype: int64\n", | |
"Sample 2 hangs:\n", | |
"127 0\n", | |
"255 224\n", | |
"511 87\n", | |
"1023 70\n", | |
"2047 5\n", | |
"4095 3\n", | |
"8191 2\n", | |
"16383 0\n", | |
"dtype: float64\n", | |
"\n", | |
"Sample 3 activity:\n", | |
"0 0\n", | |
"1 33775\n", | |
"3 945\n", | |
"7 691\n", | |
"15 1026\n", | |
"31 879\n", | |
"63 398\n", | |
"127 106\n", | |
"255 46\n", | |
"511 24\n", | |
"1023 8\n", | |
"2047 0\n", | |
"dtype: int64\n", | |
"Sample 3 hangs:\n", | |
"127 0\n", | |
"255 46\n", | |
"511 21\n", | |
"1023 9\n", | |
"2047 1\n", | |
"4095 0\n", | |
"dtype: float64\n", | |
"\n", | |
"Sample 4 activity:\n", | |
"0 0\n", | |
"1 68201\n", | |
"3 1648\n", | |
"7 1456\n", | |
"15 1707\n", | |
"31 2454\n", | |
"63 1016\n", | |
"127 232\n", | |
"255 83\n", | |
"511 46\n", | |
"1023 15\n", | |
"2047 1\n", | |
"4095 0\n", | |
"dtype: int64\n", | |
"Sample 4 hangs:\n", | |
"127 0\n", | |
"255 79\n", | |
"511 46\n", | |
"1023 16\n", | |
"2047 2\n", | |
"4095 0\n", | |
"dtype: float64\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"# Print both histograms of every entry in `sample`, separated by a blank line\n", | |
"# (Python 2 print statements).\n", | |
"for idx, x in enumerate(sample):\n", | |
" print \"Sample {} activity:\".format(idx)\n", | |
" print x[\"gecko_activity\"]\n", | |
" print \"Sample {} hangs:\".format(idx)\n", | |
" print x[\"gecko_hangs\"]\n", | |
" print" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment