Skip to content

Instantly share code, notes, and snippets.

@Uberi
Last active March 14, 2016 15:23
Show Gist options
  • Save Uberi/2e97bd029eb5783967b7 to your computer and use it in GitHub Desktop.
Save Uberi/2e97bd029eb5783967b7 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/bucket-whitelist.json). Assuming all histograms are acceptable.\n"
]
}
],
"source": [
"from datetime import datetime\n",
"\n",
"import psycopg2\n",
"import numpy as np\n",
"\n",
"from moztelemetry.spark import get_pings, get_pings_properties"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"SUBMISSION_DATE_RANGE = (datetime.utcnow().strftime(\"%Y%m%d\"),) * 2\n",
"FRACTION = 0.1\n",
"\n",
"COMPARABLE_DIMENSIONS = [\n",
" \"environment/build/version\",\n",
" \"environment/build/buildId\",\n",
" \"application/channel\",\n",
" \"application/name\",\n",
" \"environment/system/os/name\",\n",
" \"environment/system/os/version\",\n",
" \"environment/build/architecture\",\n",
" \"meta/geoCountry\",\n",
" \"environment/addons/activeExperiment/id\",\n",
" \"environment/addons/activeExperiment/branch\",\n",
" \"environment/settings/e10sEnabled\",\n",
"]\n",
"DIMENSION_NAMES = [\n",
" \"build_version\",\n",
" \"build_id\",\n",
" \"channel\",\n",
" \"application\",\n",
" \"os_name\",\n",
" \"os_version\",\n",
" \"architecture\",\n",
" \"country\",\n",
" \"experiment_id\",\n",
" \"experiment_branch\",\n",
" \"e10s_enabled\",\n",
"]\n",
"assert len(COMPARABLE_DIMENSIONS) == len(DIMENSION_NAMES)\n",
"\n",
"def compare_crashes(pings, comparable_dimensions):\n",
" \"\"\"Returns a PairRDD where keys are user configurations and values are Numpy arrays of the form [usage hours, main process crashes, content process crashes, plugin crashes]\"\"\"\n",
" ping_properties = get_pings_properties(pings, comparable_dimensions + [\n",
" \"payload/info/subsessionLength\",\n",
" \"meta/submissionDate\",\n",
" \"meta/reason\",\n",
" \"payload/keyedHistograms/SUBPROCESS_ABNORMAL_ABORT/content\",\n",
" \"payload/keyedHistograms/SUBPROCESS_ABNORMAL_ABORT/plugin\",\n",
" \"payload/keyedHistograms/SUBPROCESS_ABNORMAL_ABORT/gmplugin\",\n",
" ])\n",
" return ping_properties.map(lambda p: (\n",
" # the keys we want to filter based on\n",
" (p[\"meta/submissionDate\"],) + tuple(p[key] for key in comparable_dimensions),\n",
" np.array([\n",
" (p[\"payload/info/subsessionLength\"] or 0) / 3600.0,\n",
" int(p[\"meta/reason\"] == \"aborted-session\"), # main process crashes\n",
" p[\"payload/keyedHistograms/SUBPROCESS_ABNORMAL_ABORT/content\"] or 0, # content process crashes\n",
" (p[\"payload/keyedHistograms/SUBPROCESS_ABNORMAL_ABORT/plugin\"] or 0) +\n",
" (p[\"payload/keyedHistograms/SUBPROCESS_ABNORMAL_ABORT/gmplugin\"] or 0) # plugin crashes\n",
" ])\n",
" )).reduceByKey(lambda a, b: a + b)\n",
"\n",
"def retrieve_crash_data(sc, submission_date_range, comparable_dimensions, fraction = 0.1):\n",
" # get the raw data\n",
" normal_pings = get_pings(\n",
" sc,\n",
" submission_date=submission_date_range,\n",
" fraction=fraction\n",
" )\n",
" crash_pings = get_pings(\n",
" sc, doc_type=\"main\",\n",
" submission_date=submission_date_range,\n",
" fraction=fraction\n",
" ).filter(lambda p: p.get(\"meta\", {}).get(\"reason\") == \"aborted-session\")\n",
"\n",
" return normal_pings.union(crash_pings)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"conn = psycopg2.connect(database=\"aggregates\", user=\"postgres\")\n",
"cur = conn.cursor()\n",
"\n",
"cur.execute(\"\"\"\n",
"CREATE TABLE IF NOT EXISTS aggregates (\n",
" id serial PRIMARY KEY,\n",
" submission_date date,\n",
" build_version varchar,\n",
" build_id varchar,\n",
" channel varchar,\n",
" application varchar,\n",
" os_name varchar,\n",
" os_version varchar,\n",
" architecture varchar,\n",
" country varchar,\n",
" experiment_id varchar,\n",
" experiment_branch varchar,\n",
" e10s_enabled varchar,\n",
" usage_hours real,\n",
" main_crashes real,\n",
" content_crashes real,\n",
" plugin_crashes real\n",
");\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# remove previous data for the selected days, if available\n",
"cur.execute(\n",
" \"\"\"DELETE FROM aggregates WHERE submission_date >= %s and submission_date <= %s\"\"\".format(\", \".join(DIMENSION_NAMES)),\n",
" (datetime.strptime(SUBMISSION_DATE_RANGE[0], \"%Y%m%d\").date(), datetime.strptime(SUBMISSION_DATE_RANGE[1], \"%Y%m%d\").date())\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pings = retrieve_crash_data(sc, SUBMISSION_DATE_RANGE, COMPARABLE_DIMENSIONS, FRACTION)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"result = compare_crashes(pings, COMPARABLE_DIMENSIONS)\n",
"for dimension_values, crash_data in result.toLocalIterator():\n",
" submission_date, dimension_values = dimension_values[0], dimension_values[1:]\n",
" submission_date = datetime.strptime(submission_date, \"%Y%m%d\")\n",
" usage_hours, main_crashes, content_crashes, plugin_crashes = crash_data\n",
" cur.execute(\n",
" \"\"\"INSERT INTO aggregates(submission_date, {}, usage_hours, main_crashes, content_crashes, plugin_crashes) VALUES (%s, {}%s, %s, %s, %s)\"\"\".format(\n",
" \", \".join(DIMENSION_NAMES), \"%s, \" * len(DIMENSION_NAMES)\n",
" ),\n",
" (submission_date,) + dimension_values + (usage_hours, main_crashes, content_crashes, plugin_crashes)\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"conn.commit()\n",
"cur.close()\n",
"conn.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment