Skip to content

Instantly share code, notes, and snippets.

@chutten
Created September 12, 2016 19:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chutten/feb43ea722a7a0dcacfe399ba03a1ffe to your computer and use it in GitHub Desktop.
Save chutten/feb43ea722a7a0dcacfe399ba03a1ffe to your computer and use it in GitHub Desktop.
slow_script_saturdays
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Slow Script Saturdays"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This is a small analysis of the `SLOW_SCRIPT_NOTICE_COUNT` histogram on two nightly builds: 20160903 and 20160910 (both Saturdays)."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hadoop/anaconda2/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.\n",
" warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/histogram-whitelists.json). Assuming all histograms are acceptable.\n",
"Populating the interactive namespace from numpy and matplotlib\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING: pylab import has clobbered these variables: ['Annotation', 'Figure']\n",
"`%matplotlib` prevents importing * from pylab and numpy\n"
]
}
],
"source": [
"import ujson as json\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import numpy as np\n",
"import plotly.plotly as py\n",
"from plotly.graph_objs import *\n",
"\n",
"from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records\n",
"\n",
"%pylab inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"32"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sc.defaultParallelism"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 20160910"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [],
"source": [
"pings = get_pings(sc, app=\"Firefox\", channel=\"nightly\", build_id=(\"20160910000000\", \"20160910999999\"), fraction=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"... and extract only the attributes we need from the Telemetry submissions:"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"subset = get_pings_properties(pings, [\"clientId\",\n",
" \"environment/system/os/name\",\n",
" \"payload/info/subsessionLength\",\n",
" \"payload/histograms/SLOW_SCRIPT_NOTICE_COUNT\"])"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"defaultdict(int, {u'Darwin': 10, u'Linux': 26, u'Windows_NT': 610})"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"subset.filter(lambda p: p[\"payload/histograms/SLOW_SCRIPT_NOTICE_COUNT\"] is not None) \\\n",
" .map(lambda p: (p[\"environment/system/os/name\"], 1)) \\\n",
" .countByKey()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"44539"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"subset.count()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"36746"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"usage = subset.map(lambda p: max(0, min(25, p[\"payload/info/subsessionLength\"] / 3600))).reduce(lambda a,b: a+b)\n",
"usage"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"16906L"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"notices = subset.filter(lambda p: p[\"payload/histograms/SLOW_SCRIPT_NOTICE_COUNT\"] is not None).map(lambda p: p[\"payload/histograms/SLOW_SCRIPT_NOTICE_COUNT\"]).reduce(lambda a,b: a+b)\n",
"notices"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"646"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reporting_pings = subset.filter(lambda p: p[\"payload/histograms/SLOW_SCRIPT_NOTICE_COUNT\"] is not None).count()\n",
"reporting_pings"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"544"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reporting_clients = subset.filter(lambda p: p[\"payload/histograms/SLOW_SCRIPT_NOTICE_COUNT\"] is not None).map(lambda p: p[\"clientId\"]).distinct().count()\n",
"reporting_clients"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.3795774489773008"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"notices_per_ping = 1.0 * notices / subset.count()\n",
"notices_per_ping"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"31.077205882352942"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"notices_per_reporting_client = 1.0 * notices / reporting_clients\n",
"notices_per_reporting_client"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.46007728732379033"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"notices_per_usage_hour = 1.0 * notices / usage\n",
"notices_per_usage_hour"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 20160903"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"last_weeks_pings = get_pings(sc, app=\"Firefox\", channel=\"nightly\", build_id=(\"20160903000000\", \"20160903999999\"), fraction=1)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"last_weeks_subset = get_pings_properties(last_weeks_pings, [\"clientId\",\n",
" \"environment/system/os/name\",\n",
" \"payload/info/subsessionLength\",\n",
" \"payload/histograms/SLOW_SCRIPT_NOTICE_COUNT\"])"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"defaultdict(int, {u'Darwin': 71, u'Linux': 132, u'Windows_NT': 2922})"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"last_weeks_subset.filter(lambda p: p[\"payload/histograms/SLOW_SCRIPT_NOTICE_COUNT\"] is not None) \\\n",
" .map(lambda p: (p[\"environment/system/os/name\"], 1)) \\\n",
" .countByKey()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"55080"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"last_weeks_subset.count()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"57854"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"last_weeks_usage = last_weeks_subset.map(lambda p: max(0, min(25, p[\"payload/info/subsessionLength\"] / 3600))).reduce(lambda a,b: a+b)\n",
"last_weeks_usage"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"27819L"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"last_weeks_notices = last_weeks_subset.filter(lambda p: p[\"payload/histograms/SLOW_SCRIPT_NOTICE_COUNT\"] is not None).map(lambda p: p[\"payload/histograms/SLOW_SCRIPT_NOTICE_COUNT\"]).reduce(lambda a,b: a+b)\n",
"last_weeks_notices"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"3125"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"last_weeks_reporting_pings = last_weeks_subset.filter(lambda p: p[\"payload/histograms/SLOW_SCRIPT_NOTICE_COUNT\"] is not None).count()\n",
"last_weeks_reporting_pings"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"2596"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"last_weeks_reporting_clients = last_weeks_subset.filter(lambda p: p[\"payload/histograms/SLOW_SCRIPT_NOTICE_COUNT\"] is not None).map(lambda p: p[\"clientId\"]).distinct().count()\n",
"last_weeks_reporting_clients"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.5050653594771242"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"last_weeks_notices_per_ping = 1.0 * last_weeks_notices / last_weeks_subset.count()\n",
"last_weeks_notices_per_ping"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"10.716101694915254"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"last_weeks_notices_per_reporting_client = 1.0 * last_weeks_notices / last_weeks_reporting_clients\n",
"last_weeks_notices_per_reporting_client"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.4808483423790922"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"last_weeks_notices_per_usage_hour = 1.0 * last_weeks_notices / last_weeks_usage\n",
"last_weeks_notices_per_usage_hour"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Observations\n",
"\n",
"The notices per usage hour metric remained relatively stable from 0903 to 0910 (0.48 to 0.46), but everything else changed.\n",
"\n",
"Most alarming is the notices per reporting client measure which jumped from 10.7 to 31.1. This is consistent with going from a population where \"many are having a poor time\" to a population where \"few are having a terrible time\"."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
# coding: utf-8
# ### Slow Script Saturdays
# This is a small analysis of the `SLOW_SCRIPT_NOTICE_COUNT` histogram on two nightly builds: 20160903 and 20160910 (both Saturdays).
# In[1]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
from plotly.graph_objs import *
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records
get_ipython().magic(u'pylab inline')
# In[2]:
# `sc` is not defined in this file; it is expected to be injected by the
# notebook environment (presumably the SparkContext -- TODO confirm).
sc.defaultParallelism
# ### 20160910
# In[11]:
# Nightly Firefox pings whose build id falls on 2016-09-10.
pings = get_pings(sc, app="Firefox", channel="nightly", build_id=("20160910000000", "20160910999999"), fraction=1)
# ... and extract only the attributes we need from the Telemetry submissions:
# In[35]:
# Every metric below runs its own Spark action over `subset`, so cache it
# instead of re-extracting the properties from the raw pings each time.
subset = get_pings_properties(pings, ["clientId",
                                      "environment/system/os/name",
                                      "payload/info/subsessionLength",
                                      "payload/histograms/SLOW_SCRIPT_NOTICE_COUNT"]).cache()
# In[36]:
# Pings that actually carry the histogram; shared by the metrics below so the
# same filter is not spelled out (and re-evaluated) four times.
reporting_subset = subset.filter(lambda p: p["payload/histograms/SLOW_SCRIPT_NOTICE_COUNT"] is not None).cache()
# Number of reporting pings per operating system.
reporting_subset.map(lambda p: (p["environment/system/os/name"], 1)).countByKey()
# In[17]:
# Total pings for this build, reporting or not.
subset.count()
# In[19]:
# Usage in hours, clamped per ping to [0, 25] (the 25 cap presumably discards
# implausibly long subsessions -- TODO confirm). subsessionLength is treated
# as seconds.
# The divisor is 3600.0 on purpose: this notebook runs on a Python 2 kernel,
# where `int / 3600` floors, so every subsession shorter than an hour used to
# contribute zero usage. Figures recorded from runs with the floored hours
# will therefore not match this value exactly.
# `or 0` treats a missing subsessionLength as no usage instead of raising.
usage = subset.map(lambda p: max(0, min(25, (p["payload/info/subsessionLength"] or 0) / 3600.0))).reduce(lambda a,b: a+b)
usage
# In[21]:
# Sum of the histogram values over all reporting pings.
notices = reporting_subset.map(lambda p: p["payload/histograms/SLOW_SCRIPT_NOTICE_COUNT"]).reduce(lambda a,b: a+b)
notices
# In[38]:
reporting_pings = reporting_subset.count()
reporting_pings
# In[39]:
# Distinct clients behind the reporting pings (a client may send several).
reporting_clients = reporting_subset.map(lambda p: p["clientId"]).distinct().count()
reporting_clients
# In[41]:
# `1.0 *` forces true division under Python 2.
notices_per_ping = 1.0 * notices / subset.count()
notices_per_ping
# In[52]:
notices_per_reporting_client = 1.0 * notices / reporting_clients
notices_per_reporting_client
# In[53]:
notices_per_usage_hour = 1.0 * notices / usage
notices_per_usage_hour
# ### 20160903
# In[25]:
# Nightly Firefox pings whose build id falls on 2016-09-03 (the previous Saturday).
last_weeks_pings = get_pings(sc, app="Firefox", channel="nightly", build_id=("20160903000000", "20160903999999"), fraction=1)
# In[33]:
# Every metric below runs its own Spark action over `last_weeks_subset`, so
# cache it instead of re-extracting the properties from the raw pings each time.
last_weeks_subset = get_pings_properties(last_weeks_pings, ["clientId",
                                                            "environment/system/os/name",
                                                            "payload/info/subsessionLength",
                                                            "payload/histograms/SLOW_SCRIPT_NOTICE_COUNT"]).cache()
# In[34]:
# Pings that actually carry the histogram; shared by the metrics below so the
# same filter is not spelled out (and re-evaluated) four times.
last_weeks_reporting_subset = last_weeks_subset.filter(lambda p: p["payload/histograms/SLOW_SCRIPT_NOTICE_COUNT"] is not None).cache()
# Number of reporting pings per operating system.
last_weeks_reporting_subset.map(lambda p: (p["environment/system/os/name"], 1)).countByKey()
# In[27]:
# Total pings for this build, reporting or not.
last_weeks_subset.count()
# In[28]:
# Usage in hours, clamped per ping to [0, 25] (the 25 cap presumably discards
# implausibly long subsessions -- TODO confirm). subsessionLength is treated
# as seconds.
# The divisor is 3600.0 on purpose: this notebook runs on a Python 2 kernel,
# where `int / 3600` floors, so every subsession shorter than an hour used to
# contribute zero usage. Figures recorded from runs with the floored hours
# will therefore not match this value exactly.
# `or 0` treats a missing subsessionLength as no usage instead of raising.
last_weeks_usage = last_weeks_subset.map(lambda p: max(0, min(25, (p["payload/info/subsessionLength"] or 0) / 3600.0))).reduce(lambda a,b: a+b)
last_weeks_usage
# In[29]:
# Sum of the histogram values over all reporting pings.
last_weeks_notices = last_weeks_reporting_subset.map(lambda p: p["payload/histograms/SLOW_SCRIPT_NOTICE_COUNT"]).reduce(lambda a,b: a+b)
last_weeks_notices
# In[42]:
last_weeks_reporting_pings = last_weeks_reporting_subset.count()
last_weeks_reporting_pings
# In[43]:
# Distinct clients behind the reporting pings (a client may send several).
last_weeks_reporting_clients = last_weeks_reporting_subset.map(lambda p: p["clientId"]).distinct().count()
last_weeks_reporting_clients
# In[46]:
# `1.0 *` forces true division under Python 2.
last_weeks_notices_per_ping = 1.0 * last_weeks_notices / last_weeks_subset.count()
last_weeks_notices_per_ping
# In[51]:
last_weeks_notices_per_reporting_client = 1.0 * last_weeks_notices / last_weeks_reporting_clients
last_weeks_notices_per_reporting_client
# In[50]:
last_weeks_notices_per_usage_hour = 1.0 * last_weeks_notices / last_weeks_usage
last_weeks_notices_per_usage_hour
# ### Observations
#
# The notices per usage hour metric remained relatively stable from 0903 to 0910 (0.48 to 0.46), but everything else changed.
#
# Most alarming is the notices per reporting client measure which jumped from 10.7 to 31.1. This is consistent with going from a population where "many are having a poor time" to a population where "few are having a terrible time".
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment