Skip to content

Instantly share code, notes, and snippets.

@Uberi
Last active April 8, 2016 15:10
Show Gist options
  • Save Uberi/8e88b1d8439f3e90068d46709ad37897 to your computer and use it in GitHub Desktop.
Save Uberi/8e88b1d8439f3e90068d46709ad37897 to your computer and use it in GitHub Desktop.
Untitled
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"source": [
"import ujson as json\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import numpy as np\n",
"import plotly.plotly as py\n",
"import IPython\n",
"\n",
"from __future__ import division\n",
"from moztelemetry.spark import get_pings, get_one_ping_per_client, get_pings_properties\n",
"from montecarlino import grouped_permutation_test\n",
"\n",
"%pylab inline\n",
"IPython.core.pylabtools.figsize(16, 7)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"320"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sc.defaultParallelism"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def chi2_distance(xs, ys, eps = 1e-10, normalize = True):\n",
" histA = xs.sum(axis=0)\n",
" histB = ys.sum(axis=0)\n",
" \n",
" if normalize:\n",
" histA = histA/histA.sum()\n",
" histB = histB/histB.sum()\n",
" \n",
" d = 0.5 * np.sum([((a - b) ** 2) / (a + b + eps)\n",
" for (a, b) in zip(histA, histB)])\n",
"\n",
" return d\n",
"\n",
"def median_diff(xs, ys):\n",
" return np.median(xs) - np.median(ys)\n",
"\n",
"def normalize_uptime_hour(frame):\n",
" frame = frame[frame[\"payload/simpleMeasurements/uptime\"] > 0]\n",
" frame = 60 * frame.apply(lambda x: x/frame[\"payload/simpleMeasurements/uptime\"]) # Metric per hour\n",
" frame.drop('payload/simpleMeasurements/uptime', axis=1, inplace=True)\n",
" return frame\n",
" \n",
"def compare_count_histograms(pings, *histograms_names):\n",
" values = get_pings_properties(pings, [\n",
" \"payload/histograms/SLOW_SCRIPT_NOTICE_COUNT\",\n",
" \"payload/histograms/SLOW_SCRIPT_PAGE_COUNT\",\n",
" \"payload/simpleMeasurements/uptime\",\n",
" \"environment/settings/e10sEnabled\",\n",
" ])\n",
" frame = pd.DataFrame(values.collect())\n",
"\n",
" e10s = frame[frame[\"environment/settings/e10sEnabled\"] == True]\n",
" e10s = normalize_uptime_hour(e10s)\n",
" \n",
" none10s = frame[frame[\"environment/settings/e10sEnabled\"] == False]\n",
" none10s = normalize_uptime_hour(none10s)\n",
" \n",
" for histogram in e10s.columns:\n",
" if histogram == \"environment/settings/e10sEnabled\" or histogram.endswith(\"_parent\") or histogram.endswith(\"_children\"):\n",
" continue\n",
" \n",
" compare_scalars(histogram + \" per hour\", e10s[histogram].dropna(), none10s[histogram].dropna())\n",
"\n",
"def compare_scalars(metric, *groups):\n",
" print \"Median difference in {} is {:.2f}, ({:.2f}, {:.2f}).\".format(metric,\n",
" median_diff(*groups), \n",
" np.median(groups[0]),\n",
" np.median(groups[1]))\n",
" print \"The probability of this effect being purely by chance is {:.2f}.\". \\\n",
" format(grouped_permutation_test(median_diff, groups, num_samples=10000))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pings = get_pings(sc, app=\"Firefox\", channel=\"nightly\", submission_date=(\"20160405\", \"20160405\"), fraction=1)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"95899"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pings.count()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Median difference in payload/histograms/SLOW_SCRIPT_NOTICE_COUNT per hour is 0.12, (0.61, 0.50).\n",
"The probability of this effect being purely by chance is 0.33.\n",
"Median difference in payload/histograms/SLOW_SCRIPT_PAGE_COUNT per hour is 0.12, (0.45, 0.34).\n",
"The probability of this effect being purely by chance is 0.22.\n"
]
}
],
"source": [
"compare_count_histograms(pings, \"\")"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pings = get_pings(sc, app=\"Firefox\", channel=\"aurora\", submission_date=(\"20160405\", \"20160405\"), fraction=1)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"245350"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pings.count()"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Median difference in payload/histograms/SLOW_SCRIPT_NOTICE_COUNT per hour is 0.12, (0.60, 0.49).\n",
"The probability of this effect being purely by chance is 0.12.\n",
"Median difference in payload/histograms/SLOW_SCRIPT_PAGE_COUNT per hour is 0.11, (0.43, 0.32).\n",
"The probability of this effect being purely by chance is 0.05.\n"
]
}
],
"source": [
"compare_count_histograms(pings, \"\")"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pings = get_pings(sc, app=\"Firefox\", channel=\"nightly\", build_id=(\"20160402000000\", \"20160405999999\"), fraction=1)"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"257558"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pings.count()"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Median difference in payload/histograms/SLOW_SCRIPT_NOTICE_COUNT per hour is 0.17, (0.67, 0.50).\n",
"The probability of this effect being purely by chance is 0.05.\n",
"Median difference in payload/histograms/SLOW_SCRIPT_PAGE_COUNT per hour is 0.14, (0.52, 0.38).\n",
"The probability of this effect being purely by chance is 0.04.\n"
]
}
],
"source": [
"compare_count_histograms(pings, \"\")"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pings = get_pings(sc, app=\"Firefox\", channel=\"aurora\", build_id=(\"20160402000000\", \"20160405999999\"), fraction=1)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"625624"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pings.count()"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Median difference in payload/histograms/SLOW_SCRIPT_NOTICE_COUNT per hour is 0.12, (0.68, 0.56).\n",
"The probability of this effect being purely by chance is 0.04.\n",
"Median difference in payload/histograms/SLOW_SCRIPT_PAGE_COUNT per hour is 0.11, (0.52, 0.41).\n",
"The probability of this effect being purely by chance is 0.01.\n"
]
}
],
"source": [
"compare_count_histograms(pings, \"\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
# coding: utf-8
# In[2]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
import IPython
from __future__ import division
from moztelemetry.spark import get_pings, get_one_ping_per_client, get_pings_properties
from montecarlino import grouped_permutation_test
# Run the IPython "%pylab inline" magic. The notebook's recorded output for
# this cell reports that it populates the interactive namespace from numpy and
# matplotlib. `get_ipython` only exists when running under IPython.
get_ipython().magic(u'pylab inline')
# Set the default figure size to 16 x 7 (presumably inches, following
# matplotlib's figsize convention -- TODO confirm).
IPython.core.pylabtools.figsize(16, 7)
# In[3]:
# Bare expression: the notebook displayed its value (320 in the recorded run);
# when executed as a plain script the value is discarded.
# NOTE(review): `sc` is not defined anywhere in this file -- presumably the
# SparkContext injected by the notebook environment; confirm.
sc.defaultParallelism
# In[46]:
def chi2_distance(xs, ys, eps=1e-10, normalize=True):
    """Return the chi-squared distance between two aggregated histograms.

    ``xs`` and ``ys`` are each summed along axis 0 to obtain one histogram
    per argument, and the distance is computed bin by bin as
    ``0.5 * sum((a - b) ** 2 / (a + b + eps))``.

    Args:
        xs: Object exposing ``.sum(axis=0)`` (e.g. a 2-D numpy array or a
            pandas DataFrame) holding the first group's histograms.
        ys: Same as ``xs`` for the second group.
        eps: Small constant added to every denominator so that bins that are
            empty in both histograms do not divide by zero.
        normalize: When true, scale each histogram to sum to 1 before
            comparing, so groups of different sizes are comparable.

    Returns:
        The chi-squared distance as a float.

    Raises:
        ValueError: If the two aggregated histograms do not have the same
            shape (the bins could not be paired meaningfully).
    """
    # Force float arrays: this makes the normalization a true division even
    # for integer counts on Python 2 without `from __future__ import
    # division`, and pairs bins by position rather than by pandas index label.
    hist_a = np.asarray(xs.sum(axis=0), dtype=float)
    hist_b = np.asarray(ys.sum(axis=0), dtype=float)

    if hist_a.shape != hist_b.shape:
        raise ValueError(
            "histograms must have the same shape, got {} and {}".format(
                hist_a.shape, hist_b.shape))

    if normalize:
        total_a = hist_a.sum()
        total_b = hist_b.sum()
        # An all-zero histogram is left as is; dividing by its zero total
        # would turn every bin (and therefore the distance) into NaN.
        if total_a != 0:
            hist_a = hist_a / total_a
        if total_b != 0:
            hist_b = hist_b / total_b

    return 0.5 * np.sum((hist_a - hist_b) ** 2 / (hist_a + hist_b + eps))
def median_diff(xs, ys):
    """Return the median of ``xs`` minus the median of ``ys``."""
    median_of_xs = np.median(xs)
    median_of_ys = np.median(ys)
    return median_of_xs - median_of_ys
def normalize_uptime_hour(frame):
    """Express every column of ``frame`` as a rate relative to uptime.

    Rows whose ``payload/simpleMeasurements/uptime`` is not greater than zero
    are discarded. Every remaining column is divided by the row's uptime and
    multiplied by 60, after which the uptime column itself is removed. The
    input frame is not modified; a new frame is returned.

    NOTE(review): the factor of 60 yields a per-hour rate only if uptime is
    recorded in minutes -- confirm against the telemetry documentation.
    """
    uptime_column = "payload/simpleMeasurements/uptime"

    with_uptime = frame[frame[uptime_column] > 0]
    uptime = with_uptime[uptime_column]

    # Column-wise division; pandas aligns each column with `uptime` by index.
    per_unit = with_uptime.apply(lambda column: column / uptime)
    per_hour = 60 * per_unit  # Metric per hour

    # The uptime column divided by itself carries no information any more.
    per_hour.drop(uptime_column, axis=1, inplace=True)
    return per_hour
def compare_count_histograms(pings, *histograms_names):
    """Compare uptime-normalized slow-script counts for e10s vs. non-e10s pings.

    Fetches the two SLOW_SCRIPT count histograms, the uptime and the
    e10sEnabled setting from ``pings``, splits the rows on the e10sEnabled
    flag, normalizes each half with ``normalize_uptime_hour`` and reports every
    remaining metric through ``compare_scalars``.

    Args:
        pings: Collection of pings accepted by ``get_pings_properties``; the
            extracted properties must support ``.collect()``.
        *histograms_names: Accepted but never read; the set of properties
            fetched is fixed inside this function.
    """
    e10s_flag = "environment/settings/e10sEnabled"
    wanted_properties = [
        "payload/histograms/SLOW_SCRIPT_NOTICE_COUNT",
        "payload/histograms/SLOW_SCRIPT_PAGE_COUNT",
        "payload/simpleMeasurements/uptime",
        e10s_flag,
    ]
    collected = get_pings_properties(pings, wanted_properties).collect()
    frame = pd.DataFrame(collected)

    # `== True` / `== False` are element-wise pandas comparisons, not
    # truthiness checks, so they must stay explicit.
    enabled = normalize_uptime_hour(frame[frame[e10s_flag] == True])
    disabled = normalize_uptime_hour(frame[frame[e10s_flag] == False])

    ignored_suffixes = ("_parent", "_children")
    for column in enabled.columns:
        is_metric = column != e10s_flag and not column.endswith(ignored_suffixes)
        if is_metric:
            compare_scalars(column + " per hour",
                            enabled[column].dropna(),
                            disabled[column].dropna())
def compare_scalars(metric, *groups):
    """Print the median difference between two samples and its test result.

    Two lines are written to stdout: the difference of medians (first group
    minus second group) together with both medians, and the value returned by
    ``grouped_permutation_test`` run with ``median_diff`` over 10000 samples.

    Args:
        metric: Human-readable name of the metric, inserted into the report.
        *groups: The samples to compare. Exactly two are required, because
            ``median_diff`` takes two arguments and only ``groups[0]`` and
            ``groups[1]`` are reported.

    NOTE(review): the second line presents the test result as "the probability
    of this effect being purely by chance"; presumably it is a permutation-test
    p-value -- confirm against the montecarlino documentation.
    """
    # A single parenthesized argument prints identically under the Python 2
    # print statement and the Python 3 print function, so the block no longer
    # fails to compile on Python 3.
    print("Median difference in {} is {:.2f}, ({:.2f}, {:.2f}).".format(
        metric,
        median_diff(*groups),
        np.median(groups[0]),
        np.median(groups[1])))
    p_value = grouped_permutation_test(median_diff, groups, num_samples=10000)
    print("The probability of this effect being purely by chance is {:.2f}.".format(p_value))
# Notebook cells In[20] through In[60], in script order: run the same
# e10s vs. non-e10s comparison on four samples of Firefox pings -- nightly and
# aurora selected by submission date, then nightly and aurora selected by
# build id.
for ping_filter in (
    {"channel": "nightly", "submission_date": ("20160405", "20160405")},
    {"channel": "aurora", "submission_date": ("20160405", "20160405")},
    {"channel": "nightly", "build_id": ("20160402000000", "20160405999999")},
    {"channel": "aurora", "build_id": ("20160402000000", "20160405999999")},
):
    pings = get_pings(sc, app="Firefox", fraction=1, **ping_filter)
    # The notebook displayed this count; as a script statement the result is
    # discarded, but the call is kept so the same work is performed.
    pings.count()
    compare_count_histograms(pings, "")
# In[ ]:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment