-
-
Save chutten/ef607dc7cc6cd835eca7f7cdd8ec45e3 to your computer and use it in GitHub Desktop.
beta47_slow_script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# ### Beta 47 Slow Script | |
# This is a very brief introduction to Spark and Telemetry in Python. You should have a look at the [tutorial](https://gist.github.com/vitillo/25a20b7c8685c0c82422) in Scala and the associated [talk](http://www.slideshare.net/RobertoAgostinoVitil/spark-meets-telemetry) if you are interested in learning more about Spark.
# In[1]: | |
import numbers | |
import ujson as json | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
import numpy as np | |
import plotly.plotly as py | |
from plotly.graph_objs import * | |
from montecarlino import grouped_permutation_test | |
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records | |
# Load the pylab environment with inline plotting for this notebook session.
get_ipython().magic(u'pylab inline')

# In[2]:
# NOTE(review): `sc` is not defined in this file; presumably the SparkContext
# injected by the notebook environment -- confirm before running standalone.
sc.defaultParallelism

# In[15]:
# Request Firefox Beta 47.0 pings; `fraction=0.25` is passed through to
# get_pings (presumably a 25% sample -- confirm against moztelemetry docs).
pings = get_pings(sc, app="Firefox", channel="beta", version="47.0", fraction=0.25)

# In[38]:
# Keep only the fields this analysis reads.
subset = get_pings_properties(pings, ["clientId",
                                      "environment/settings/userPrefs/dom.max_script_run_time",
                                      "environment/settings/e10sCohort",
                                      "payload/simpleMeasurements/uptime",
                                      "payload/histograms/SLOW_SCRIPT_PAGE_COUNT"])

# In[43]:
# Only the "test" and "control" e10s cohorts are compared below.
subset = subset.filter(lambda p: p["environment/settings/e10sCohort"] in ["test", "control"])

# In[44]:
subset = get_one_ping_per_client(subset)

# In[45]:
# Cache the RDD because it is reused by several actions below.
cached = subset.cache()

# How many pings are we looking at?

# In[46]:
cached.count()

# In[47]:
# Number of pings per value of the dom.max_script_run_time user pref.
cached.map(lambda p: (p["environment/settings/userPrefs/dom.max_script_run_time"], p)).countByKey()
# In[49]: | |
# Pull the cached pings to the driver and split them by e10s cohort.
frame = pd.DataFrame(cached.collect())
e10s = frame[frame["environment/settings/e10sCohort"] == "test"]
none10s = frame[frame["environment/settings/e10sCohort"] == "control"]
# DataFrame.count() reports the number of non-null values per column,
# so this shows how well each field is populated in each cohort.
e10s.count(), none10s.count()
# In[50]: | |
def normalize_uptime_hour(frame, metric):
    """Return a filtered copy of *frame* with *metric* scaled by uptime.

    Rows are kept only when ``payload/simpleMeasurements/uptime`` is > 0 and
    *metric* is >= 0 (rows with missing values in either column fail these
    comparisons and are dropped). In the returned frame, *metric* is replaced
    by ``60 * metric / uptime``.

    NOTE(review): the factor 60 and the function name suggest uptime is
    recorded in minutes, giving a per-hour rate -- confirm against the
    Telemetry ping documentation.

    The input frame is never modified.
    """
    uptime_col = "payload/simpleMeasurements/uptime"
    kept = frame[frame[uptime_col] > 0]
    # Explicit copy: assigning a column on a filtered slice triggers
    # SettingWithCopyWarning and leaves view-vs-copy semantics ambiguous.
    kept = kept[kept[metric] >= 0].copy()
    kept[metric] = 60 * kept[metric] / kept[uptime_col]
    return kept
# Apply the same uptime normalization of the slow-script page count to both cohorts.
e10s_norm = normalize_uptime_hour(e10s, "payload/histograms/SLOW_SCRIPT_PAGE_COUNT")
none10s_norm = normalize_uptime_hour(none10s, "payload/histograms/SLOW_SCRIPT_PAGE_COUNT")
# In[51]: | |
def median_diff(xs, ys):
    """Return the median of *xs* minus the median of *ys*."""
    return np.median(xs) - np.median(ys)
def compare_scalars(metric, *groups):
    """Print the median difference between two groups and a permutation-test result.

    *metric* is only used as a label in the printed output. *groups* must
    contain exactly two sequences of values: both are forwarded to
    ``median_diff`` (which takes two arguments) and indexed as ``groups[0]``
    and ``groups[1]``.

    The second line prints the value returned by
    ``grouped_permutation_test(median_diff, groups, num_samples=10000)``.
    """
    # Single-argument print(...) behaves identically on Python 2 and Python 3.
    print("Median difference in {} is {:.2f}, ({:.2f}, {:.2f}).".format(
        metric,
        median_diff(*groups),
        np.median(groups[0]),
        np.median(groups[1])))
    p_value = grouped_permutation_test(median_diff, groups, num_samples=10000)
    print("The probability of this effect being purely by chance is {:.2f}.".format(p_value))
# In[52]: | |
# Compare the normalized slow-script page counts across all clients in each cohort.
metric = "payload/histograms/SLOW_SCRIPT_PAGE_COUNT"
compare_scalars(metric, e10s_norm[metric], none10s_norm[metric])

# In[53]:
# Repeat the comparison using only rows where the dom.max_script_run_time
# pref is null (presumably clients that have not changed it -- confirm).
pref = "environment/settings/userPrefs/dom.max_script_run_time"
compare_scalars(metric,
                e10s_norm[e10s_norm[pref].isnull()][metric],
                none10s_norm[none10s_norm[pref].isnull()][metric])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment