# coding: utf-8
# ### e10s-beta46-noapz: MEMORY_TOTAL analysis
# In[1]:
from __future__ import division

import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
import IPython

from moztelemetry.spark import get_pings, get_one_ping_per_client, get_pings_properties
from montecarlino import grouped_permutation_test

get_ipython().magic(u'pylab inline')
IPython.core.pylabtools.figsize(16, 7)
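# Note: this notebook assumes a Telemetry Spark analysis environment in which `sc` and
# `sqlContext` are already created for us. Purely as a hypothetical stand-in for running
# it elsewhere (Spark 1.x API; not part of the original analysis), something like the
# following could be used:
if "sc" not in globals():
    from pyspark import SparkContext
    from pyspark.sql import SQLContext
    sc = SparkContext(appName="e10s_experiment")
    sqlContext = SQLContext(sc)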
# In[2]:
sc.defaultParallelism
# In[19]:
def chi2_distance(xs, ys, eps=1e-10, normalize=True):
    histA = xs.sum(axis=0)
    histB = ys.sum(axis=0)
    if normalize:
        histA = histA/histA.sum()
        histB = histB/histB.sum()
    d = 0.5 * np.sum([((a - b) ** 2) / (a + b + eps)
                      for (a, b) in zip(histA, histB)])
    return d

def median_diff(xs, ys):
    return np.median(xs) - np.median(ys)
def compare_histogram(histogram, e10s, none10s):
    # Normalize individual histograms
    e10s = e10s.map(lambda x: x/x.sum())
    none10s = none10s.map(lambda x: x/x.sum())

    # Restrict the comparison to buckets between 75*1024 and 2048*1024
    e10s = e10s.map(lambda x: x[x.index > 75 * 1024]).map(lambda x: x[x.index < 2048 * 1024])
    none10s = none10s.map(lambda x: x[x.index > 75 * 1024]).map(lambda x: x[x.index < 2048 * 1024])

    pvalue = grouped_permutation_test(chi2_distance, [e10s, none10s], num_samples=100)

    eTotal = e10s.sum()
    nTotal = none10s.sum()
    eTotal = 100*eTotal/eTotal.sum()
    nTotal = 100*nTotal/nTotal.sum()

    fig = plt.figure()
    fig.subplots_adjust(hspace=0.3)
    ax = fig.add_subplot(1, 1, 1)
    ax2 = ax.twinx()
    width = 0.4
    ylim = max(eTotal.max(), nTotal.max())

    eTotal.plot(kind="bar", alpha=0.5, color="green", label="e10s", ax=ax, width=width, position=0, ylim=(0, ylim + 1))
    nTotal.plot(kind="bar", alpha=0.5, color="blue", label="non e10s", ax=ax2, width=width, position=1, grid=False, ylim=ax.get_ylim())

    ax.legend(ax.get_legend_handles_labels()[0] + ax2.get_legend_handles_labels()[0],
              ["e10s ({} samples)".format(len(e10s)), "non e10s ({} samples)".format(len(none10s))])

    # If there are more than 100 labels, hide every other one so we can still read them
    if len(ax.get_xticklabels()) > 100:
        for label in ax.get_xticklabels()[::2]:
            label.set_visible(False)

    plt.title(histogram)
    plt.xlabel(histogram)
    plt.ylabel("Frequency %")
    plt.show()

    print "The probability that the distributions for {} differ purely by chance is {:.2f}.".format(histogram, pvalue)
def normalize_uptime_hour(frame):
    frame = frame[frame["payload/simpleMeasurements/uptime"] > 0]
    frame = 60 * frame.apply(lambda x: x/frame["payload/simpleMeasurements/uptime"])  # Metric per hour
    frame.drop('payload/simpleMeasurements/uptime', axis=1, inplace=True)
    return frame
def compare_count_histograms(pings, *histograms_names):
    properties = histograms_names + ("payload/simpleMeasurements/uptime", "e10s")
    frame = pd.DataFrame(get_pings_properties(pings, properties).collect())

    e10s = frame[frame["e10s"] == True]
    e10s = normalize_uptime_hour(e10s)

    none10s = frame[frame["e10s"] == False]
    none10s = normalize_uptime_hour(none10s)

    for histogram in e10s.columns:
        if histogram == "e10s" or histogram.endswith("_parent") or histogram.endswith("_children"):
            continue
        compare_scalars(histogram + " per hour", e10s[histogram].dropna(), none10s[histogram].dropna())
def compare_histograms(pings, *histogram_names):
    frame = pd.DataFrame(get_pings_properties(pings, histogram_names + ("e10s",), with_processes=True).collect())

    e10s = frame[frame["e10s"] == True]
    none10s = frame[frame["e10s"] == False]

    for histogram in none10s.columns:
        if histogram == "e10s" or histogram.endswith("_parent") or histogram.endswith("_children"):
            continue

        has_children = np.sum(e10s[histogram + "_children"].notnull()) > 0
        has_parent = np.sum(e10s[histogram + "_parent"].notnull()) > 0

        if has_children and has_parent:
            compare_histogram(histogram + " (parent + children)", e10s[histogram].dropna(), none10s[histogram].dropna())
        if has_parent:
            compare_histogram(histogram + " (parent)", e10s[histogram + "_parent"].dropna(), none10s[histogram].dropna())
        if has_children:
            compare_histogram(histogram + " (children)", e10s[histogram + "_children"].dropna(), none10s[histogram].dropna())
def compare_scalars(metric, *groups):
    print "Median difference in {} is {:.2f}, ({:.2f}, {:.2f}).".format(metric,
                                                                        median_diff(*groups),
                                                                        np.median(groups[0]),
                                                                        np.median(groups[1]))
    print "The probability of this effect being purely by chance is {:.2f}.".format(grouped_permutation_test(median_diff, groups, num_samples=10000))
# #### Get e10s and non-e10s partitions
# In[4]:
dataset = sqlContext.read.load("s3://telemetry-parquet/e10s_experiment/e10s_beta46_cohorts/v20160405", "parquet")
# What are the branches, and how many clients do we have in each branch?
# In[5]:
dataset.select("e10sCohort").distinct().take(50)
# In[6]:
dataset.filter(dataset["e10sCohort"] == "test").count()
# In[7]:
dataset.filter(dataset["e10sCohort"] == "control").count()
# Sample by clientId: `sampled` is a small sample suitable for most measures, while `big_sampled` is a larger sample used when the small one doesn't provide enough statistical power (such as for the slow script measures):
# In[8]:
sampled = dataset.filter(dataset.sampleId <= 6).filter((dataset.e10sCohort == "test") | (dataset.e10sCohort == "control"))
big_sampled = dataset.filter(dataset.sampleId <= 50).filter((dataset.e10sCohort == "test") | (dataset.e10sCohort == "control"))
# In[9]:
sampled.count(), big_sampled.count()
# How many clients have a mismatching e10s cohort?
# In[10]:
def e10s_status_mismatch(row):
    branch_status = True if row.e10sCohort == "test" else False
    e10sEnabled = json.loads(row.settings)["e10sEnabled"]
    return (row.e10sCohort, branch_status != e10sEnabled)
# In[11]:
sampled.rdd.map(e10s_status_mismatch).reduceByKey(lambda x, y: x + y).collect()
# Transform the DataFrame into an RDD of pings
# In[29]:
def row_2_ping(row):
    ping = {"payload": {"simpleMeasurements": json.loads(row.simpleMeasurements) if row.simpleMeasurements else {},
                        "histograms": json.loads(row.histograms) if row.histograms else {},
                        "keyedHistograms": json.loads(row.keyedHistograms) if row.keyedHistograms else {},
                        "childPayloads": json.loads(row.childPayloads) if row.childPayloads else {},
                        "threadHangStats": json.loads(row.threadHangStats) if row.threadHangStats else {}},
            "e10s": True if row.e10sCohort == "test" else False,
            "os": json.loads(row.system).get("os", {}).get("name", None)}
    return ping
# In[35]:
subset = sampled.rdd.map(row_2_ping)
big_subset = big_sampled.rdd.map(row_2_ping)
# ## Memory
# In[26]:
IPython.core.pylabtools.figsize(20, 18)
# In[22]:
compare_histograms(subset, "payload/histograms/MEMORY_TOTAL")
# ### Windows-only
# In[31]:
compare_histograms(subset.filter(lambda p: p["os"] == "Windows_NT"), "payload/histograms/MEMORY_TOTAL")
# ### Mac-only
# In[36]:
compare_histograms(big_subset.filter(lambda p: p["os"] == "Darwin"), "payload/histograms/MEMORY_TOTAL")
# ### Linux-only
# In[37]:
compare_histograms(big_subset.filter(lambda p: p["os"] == "Linux"), "payload/histograms/MEMORY_TOTAL")