Skip to content

Instantly share code, notes, and snippets.

@chutten
Last active April 1, 2016 19:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chutten/cd0d60e1419ff20e3cefe2d12a9c2c93 to your computer and use it in GitHub Desktop.
Save chutten/cd0d60e1419ff20e3cefe2d12a9c2c93 to your computer and use it in GitHub Desktop.
beta46_e10s_cohort
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Distribution of e10sCohort in Beta46"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hadoop/anaconda2/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.\n",
" warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')\n",
"WARNING: "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/bucket-whitelist.json). Assuming all histograms are acceptable.\n",
"Populating the interactive namespace from numpy and matplotlib\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"pylab import has clobbered these variables: ['Annotation', 'Figure']\n",
"`%matplotlib` prevents importing * from pylab and numpy\n"
]
}
],
"source": [
"import ujson as json\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import numpy as np\n",
"import plotly.plotly as py\n",
"from plotly.graph_objs import *\n",
"\n",
"from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records\n",
"\n",
"%pylab inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"64"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sc.defaultParallelism"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"pings = get_pings(sc, app=\"Firefox\", channel=\"beta\", version=\"46.0\", fraction=0.01)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"subset = get_pings_properties(pings, [\"clientId\",\n",
" \"environment/settings/e10sEnabled\",\n",
" \"environment/settings/e10sCohort\"])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"subset = get_one_ping_per_client(subset)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"cached = subset.cache()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"How many clients are we looking at?"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"365528"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cached.count()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"defaultdict(int,\n",
" {(u'control', False): 33665,\n",
" (u'disqualified', False): 88126,\n",
" (u'disqualified', True): 1,\n",
" (u'optedIn', False): 46,\n",
" (u'optedIn', True): 13,\n",
" (u'optedOut', False): 1403,\n",
" (u'optedOut', True): 1,\n",
" (u'test', False): 190,\n",
" (u'test', True): 33993,\n",
" (u'unknown', False): 152,\n",
" (u'unknown', True): 1})"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cohorts_rdd = cached.filter(lambda p: p[\"environment/settings/e10sCohort\"] is not None)\n",
"cohorts_count = cohorts_rdd.count()\n",
"cohorts = cohorts_rdd.map(lambda p: ((p[\"environment/settings/e10sCohort\"], p[\"environment/settings/e10sEnabled\"]), 1)).countByKey()\n",
"cohorts"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"{(u'control', False): '21.36%',\n",
" (u'disqualified', False): '55.92%',\n",
" (u'disqualified', True): '0.00%',\n",
" (u'optedIn', False): '0.03%',\n",
" (u'optedIn', True): '0.01%',\n",
" (u'optedOut', False): '0.89%',\n",
" (u'optedOut', True): '0.00%',\n",
" (u'test', False): '0.12%',\n",
" (u'test', True): '21.57%',\n",
" (u'unknown', False): '0.10%',\n",
" (u'unknown', True): '0.00%'}"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"{k:\"{:.2%}\".format(float(v) / float(cohorts_count)) for (k,v) in cohorts.iteritems()}"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
# coding: utf-8
# ### Distribution of e10sCohort in Beta46
# In[1]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
from plotly.graph_objs import *
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records
get_ipython().magic(u'pylab inline')
# In[2]:
# `sc` is not defined in this file; presumably it is the SparkContext that the
# notebook environment injects (TODO confirm). In a notebook this bare
# expression displays the context's default parallelism.
sc.defaultParallelism
# In[3]:
# Request Firefox beta-channel 46.0 pings. `fraction=0.01` presumably asks
# moztelemetry for roughly a 1% sample -- verify against the get_pings docs.
pings = get_pings(
    sc,
    app="Firefox",
    channel="beta",
    version="46.0",
    fraction=0.01
)
# In[4]:
# Narrow each ping to the three properties used below; later cells index the
# resulting records by these same path strings.
subset = get_pings_properties(
    pings,
    [
        "clientId",
        "environment/settings/e10sEnabled",
        "environment/settings/e10sCohort"
    ]
)
# In[6]:
# NOTE(review): the execution counter jumps from 4 to 6 here (and from 8 to 14
# further down), so cells were run or deleted out of order -- re-check with
# Restart Kernel -> Run All.
# Going by the helper's name, this keeps a single ping per clientId; `subset`
# is deliberately rebound to the de-duplicated result.
subset = get_one_ping_per_client(subset)
# In[7]:
# Ask for the de-duplicated data to be cached, since it is consumed by more
# than one action below (assumes `subset` is a Spark RDD -- TODO confirm).
cached = subset.cache()
# How many clients are we looking at?
# In[8]:
cached.count()
# In[14]:
# Keep only the records that actually report an e10sCohort value.
cohorts_rdd = cached.filter(lambda p: p["environment/settings/e10sCohort"] is not None)
# Tally records per (e10sCohort, e10sEnabled) pair. countByKey() brings the
# tallies back to the driver as a dict-like mapping of key -> count.
cohorts = (
    cohorts_rdd
    .map(lambda p: ((p["environment/settings/e10sCohort"],
                     p["environment/settings/e10sEnabled"]), 1))
    .countByKey()
)
# Every filtered record contributes exactly once to exactly one bucket, so the
# bucket tallies add up to the number of filtered records. Summing them
# replaces the separate `cohorts_rdd.count()` call, which launched a second
# Spark action just to obtain the same number.
cohorts_count = sum(cohorts.values())
cohorts
# In[15]:
# Express each (e10sCohort, e10sEnabled) tally as a share of `cohorts_count`
# (the number of records that carry a cohort), formatted as a percentage with
# two decimals. The explicit float() conversions keep the division from
# truncating under Python 2.
# `.items()` is used rather than `.iteritems()`: the latter exists only on
# Python 2 dicts and raises AttributeError on a Python 3 kernel, while
# `.items()` yields the same pairs on both.
{
    bucket: "{:.2%}".format(float(n_clients) / float(cohorts_count))
    for (bucket, n_clients) in cohorts.items()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment