chutten/beta46_e10s_cohort.ipynb Secret

## beta46_e10s_cohort.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Distribution of e10sCohort (split by e10sEnabled) among Firefox Beta 46 clients"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/hadoop/anaconda2/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.\n",
      "  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')\n",
      "WARNING: "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/bucket-whitelist.json). Assuming all histograms are acceptable.\n",
      "Populating the interactive namespace from numpy and matplotlib\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "pylab import has clobbered these variables: ['Annotation', 'Figure']\n",
      "`%matplotlib` prevents importing * from pylab and numpy\n"
     ]
    }
   ],
   "source": [
    "import ujson as json\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import plotly.plotly as py\n",
    "from plotly.graph_objs import *\n",
    "\n",
    "from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records\n",
    "\n",
    "%pylab inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "64"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sc.defaultParallelism"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pings = get_pings(sc, app=\"Firefox\", channel=\"beta\", version=\"46.0\", fraction=0.01)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "subset = get_pings_properties(pings, [\"clientId\",\n",
    "                                      \"environment/settings/e10sEnabled\",\n",
    "                                      \"environment/settings/e10sCohort\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "subset = get_one_ping_per_client(subset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "cached = subset.cache()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "How many clients are we looking at? (One ping per client, taken from a 1% sample of Beta 46 pings.)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "365528"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cached.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "defaultdict(int,\n",
       "            {(u'control', False): 33665,\n",
       "             (u'disqualified', False): 88126,\n",
       "             (u'disqualified', True): 1,\n",
       "             (u'optedIn', False): 46,\n",
       "             (u'optedIn', True): 13,\n",
       "             (u'optedOut', False): 1403,\n",
       "             (u'optedOut', True): 1,\n",
       "             (u'test', False): 190,\n",
       "             (u'test', True): 33993,\n",
       "             (u'unknown', False): 152,\n",
       "             (u'unknown', True): 1})"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cohorts_rdd = cached.filter(lambda p: p[\"environment/settings/e10sCohort\"] is not None)\n",
    "cohorts_count = cohorts_rdd.count()\n",
    "cohorts = cohorts_rdd.map(lambda p: ((p[\"environment/settings/e10sCohort\"], p[\"environment/settings/e10sEnabled\"]), 1)).countByKey()\n",
    "cohorts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{(u'control', False): '21.36%',\n",
       " (u'disqualified', False): '55.92%',\n",
       " (u'disqualified', True): '0.00%',\n",
       " (u'optedIn', False): '0.03%',\n",
       " (u'optedIn', True): '0.01%',\n",
       " (u'optedOut', False): '0.89%',\n",
       " (u'optedOut', True): '0.00%',\n",
       " (u'test', False): '0.12%',\n",
       " (u'test', True): '21.57%',\n",
       " (u'unknown', False): '0.10%',\n",
       " (u'unknown', True): '0.00%'}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "{k:\"{:.2%}\".format(float(v) / float(cohorts_count)) for (k,v) in cohorts.iteritems()}"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}

## beta46_e10s_cohort.py

# coding: utf-8

# ### Distribution of e10sCohort in Beta46
#
# Counts sampled Firefox Beta 46 clients per (e10sCohort, e10sEnabled) pair and
# reports each pair's share of the clients that report a cohort.

# In[1]:

import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
# NOTE(review): wildcard import kept for compatibility; nothing in this script
# uses a plotly name, so it can probably be dropped.
from plotly.graph_objs import *

from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records

# `%matplotlib inline` rather than `%pylab inline`: pylab star-imports numpy and
# matplotlib into the namespace and reported clobbering 'Annotation' and
# 'Figure' (presumably the plotly names imported above). Nothing below relies
# on a pylab-provided name.
get_ipython().run_line_magic('matplotlib', 'inline')

# Ping property paths read by this analysis.
CLIENT_ID = "clientId"
E10S_ENABLED = "environment/settings/e10sEnabled"
E10S_COHORT = "environment/settings/e10sCohort"


# In[2]:

# `sc` is not defined in this script; presumably the SparkContext injected by
# the notebook environment -- confirm before running standalone.
sc.defaultParallelism


# In[3]:

# Firefox Beta 46.0 pings, sampled with fraction=0.01.
pings = get_pings(sc, app="Firefox", channel="beta", version="46.0", fraction=0.01)


# In[4]:

# Keep only the fields this analysis reads.
ping_properties = get_pings_properties(pings, [
    CLIENT_ID,
    E10S_ENABLED,
    E10S_COHORT,
])


# In[6]:

# Built from a separately named input instead of `subset = f(subset)`, so
# re-running this step never feeds an already-reduced RDD back into itself.
subset = get_one_ping_per_client(ping_properties)


# In[7]:

cached = subset.cache()


# How many clients are we looking at?

# In[8]:

cached.count()


# In[14]:

# Only clients that actually report a cohort take part in the breakdown.
cohorts_rdd = cached.filter(lambda p: p[E10S_COHORT] is not None)
cohorts = cohorts_rdd.map(lambda p: ((p[E10S_COHORT], p[E10S_ENABLED]), 1)).countByKey()
# countByKey() counts every record exactly once, so the per-key counts sum to
# the size of cohorts_rdd without launching a second Spark job for .count().
cohorts_count = sum(cohorts.values())
cohorts


# In[15]:

# Share of each (cohort, enabled) pair among clients reporting a cohort.
# .items() works on both Python 2 and Python 3 (.iteritems() is Python 2 only).
{k: "{:.2%}".format(float(v) / float(cohorts_count)) for (k, v) in cohorts.items()}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Distribution of e10sCohort (split by e10sEnabled) among Firefox Beta 46 clients"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"collapsed": false,
	"scrolled": true
	},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/home/hadoop/anaconda2/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.\n",
	" warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')\n",
	"WARNING: "
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/bucket-whitelist.json). Assuming all histograms are acceptable.\n",
	"Populating the interactive namespace from numpy and matplotlib\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"pylab import has clobbered these variables: ['Annotation', 'Figure']\n",
	"`%matplotlib` prevents importing * from pylab and numpy\n"
	]
	}
	],
	"source": [
	"import ujson as json\n",
	"import matplotlib.pyplot as plt\n",
	"import pandas as pd\n",
	"import numpy as np\n",
	"import plotly.plotly as py\n",
	"from plotly.graph_objs import *\n",
	"\n",
	"from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records\n",
	"\n",
	"%pylab inline"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"64"
	]
	},
	"execution_count": 2,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sc.defaultParallelism"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"pings = get_pings(sc, app=\"Firefox\", channel=\"beta\", version=\"46.0\", fraction=0.01)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"subset = get_pings_properties(pings, [\"clientId\",\n",
	" \"environment/settings/e10sEnabled\",\n",
	" \"environment/settings/e10sCohort\"])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"subset = get_one_ping_per_client(subset)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"cached = subset.cache()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"How many clients are we looking at? (One ping per client, taken from a 1% sample of Beta 46 pings.)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"365528"
	]
	},
	"execution_count": 8,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"cached.count()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {
	"collapsed": false,
	"scrolled": true
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"defaultdict(int,\n",
	" {(u'control', False): 33665,\n",
	" (u'disqualified', False): 88126,\n",
	" (u'disqualified', True): 1,\n",
	" (u'optedIn', False): 46,\n",
	" (u'optedIn', True): 13,\n",
	" (u'optedOut', False): 1403,\n",
	" (u'optedOut', True): 1,\n",
	" (u'test', False): 190,\n",
	" (u'test', True): 33993,\n",
	" (u'unknown', False): 152,\n",
	" (u'unknown', True): 1})"
	]
	},
	"execution_count": 14,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"cohorts_rdd = cached.filter(lambda p: p[\"environment/settings/e10sCohort\"] is not None)\n",
	"cohorts_count = cohorts_rdd.count()\n",
	"cohorts = cohorts_rdd.map(lambda p: ((p[\"environment/settings/e10sCohort\"], p[\"environment/settings/e10sEnabled\"]), 1)).countByKey()\n",
	"cohorts"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"{(u'control', False): '21.36%',\n",
	" (u'disqualified', False): '55.92%',\n",
	" (u'disqualified', True): '0.00%',\n",
	" (u'optedIn', False): '0.03%',\n",
	" (u'optedIn', True): '0.01%',\n",
	" (u'optedOut', False): '0.89%',\n",
	" (u'optedOut', True): '0.00%',\n",
	" (u'test', False): '0.12%',\n",
	" (u'test', True): '21.57%',\n",
	" (u'unknown', False): '0.10%',\n",
	" (u'unknown', True): '0.00%'}"
	]
	},
	"execution_count": 15,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"{k:\"{:.2%}\".format(float(v) / float(cohorts_count)) for (k,v) in cohorts.iteritems()}"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 2",
	"language": "python",
	"name": "python2"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 2
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython2",
	"version": "2.7.11"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}

	# coding: utf-8

	# ### Distribution of e10sCohort in Beta46
	#
	# Counts sampled Firefox Beta 46 clients per (e10sCohort, e10sEnabled) pair and
	# reports each pair's share of the clients that report a cohort.

	# In[1]:

	import ujson as json
	import matplotlib.pyplot as plt
	import pandas as pd
	import numpy as np
	import plotly.plotly as py
	# NOTE(review): wildcard import kept for compatibility; nothing in this script
	# uses a plotly name, so it can probably be dropped.
	from plotly.graph_objs import *

	from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records

	# `%matplotlib inline` rather than `%pylab inline`: pylab star-imports numpy and
	# matplotlib into the namespace and reported clobbering 'Annotation' and
	# 'Figure' (presumably the plotly names imported above). Nothing below relies
	# on a pylab-provided name.
	get_ipython().run_line_magic('matplotlib', 'inline')

	# Ping property paths read by this analysis.
	CLIENT_ID = "clientId"
	E10S_ENABLED = "environment/settings/e10sEnabled"
	E10S_COHORT = "environment/settings/e10sCohort"


	# In[2]:

	# `sc` is not defined in this script; presumably the SparkContext injected by
	# the notebook environment -- confirm before running standalone.
	sc.defaultParallelism


	# In[3]:

	# Firefox Beta 46.0 pings, sampled with fraction=0.01.
	pings = get_pings(sc, app="Firefox", channel="beta", version="46.0", fraction=0.01)


	# In[4]:

	# Keep only the fields this analysis reads.
	ping_properties = get_pings_properties(pings, [
	    CLIENT_ID,
	    E10S_ENABLED,
	    E10S_COHORT,
	])


	# In[6]:

	# Built from a separately named input instead of `subset = f(subset)`, so
	# re-running this step never feeds an already-reduced RDD back into itself.
	subset = get_one_ping_per_client(ping_properties)


	# In[7]:

	cached = subset.cache()


	# How many clients are we looking at?

	# In[8]:

	cached.count()


	# In[14]:

	# Only clients that actually report a cohort take part in the breakdown.
	cohorts_rdd = cached.filter(lambda p: p[E10S_COHORT] is not None)
	cohorts = cohorts_rdd.map(lambda p: ((p[E10S_COHORT], p[E10S_ENABLED]), 1)).countByKey()
	# countByKey() counts every record exactly once, so the per-key counts sum to
	# the size of cohorts_rdd without launching a second Spark job for .count().
	cohorts_count = sum(cohorts.values())
	cohorts


	# In[15]:

	# Share of each (cohort, enabled) pair among clients reporting a cohort.
	# .items() works on both Python 2 and Python 3 (.iteritems() is Python 2 only).
	{k: "{:.2%}".format(float(v) / float(cohorts_count)) for (k, v) in cohorts.items()}