Last active
April 21, 2016 15:20
-
-
Save georgf/234b7c861cc78824071d0ed9cf7a6aa2 to your computer and use it in GitHub Desktop.
default-search-null-breakdown
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"collapsed": true | |
}, | |
"source": [ | |
"### Bug 1249288 - Breakdown of null values for defaultSearch" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/bucket-whitelist.json). Assuming all histograms are acceptable.\n", | |
"Populating the interactive namespace from numpy and matplotlib\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/hadoop/anaconda2/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.\n", | |
" warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')\n" | |
] | |
} | |
], | |
"source": [ | |
"import ujson as json\n", | |
"import matplotlib.pyplot as plt\n", | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import plotly.plotly as py\n", | |
"import datetime as dt\n", | |
"from uuid import UUID\n", | |
"\n", | |
"from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history\n", | |
"\n", | |
"%pylab inline" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"submission_dates = (\"20160420\", \"20160422\")\n", | |
"core_pings = get_pings(sc,\n", | |
" app=\"Fennec\",\n", | |
" channel=\"beta\",\n", | |
" doc_type=\"core\",\n", | |
" source_version=\"2\",\n", | |
" submission_date=submission_dates,\n", | |
" fraction=1.0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"639411" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pings_count = core_pings.count()\n", | |
"pings_count" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### How many different clients are we seeing?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"81067" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"one_per_client = get_one_ping_per_client(core_pings)\n", | |
"num_clients = one_per_client.count()\n", | |
"num_clients" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Find pings which submit the distribution field" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"distribution_pings = core_pings.filter(lambda p: p.get(\"distribution\", None) != None)\\\n", | |
" .collect()\n", | |
"len(distribution_pings)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.11" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# ### Bug 1249288 - Breakdown of null values for defaultSearch | |
# In[1]: | |
import ujson as json | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
import numpy as np | |
import plotly.plotly as py | |
import datetime as dt | |
from uuid import UUID | |
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history | |
get_ipython().magic(u'pylab inline') | |
# In[2]:
# Load Fennec beta "core" pings (source version 2) for the chosen submission dates.
# NOTE(review): presumably the tuple is a (start, end) date range -- confirm
# against the moztelemetry get_pings documentation.
submission_dates = ("20160420", "20160422")
core_pings = get_pings(
    sc,
    app="Fennec",
    channel="beta",
    doc_type="core",
    source_version="2",
    submission_date=submission_dates,
    fraction=1.0,
)

# In[3]:
# Total number of pings returned by the query above.
pings_count = core_pings.count()
pings_count

# ### How many different clients are we seeing?
# In[4]:
# Reduce to a single ping per client, then count the resulting records.
one_per_client = get_one_ping_per_client(core_pings)
num_clients = one_per_client.count()
num_clients
# ### Find pings which submit the distribution field
# In[6]:
# Keep only pings whose "distribution" entry is present and not None.
# `is not None` is the identity test PEP 8 recommends for None; `!= None`
# dispatches to `__ne__`, which the compared value is free to override.
# NOTE(review): the matches are pulled back with collect() only to be counted;
# if nothing else needs the list, calling `.count()` on the filtered result
# would avoid transferring the pings.
distribution_pings = (
    core_pings
    .filter(lambda p: p.get("distribution") is not None)
    .collect()
)
len(distribution_pings)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment