Skip to content

Instantly share code, notes, and snippets.

@georgf
Last active April 21, 2016 15:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save georgf/234b7c861cc78824071d0ed9cf7a6aa2 to your computer and use it in GitHub Desktop.
Save georgf/234b7c861cc78824071d0ed9cf7a6aa2 to your computer and use it in GitHub Desktop.
default-search-null-breakdown
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"### Bug 1249288 - Breakdown of null values for defaultSearch"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/bucket-whitelist.json). Assuming all histograms are acceptable.\n",
"Populating the interactive namespace from numpy and matplotlib\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hadoop/anaconda2/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.\n",
" warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')\n"
]
}
],
"source": [
"import ujson as json\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import numpy as np\n",
"import plotly.plotly as py\n",
"import datetime as dt\n",
"from uuid import UUID\n",
"\n",
"from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history\n",
"\n",
"%pylab inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"submission_dates = (\"20160420\", \"20160422\")\n",
"core_pings = get_pings(sc,\n",
" app=\"Fennec\",\n",
" channel=\"beta\",\n",
" doc_type=\"core\",\n",
" source_version=\"2\",\n",
" submission_date=submission_dates,\n",
" fraction=1.0)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"639411"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pings_count = core_pings.count()\n",
"pings_count"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### How many different clients are we seeing?"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"81067"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"one_per_client = get_one_ping_per_client(core_pings)\n",
"num_clients = one_per_client.count()\n",
"num_clients"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Find pings which submit the distribution field"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distribution_pings = core_pings.filter(lambda p: p.get(\"distribution\", None) != None)\\\n",
" .collect()\n",
"len(distribution_pings)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
# coding: utf-8
# ### Bug 1249288 - Breakdown of null values for defaultSearch
# In[1]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
import datetime as dt
from uuid import UUID
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history
# Script-export form of the notebook's `%pylab inline` line magic. Per the
# cell's recorded stdout it "populates the interactive namespace from numpy
# and matplotlib", i.e. it adds names beyond the explicit imports above.
# NOTE(review): no cell in this notebook appears to use any of those
# implicitly imported names, so `%matplotlib inline` would likely be enough
# and would keep the namespace clean -- confirm before changing.
get_ipython().magic(u'pylab inline')
# In[2]:
# Two YYYYMMDD strings handed to get_pings as `submission_date`.
# NOTE(review): presumably interpreted as a (first, last) submission-date
# range -- confirm against the moztelemetry documentation.
submission_dates = ("20160420", "20160422")

# Select Fennec beta "core" pings with source version 2 for those dates.
# `sc` is not defined in this notebook; it is expected to be provided by the
# environment (presumably the Spark context -- TODO confirm).
core_pings = get_pings(
    sc,
    app="Fennec",
    channel="beta",
    doc_type="core",
    source_version="2",
    submission_date=submission_dates,
    fraction=1.0,  # presumably "no sampling" -- TODO confirm
)
# In[3]:
# Total number of pings matched by the query above. The bare name on the last
# line makes the value the cell's displayed output (639411 in the recorded
# run of this notebook).
pings_count = core_pings.count()
pings_count
# ### How many different clients are we seeing?
# In[4]:
# Reduce the ping set with moztelemetry's get_one_ping_per_client and count
# what is left; the notebook uses this as the number of different clients
# (81067 in the recorded run).
# NOTE(review): presumably the helper keeps one ping per client id -- which
# ping is kept is not visible from here; confirm against moztelemetry.
one_per_client = get_one_ping_per_client(core_pings)
num_clients = one_per_client.count()
num_clients
# ### Find pings which submit the distribution field
# In[6]:
# Keep only the pings that carry a non-null top-level "distribution" field
# and pull them to the driver as a list, then display how many there are
# (0 in the recorded run of this notebook).
#
# Changes from the exported original:
#   * `is not None` replaces `!= None`: None is a singleton, and identity
#     comparison cannot be affected by a value's custom __eq__/__ne__.
#   * `p.get("distribution")` already returns None when the key is missing,
#     so the explicit `None` default was redundant.
#   * The chain is parenthesized instead of relying on a backslash
#     continuation, which the script export had collapsed into a stray
#     " .collect()" on the same line.
#
# NOTE(review): if only the number of matching pings is needed,
# `core_pings.filter(...).count()` avoids materializing every matching ping
# on the driver. The list is kept here so `distribution_pings` stays usable
# for inspecting the pings themselves.
# NOTE(review): this cell's execution count is In[6] while the previous cell
# is In[4]; re-run with "Restart & Run All" to confirm the notebook does not
# depend on out-of-order execution.
distribution_pings = (
    core_pings
    .filter(lambda p: p.get("distribution") is not None)
    .collect()
)
len(distribution_pings)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment