Last active
May 24, 2016 16:20
-
-
Save georgf/21a2330d9c599a0f2267e00a891a4cd7 to your computer and use it in GitHub Desktop.
validate-desktop-date-header
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## [Bug 1144778](https://bugzilla.mozilla.org/show_bug.cgi?id=1144778) - Validate the Date header submissions" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import ujson as json\n", | |
"import matplotlib.pyplot as plt\n", | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import plotly.plotly as py\n", | |
"from plotly.graph_objs import *\n", | |
"import re\n", | |
"import email.utils as eut\n", | |
"import datetime\n", | |
"import time\n", | |
"\n", | |
"from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"pings = get_pings(sc,\n", | |
" app=\"Firefox\",\n", | |
" channel=\"nightly\",\n", | |
" build_id=(\"20160520000000\", \"20160524999999\"),\n", | |
" fraction=0.1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"properties = [\n", | |
" 'meta/Date',\n", | |
" 'meta/submissionDate',\n", | |
" 'clientId'\n", | |
"]\n", | |
"pings = get_pings_properties(pings, properties)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'clientId': u'95c6432b-1cd8-46ad-86a5-5386e5023a7b',\n", | |
" 'meta/Date': u'Tue, 24 May 2016 02:27:45 GMT',\n", | |
" 'meta/submissionDate': u'20160524'}" | |
] | |
}, | |
"execution_count": 41, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pings.first()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Validate the expected ping contents." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def valid_date_header(s):\n", | |
" date_pattern = '^\\D+, \\d{1,2} \\D+ \\d{4,4} \\d\\d:\\d\\d:\\d\\d GMT(\\\\+00:00)?$'\n", | |
" return re.match(date_pattern, s) != None" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def ping_check(ping):\n", | |
" props = {\n", | |
" 'meta/Date': [unicode],\n", | |
" 'meta/submissionDate': [unicode],\n", | |
" 'clientId': [unicode],\n", | |
" }\n", | |
"\n", | |
" for k,types in props.iteritems():\n", | |
" if not k in ping:\n", | |
" return 'missing field: ' + prop\n", | |
" if type(ping[k]) not in types:\n", | |
" return 'wrong type for ' + k\n", | |
" \n", | |
" if not valid_date_header(ping['meta/Date']):\n", | |
" return 'invalid date header'\n", | |
"\n", | |
" return 'valid'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"defaultdict(int, {'valid': 18505, 'wrong type for meta/Date': 381})" | |
] | |
}, | |
"execution_count": 44, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"results = pings.map(lambda p: (ping_check(p), p))\n", | |
"results.countByKey()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"So a subset of the pings are missing the meta/Date field.\n", | |
"This might be local developer builds with \"official\" build flags that don't have the Date header changes yet but submit on up-to-date build ids." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Lets show examples for each result group." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[('wrong type for meta/Date',\n", | |
" {'clientId': '...', 'meta/Date': None, 'meta/submissionDate': u'20160524'}),\n", | |
" ('valid',\n", | |
" {'clientId': '...',\n", | |
" 'meta/Date': u'Tue, 24 May 2016 15:48:15 GMT',\n", | |
" 'meta/submissionDate': u'20160524'})]" | |
] | |
}, | |
"execution_count": 45, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ps = results.groupByKey().map(lambda t: (t[0], list(t[1])[0])).collect()\n", | |
"for k,p in ps:\n", | |
" p['clientId'] = '...'\n", | |
"ps" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.11" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# ## [Bug 1144778](https://bugzilla.mozilla.org/show_bug.cgi?id=1144778) - Validate the Date header submissions | |
# In[38]: | |
import ujson as json | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
import numpy as np | |
import plotly.plotly as py | |
from plotly.graph_objs import * | |
import re | |
import email.utils as eut | |
import datetime | |
import time | |
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records | |
# In[39]: | |
# Fetch Firefox nightly pings for builds in the id range below.
# `sc` is not defined in this file -- presumably the SparkContext provided
# by the notebook environment. fraction=0.1 presumably samples roughly 10%
# of the matching pings (confirm against the moztelemetry docs).
pings = get_pings(
    sc,
    app="Firefox",
    channel="nightly",
    build_id=("20160520000000", "20160524999999"),
    fraction=0.1,
)
# In[40]: | |
# Restrict each ping to the three fields this validation looks at.
properties = ['meta/Date', 'meta/submissionDate', 'clientId']
pings = get_pings_properties(pings, properties)
# In[41]: | |
# Display a single record as a sanity check of the selected properties.
pings.first()
# ### Validate the expected ping contents. | |
# In[42]: | |
def valid_date_header(s):
    """Return True if *s* is laid out like a GMT date header.

    The accepted layout is ``<weekday>, <day> <month> <year> HH:MM:SS GMT``,
    optionally followed by a literal ``+00:00`` suffix, e.g.
    ``Tue, 24 May 2016 02:27:45 GMT``.

    Only the layout is checked: weekday and month are matched as runs of
    non-digit characters, and the numeric fields are not range-checked.

    *s* must be a string; passing anything else (e.g. None) raises
    TypeError from ``re.match``.
    """
    # Raw string so the regex escapes (\D, \d, \+) reach the re module
    # verbatim instead of relying on Python passing unknown string escapes
    # through, which is deprecated.
    date_pattern = r'^\D+, \d{1,2} \D+ \d{4} \d\d:\d\d:\d\d GMT(\+00:00)?$'
    return re.match(date_pattern, s) is not None
# In[43]: | |
def ping_check(ping):
    """Classify a ping and return a short status string.

    *ping* is a mapping of property name to value. The result is one of:

    - ``'missing field: <name>'`` -- a required property is absent,
    - ``'wrong type for <name>'`` -- a property is not a ``unicode`` string
      (this includes a value of None),
    - ``'invalid date header'`` -- ``meta/Date`` fails valid_date_header(),
    - ``'valid'`` -- all checks passed.

    Only the first problem found is reported. This is Python 2 code: it
    relies on the ``unicode`` builtin.
    """
    # Required properties and the types each one may have.
    props = {
        'meta/Date': (unicode,),
        'meta/submissionDate': (unicode,),
        'clientId': (unicode,),
    }

    for k, types in props.items():
        if k not in ping:
            # Report the loop variable; the previous code referenced an
            # undefined name here and raised NameError instead.
            return 'missing field: ' + k
        if not isinstance(ping[k], types):
            return 'wrong type for ' + k

    # The type check above guarantees meta/Date is a string at this point.
    if not valid_date_header(ping['meta/Date']):
        return 'invalid date header'

    return 'valid'
# In[44]: | |
# Pair every ping with its ping_check() verdict as (verdict, ping),
# then tally how many pings fall under each verdict.
results = pings.keyBy(ping_check)
results.countByKey()
# So a subset of the pings are missing the meta/Date field. | |
# This might be local developer builds with "official" build flags that don't have the Date header changes yet but submit on up-to-date build ids. | |
# Let's show examples for each result group.
# In[45]: | |
# Collect one example ping per result key.
ps = (
    results
    .groupByKey()
    .map(lambda pair: (pair[0], next(iter(pair[1]))))
    .collect()
)
# Mask the client ids before displaying the examples.
for k, p in ps:
    p['clientId'] = '...'
ps
# In[ ]: | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment