Skip to content

Instantly share code, notes, and snippets.

@georgf
Last active May 24, 2016 16:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save georgf/21a2330d9c599a0f2267e00a891a4cd7 to your computer and use it in GitHub Desktop.
Save georgf/21a2330d9c599a0f2267e00a891a4cd7 to your computer and use it in GitHub Desktop.
validate-desktop-date-header
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## [Bug 1144778](https://bugzilla.mozilla.org/show_bug.cgi?id=1144778) - Validate the Date header submissions"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import ujson as json\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import numpy as np\n",
"import plotly.plotly as py\n",
"from plotly.graph_objs import *\n",
"import re\n",
"import email.utils as eut\n",
"import datetime\n",
"import time\n",
"\n",
"from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pings = get_pings(sc,\n",
" app=\"Firefox\",\n",
" channel=\"nightly\",\n",
" build_id=(\"20160520000000\", \"20160524999999\"),\n",
" fraction=0.1)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"properties = [\n",
" 'meta/Date',\n",
" 'meta/submissionDate',\n",
" 'clientId'\n",
"]\n",
"pings = get_pings_properties(pings, properties)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"{'clientId': u'95c6432b-1cd8-46ad-86a5-5386e5023a7b',\n",
" 'meta/Date': u'Tue, 24 May 2016 02:27:45 GMT',\n",
" 'meta/submissionDate': u'20160524'}"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pings.first()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Validate the expected ping contents."
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def valid_date_header(s):\n",
" date_pattern = '^\\D+, \\d{1,2} \\D+ \\d{4,4} \\d\\d:\\d\\d:\\d\\d GMT(\\\\+00:00)?$'\n",
" return re.match(date_pattern, s) != None"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def ping_check(ping):\n",
" props = {\n",
" 'meta/Date': [unicode],\n",
" 'meta/submissionDate': [unicode],\n",
" 'clientId': [unicode],\n",
" }\n",
"\n",
" for k,types in props.iteritems():\n",
" if not k in ping:\n",
" return 'missing field: ' + prop\n",
" if type(ping[k]) not in types:\n",
" return 'wrong type for ' + k\n",
" \n",
" if not valid_date_header(ping['meta/Date']):\n",
" return 'invalid date header'\n",
"\n",
" return 'valid'"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"defaultdict(int, {'valid': 18505, 'wrong type for meta/Date': 381})"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results = pings.map(lambda p: (ping_check(p), p))\n",
"results.countByKey()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"So a subset of the pings are missing the meta/Date field.\n",
"This might be local developer builds with \"official\" build flags that don't have the Date header changes yet but submit on up-to-date build ids."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Lets show examples for each result group."
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('wrong type for meta/Date',\n",
" {'clientId': '...', 'meta/Date': None, 'meta/submissionDate': u'20160524'}),\n",
" ('valid',\n",
" {'clientId': '...',\n",
" 'meta/Date': u'Tue, 24 May 2016 15:48:15 GMT',\n",
" 'meta/submissionDate': u'20160524'})]"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ps = results.groupByKey().map(lambda t: (t[0], list(t[1])[0])).collect()\n",
"for k,p in ps:\n",
" p['clientId'] = '...'\n",
"ps"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
# coding: utf-8
# ## [Bug 1144778](https://bugzilla.mozilla.org/show_bug.cgi?id=1144778) - Validate the Date header submissions
# In[38]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
from plotly.graph_objs import *
import re
import email.utils as eut
import datetime
import time
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records
# In[39]:
# Load the raw pings to analyse.
# NOTE(review): `sc` is not defined in this file - presumably the SparkContext
# injected by the notebook environment; confirm.
# NOTE(review): fraction=0.1 presumably samples ~10% of the matching pings -
# verify against the moztelemetry get_pings documentation.
raw_pings = get_pings(sc,
                      app="Firefox",
                      channel="nightly",
                      build_id=("20160520000000", "20160524999999"),
                      fraction=0.1)
# In[40]:
# Only these properties are needed for the Date header validation.
properties = [
    'meta/Date',
    'meta/submissionDate',
    'clientId'
]
# The raw pings keep their own name instead of being overwritten, so re-running
# this step never feeds already-projected records back into get_pings_properties.
pings = get_pings_properties(raw_pings, properties)
# In[41]:
# Peek at a single record to check the shape of the extracted properties.
pings.first()
# ### Validate the expected ping contents.
# In[42]:
def valid_date_header(s):
    """Return True if `s` is formatted like 'Tue, 24 May 2016 02:27:45 GMT'.

    This is a format check only: the weekday and month are just required to be
    runs of non-digit characters, and the numeric fields are not range-checked.
    An optional '+00:00' suffix after 'GMT' is accepted.

    Args:
        s: the Date header value as a string.

    Returns:
        bool: True when the whole string matches the expected layout.
    """
    # Raw string so the regex escapes (\D, \d, \+) are not interpreted as
    # (invalid) Python string escapes.
    # \Z instead of $: `$` also matches just before a trailing newline, which
    # would let 'GMT\n' pass validation.
    date_pattern = r'^\D+, \d{1,2} \D+ \d{4} \d\d:\d\d:\d\d GMT(\+00:00)?\Z'
    return re.match(date_pattern, s) is not None
# In[43]:
def ping_check(ping):
    """Classify a ping by whether it carries the expected, well-formed fields.

    Args:
        ping: mapping of property name to value; must support `in` and
            item access by property name.

    Returns:
        str: one of
            'missing field: <name>'  - the property key is absent,
            'wrong type for <name>'  - the value is not a text string
                                       (this includes a present-but-None value),
            'invalid date header'    - 'meta/Date' fails valid_date_header,
            'valid'                  - every check passed.
    """
    # `unicode` only exists on Python 2; fall back to `str` so the same check
    # also runs on Python 3.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    # A tuple of pairs (rather than a dict) fixes the check order, so a ping
    # with several problems is always labelled with the same one.
    props = (
        ('meta/Date', [text_type]),
        ('meta/submissionDate', [text_type]),
        ('clientId', [text_type]),
    )

    for k, types in props:
        if k not in ping:
            # Report the loop key; the previous code referenced an undefined
            # name here and raised NameError instead of returning a label.
            return 'missing field: ' + k
        if type(ping[k]) not in types:
            return 'wrong type for ' + k

    if not valid_date_header(ping['meta/Date']):
        return 'invalid date header'

    return 'valid'
# In[44]:
# Pair every ping with its validation label, giving (label, ping) records.
results = pings.keyBy(ping_check)
# Tally how many pings received each label.
results.countByKey()
# So a subset of the pings has no meta/Date value: the field is None, which the check reports as "wrong type for meta/Date".
# These might be local developer builds with "official" build flags that don't have the Date header changes yet but still submit with up-to-date build ids.
# Let's show an example ping for each result group.
# In[45]:
# Keep one representative ping per result label. reduceByKey only ever holds a
# single ping per label, whereas groupByKey would shuffle and materialize every
# ping of a group just to pick one of them.
examples = results.reduceByKey(lambda kept, _other: kept).collect()
# Redact the client id before display; build copies rather than mutating the
# collected records in place.
ps = [(label, dict(ping, clientId='...')) for label, ping in examples]
ps
# In[ ]:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment