Skip to content

Instantly share code, notes, and snippets.

@georgf
Last active May 24, 2016 16:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save georgf/21a2330d9c599a0f2267e00a891a4cd7 to your computer and use it in GitHub Desktop.
Save georgf/21a2330d9c599a0f2267e00a891a4cd7 to your computer and use it in GitHub Desktop.
validate-desktop-date-header
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# coding: utf-8
# ## [Bug 1144778](https://bugzilla.mozilla.org/show_bug.cgi?id=1144778) - Validate the Date header submissions
# In[38]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
from plotly.graph_objs import *
import re
import email.utils as eut
import datetime
import time
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records
# In[39]:
# Load a 10% sample of Firefox nightly pings whose build id falls inside the
# 2016-05-20 .. 2016-05-24 window.
pings = get_pings(
    sc,
    app="Firefox",
    channel="nightly",
    build_id=("20160520000000", "20160524999999"),
    fraction=0.1,
)
# In[40]:
# Only these fields are needed for the Date header validation below; project
# each ping down to them.
properties = ['meta/Date', 'meta/submissionDate', 'clientId']
pings = get_pings_properties(pings, properties)
# In[41]:
# Peek at a single record to eyeball what the extracted properties look like.
pings.first()
# ### Validate the expected ping contents.
# In[42]:
def valid_date_header(s):
    """Return True if *s* has the shape of the expected Date header.

    The accepted form is ``<weekday>, <day> <month> <year> HH:MM:SS GMT``,
    optionally followed by a literal ``+00:00`` suffix, for example
    ``Tue, 24 May 2016 16:20:00 GMT``. Only the textual shape is checked;
    the individual fields are not validated as a real calendar date.

    *s* must be a string; anything else makes ``re.match`` raise TypeError.
    """
    # Raw string so the regex escapes (\D, \d, \+) reach the regex engine
    # untouched instead of relying on Python passing unknown escapes through.
    date_pattern = r'^\D+, \d{1,2} \D+ \d{4,4} \d\d:\d\d:\d\d GMT(\+00:00)?$'
    return re.match(date_pattern, s) is not None
# In[43]:
def ping_check(ping):
    """Classify a ping by whether it carries the expected fields.

    *ping* is a mapping from property path to value. The result is one of
    these status strings:

    - ``'missing field: <name>'`` if an expected key is absent,
    - ``'wrong type for <name>'`` if a value is not of an accepted type,
    - ``'invalid date header'`` if 'meta/Date' fails valid_date_header(),
    - ``'valid'`` otherwise.
    """
    # Expected keys mapped to the exact types accepted for their values.
    props = {
        'meta/Date': [unicode],
        'meta/submissionDate': [unicode],
        'clientId': [unicode],
    }

    for k, types in props.iteritems():
        if k not in ping:
            # This used to reference an undefined name (`prop`), so hitting
            # this branch raised NameError instead of reporting the field.
            return 'missing field: ' + k
        if type(ping[k]) not in types:
            return 'wrong type for ' + k

    # Only reached once 'meta/Date' is known to be present and a string.
    if not valid_date_header(ping['meta/Date']):
        return 'invalid date header'

    return 'valid'
# In[44]:
# Pair every ping with its validation verdict, keyed by the verdict string.
results = pings.map(lambda ping: (ping_check(ping), ping))
# Tally how many pings fall under each verdict.
results.countByKey()
# So a subset of the pings are missing the meta/Date field.
# This might be local developer builds with "official" build flags that don't have the Date header changes yet but submit on up-to-date build ids.
# Let's show examples for each result group.
# In[45]:
# Pick one example ping per result group. reduceByKey keeps a single
# representative per key as it combines values, so the full list of pings for
# each group never has to be built just to read its first element (which is
# what groupByKey() followed by list(...)[0] did). Which ping ends up as the
# example is arbitrary.
ps = results.reduceByKey(lambda kept, _other: kept).collect()
for k, p in ps:
    # Blank out the client id before the examples are displayed.
    p['clientId'] = '...'
ps
# In[ ]:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment