# coding: utf-8
# ## [Bug 1144778](https://bugzilla.mozilla.org/show_bug.cgi?id=1144778) - Validate the Date header submissions
# In[38]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
from plotly.graph_objs import *
import re
import email.utils as eut
import datetime
import time
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records
# In[39]:
# Fetch pings through moztelemetry's get_pings using the Spark context `sc`.
# build_id is given as a (start, end) pair of build-id strings -- presumably
# an inclusive range; confirm against the moztelemetry documentation.
# NOTE(review): the argument list is not closed in this view; any further
# filters and the closing parenthesis are not visible here.
pings = get_pings(sc,
build_id=("20160520000000", "20160524999999"),
# In[40]:
# NOTE(review): the property list is not closed in this view and its entries
# are not visible. ping_check reads 'meta/Date', 'meta/submissionDate' and
# 'clientId', so those presumably belong here -- confirm.
properties = [
# Rebind `pings` to the result of get_pings_properties for `properties`.
pings = get_pings_properties(pings, properties)
# In[41]:
# ### Validate the expected ping contents.
# In[42]:
def valid_date_header(s):
    """Return True if *s* has the expected GMT date-header shape.

    Accepted shape: non-digit text, a comma, a 1-2 digit day, non-digit
    text, a 4-digit year, ``HH:MM:SS`` and a literal ``GMT``, optionally
    followed by ``+00:00`` -- e.g. ``Tue, 24 May 2016 10:15:30 GMT``.
    Only the shape is checked; field values are not range-validated.
    """
    # Raw string: the regex escapes (\D, \d, \+) reach `re` untouched instead
    # of relying on Python passing unknown string escapes through.
    date_pattern = r'^\D+, \d{1,2} \D+ \d{4} \d\d:\d\d:\d\d GMT(\+00:00)?$'
    return re.match(date_pattern, s) is not None
# In[43]:
def ping_check(ping):
    """Classify a ping by whether its expected fields are present and valid.

    Returns one of:
      * ``'missing field: <key>'`` -- an expected key is absent from *ping*,
      * ``'wrong type for <key>'`` -- the value's type is not an accepted one,
      * ``'invalid date header'`` -- ``meta/Date`` fails valid_date_header,
      * ``'valid'`` -- every check passed.
    """
    # Expected keys mapped to the exact types accepted for their values.
    props = {
        'meta/Date': [unicode],
        'meta/submissionDate': [unicode],
        'clientId': [unicode],
    }
    for k, types in props.iteritems():
        if k not in ping:
            # Report the key being checked; the previous code referenced an
            # undefined name here and raised NameError instead of returning.
            return 'missing field: ' + k
        if type(ping[k]) not in types:
            return 'wrong type for ' + k
    # Only reached once 'meta/Date' is known to be present and a string.
    if not valid_date_header(ping['meta/Date']):
        return 'invalid date header'
    return 'valid'
# In[44]:
# Pair every ping with its ping_check verdict so pings can be grouped by it.
# NOTE(review): `pings.map(lambda` was reconstructed from a truncated line
# (the lambda body and closing parenthesis survived) -- confirm against the
# original notebook.
results = pings.map(lambda p: (ping_check(p), p))
# So a subset of the pings are missing the meta/Date field.
# This might be local developer builds with "official" build flags that don't have the Date header changes yet but submit on up-to-date build ids.
# Let's show examples for each result group.
# In[45]:
# Collect one example ping per result group. reduceByKey keeps a single ping
# per key while combining, instead of gathering every ping of a group with
# groupByKey only to discard all but one.
ps = results.reduceByKey(lambda kept, _other: kept).collect()
for k, p in ps:
    # Mask the client id in the collected example.
    p['clientId'] = '...'
# In[ ]:
