Created
May 24, 2016 19:45
-
-
Save bsmedberg/3189e0a5947e02af3a76de92506ec298 to your computer and use it in GitHub Desktop.
clock-skew-nightly
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# In[1]: | |
import ujson as json | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
import numpy as np | |
import IPython | |
from __future__ import division | |
from montecarlino import grouped_permutation_test | |
from moztelemetry.spark import get_pings, get_pings_properties | |
get_ipython().magic(u'pylab inline') | |
IPython.core.pylabtools.figsize(16, 7) | |
from operator import add | |
import datetime | |
import itertools | |
import collections | |
import dateutil | |
import dateutil.parser | |
# In[2]: | |
from moztelemetry.spark import get_one_ping_per_client | |
# In[3]: | |
# Ping selection filters handed to get_pings as keyword arguments.
# The two-element tuples are presumably (start, end) ranges -- confirm
# against the moztelemetry get_pings documentation.
criteria = dict(
    app='Firefox',
    channel='nightly',
    version='49.0a1',
    submission_date=("20160503", "20160523"),
    build_id=("20160503000000", "20160601000000"),
)

# "main" pings matching the filters above; `sc` is the notebook's SparkContext.
main_pings = get_pings(sc, doc_type="main", **criteria)
# In[5]: | |
# Ping fields requested from get_pings_properties, as slash-separated paths.
properties = [
    # Submission metadata: meta/Date and meta/Timestamp are the two values
    # compared by map_skew_days.
    "meta/Date",
    "meta/Timestamp",
    # Client-reported ping and subsession timing.
    "creationDate",
    "payload/info/subsessionStartDate",
    "payload/info/subsessionLength",
    # Environment details.
    "environment/os/name",
    "environment/os/version",
    "environment/build/buildId",
    "environment/settings/update/channel",
]
main_data = get_pings_properties(main_pings, properties)

# In[ ]:

# Cache the extracted data so later cells can reuse it.
main_data = main_data.cache()
# In[ ]: | |
# Length of one day in seconds; used to convert a skew in seconds to days.
SECONDS_PER_DAY = 24 * 60 * 60

# Spark accumulator, starting at 0, that map_skew_days increments for each
# ping whose meta/Date cannot be parsed.
skew_errors = sc.accumulator(0)
def map_skew_days(p):
    """Yield the clock skew of one ping in whole days (for use with flatMap).

    The skew is ``meta/Timestamp`` minus ``meta/Date``: positive when the
    date in ``meta/Date`` is earlier than the server timestamp, negative when
    it is later.  The result is truncated toward zero, so any skew smaller
    than one day in either direction is reported as 0.

    Args:
        p: mapping with the keys ``'meta/Date'`` (a date string, or None) and
            ``'meta/Timestamp'`` (nanoseconds since the Unix epoch, or None).

    Yields:
        A single int, or nothing at all when the ping cannot be used:
        * either field is None -- skipped silently;
        * ``meta/Date`` cannot be parsed, or parses to a value without a
          UTC offset -- skipped and counted in the ``skew_errors`` accumulator.
    """
    client_date = p['meta/Date']
    server_nanos = p['meta/Timestamp']
    # A missing field would otherwise raise TypeError and abort the Spark job.
    if client_date is None or server_nanos is None:
        return

    # fromtimestamp() expects seconds, so scale the nanosecond value down.
    server_ts = datetime.datetime.fromtimestamp(
        server_nanos / 1000000000, dateutil.tz.tzutc())

    try:
        client_ts = dateutil.parser.parse(client_date)
    except (ValueError, OverflowError):
        # dateutil raises OverflowError for dates the platform cannot represent.
        skew_errors.add(1)
        return

    # Subtracting a naive datetime from an aware one raises TypeError; a date
    # without a UTC offset cannot be compared reliably, so count it as an error.
    if client_ts.utcoffset() is None:
        skew_errors.add(1)
        return

    yield int((server_ts - client_ts).total_seconds() / SECONDS_PER_DAY)
# Histogram of clock skew: maps each whole-day skew value produced by
# map_skew_days to the number of pings with that skew.  Reads the cached
# `main_data`; the original referenced `nightly_data`, which is not defined
# anywhere in this notebook and raised NameError.
skew_by_day = main_data.flatMap(map_skew_days).countByValue()
# In[ ]: | |
# Tabulate the skew histogram: one row per distinct skew value ("days") with
# the number of pings that had it ("count"), ordered by skew.
skew_df = pd.DataFrame(skew_by_day.items(), columns=("days", "count"))
skew_df = skew_df.sort_values("days")
skew_df

# In[ ]:

# Bar chart of ping count per skew value.
skew_df.plot.bar(x="days", y="count")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment