Skip to content

Instantly share code, notes, and snippets.

@bsmedberg
Created May 24, 2016 19:45
Show Gist options
  • Save bsmedberg/3189e0a5947e02af3a76de92506ec298 to your computer and use it in GitHub Desktop.
Save bsmedberg/3189e0a5947e02af3a76de92506ec298 to your computer and use it in GitHub Desktop.
clock-skew-nightly
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# coding: utf-8
# In[1]:
from __future__ import division  # must precede every other statement in the file

import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import IPython
from montecarlino import grouped_permutation_test
from moztelemetry.spark import get_pings, get_pings_properties
get_ipython().magic(u'pylab inline')
IPython.core.pylabtools.figsize(16, 7)
# NOTE: the imports below intentionally stay after the %pylab line so that the
# names they bind are not replaced by anything %pylab pulls into the namespace.
from operator import add
import datetime
import itertools
import collections
import dateutil
import dateutil.parser
import dateutil.tz
# In[2]:
from moztelemetry.spark import get_one_ping_per_client
# In[3]:
# Ping selection for this analysis: Firefox on the nightly channel, 49.0a1.
# The two-element tuples are passed to get_pings unchanged; presumably it
# treats them as (start, end) ranges -- confirm against the moztelemetry docs.
criteria = dict(
    app='Firefox',
    channel='nightly',
    version='49.0a1',
    submission_date=("20160503", "20160523"),
    build_id=("20160503000000", "20160601000000"),
)
# `sc` is not defined in this file; presumably it is the SparkContext supplied
# by the notebook environment.
main_pings = get_pings(sc, doc_type="main", **criteria)
# In[5]:
# Field paths extracted from every ping.  Only "meta/Date" and
# "meta/Timestamp" are read by the clock-skew mapping later in this file.
properties = (
    # meta/* fields plus the ping's creationDate.
    ["meta/Date", "meta/Timestamp", "creationDate"]
    # payload/info/* subsession fields.
    + ["payload/info/subsessionStartDate", "payload/info/subsessionLength"]
    # environment/* fields: OS, build id and update channel.
    + [
        "environment/os/name",
        "environment/os/version",
        "environment/build/buildId",
        "environment/settings/update/channel",
    ]
)
main_data = get_pings_properties(main_pings, properties)
# In[ ]:
# Cache the extracted records so later passes can reuse them.
# NOTE(review): assumes main_data is a Spark RDD -- confirm.
main_data = main_data.cache()
# In[ ]:
# Number of seconds in one day; map_skew_days divides by it to express the
# clock skew in days.
SECONDS_PER_DAY = 24 * 60 * 60
# Accumulator incremented by map_skew_days each time a ping's "meta/Date"
# value cannot be parsed.
skew_errors = sc.accumulator(0)
def map_skew_days(p):
    """Yield the clock skew, in whole days, between server and client for one ping.

    Written as a generator so it can be used with ``flatMap``: it yields
    exactly one integer for a usable ping and nothing otherwise.

    Args:
        p: Mapping of extracted ping properties.  Reads ``p['meta/Date']``
            (the client-supplied date string) and ``p['meta/Timestamp']``
            (the server timestamp, treated here as nanoseconds since the
            epoch because it is divided by 1e9 to obtain seconds).

    Yields:
        ``int`` -- ``server time - client time`` in days, truncated toward
        zero.  Positive values mean the client clock is behind the server.
        Because of the truncation, every skew strictly between -1 and +1
        day lands in bucket 0.

    Side effects:
        Adds 1 to the ``skew_errors`` accumulator when the client date
        string cannot be parsed.
    """
    client_date = p['meta/Date']
    server_ns = p['meta/Timestamp']
    # A ping missing either value cannot contribute a skew sample; skip it
    # instead of letting a TypeError abort the whole job.
    if client_date is None or server_ns is None:
        return

    utc = dateutil.tz.tzutc()
    # Dividing by the float 1e9 keeps this a true division even without
    # ``from __future__ import division`` in effect.
    server_ts = datetime.datetime.fromtimestamp(server_ns / 1e9, utc)

    try:
        client_ts = dateutil.parser.parse(client_date)
    except (ValueError, OverflowError):
        # dateutil raises ValueError for unparseable text and OverflowError
        # for dates outside the representable range.
        skew_errors.add(1)
        return

    # A date string without a recognised zone parses to a naive datetime,
    # and aware - naive raises TypeError.  Assume UTC in that case.
    # NOTE(review): presumably meta/Date is the HTTP Date header, which is
    # defined in GMT -- confirm.
    if client_ts.tzinfo is None:
        client_ts = client_ts.replace(tzinfo=utc)

    yield int((server_ts - client_ts).total_seconds() / SECONDS_PER_DAY)
# Count pings per whole-day skew value.  The input is `main_data`, the cached
# ping properties; no `nightly_data` is defined anywhere in this file.
skew_by_day = main_data.flatMap(map_skew_days).countByValue()
# In[ ]:
# Turn the skew -> ping-count mapping into a two-column table ordered by skew.
skew_df = pd.DataFrame(skew_by_day.items(), columns=("days", "count"))
skew_df = skew_df.sort_values("days")
skew_df
# In[ ]:
# Bar chart of the ping count for each whole-day skew value.
skew_df.plot(kind="bar", x="days", y="count")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment