Created
May 24, 2016 17:08
-
-
Save bsmedberg/6431e257f9ca2648125a83776fcb52a3 to your computer and use it in GitHub Desktop.
Ping latency
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# In[39]: | |
import ujson as json | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
import numpy as np | |
import IPython | |
from __future__ import division | |
from montecarlino import grouped_permutation_test | |
from moztelemetry.spark import get_pings, get_pings_properties | |
get_ipython().magic(u'pylab inline') | |
IPython.core.pylabtools.figsize(16, 7) | |
from operator import add | |
import datetime | |
import itertools | |
import collections | |
import dateutil | |
import dateutil.parser | |
# In[2]: | |
# Notebook cell output: the default parallelism of the Spark context `sc`.
sc.defaultParallelism
# In[ ]: | |
def get_property(value, path):
    """Follow ``path`` through nested dicts and return what it leads to.

    ``path`` is an iterable of keys applied one after another, starting
    at ``value``. An empty path returns ``value`` itself.

    Returns ``None`` as soon as the current node is not a dict or does
    not contain the next key, so a missing branch never raises KeyError.
    """
    node = value
    for step in path:
        if isinstance(node, dict) and step in node:
            node = node[step]
        else:
            return None
    return node
# In[4]: | |
# Filter shared by both ping queries: Firefox 47.0 beta pings submitted on
# 2016-05-22.
criteria = dict(
    app='Firefox',
    channel='beta',
    version='47.0',
    submission_date="20160522",
)
main_pings = get_pings(sc, doc_type="main", **criteria)
# In[5]:
crash_pings = get_pings(sc, doc_type="crash", **criteria)
# In[18]: | |
# In[122]: | |
# Property paths wanted from every ping, whatever its type.
both_properties = ["meta/Timestamp", "creationDate"]

# Main pings additionally need the subsession start date and length.
main_properties = list(both_properties)
main_properties.extend([
    "payload/info/subsessionStartDate",
    "payload/info/subsessionLength",
])

# Crash pings additionally need the crash date and crash time.
crash_properties = list(both_properties)
crash_properties.extend([
    "payload/crashDate",
    "payload/metadata/CrashTime",
])
# Extract only the listed property paths from each ping
# (via moztelemetry's get_pings_properties).
main_data = get_pings_properties(main_pings, main_properties)
crash_data = get_pings_properties(crash_pings, crash_properties)
# In[123]:
# Cache both datasets: each one is read by first() and again by flatMap() below.
main_data, crash_data = main_data.cache(), crash_data.cache()
# In[124]:
# Look at one record of each kind to inspect the extracted fields.
fm = main_data.first()
fm
# In[29]:
fc = crash_data.first()
fc
# In[70]: | |
# Counts crash pings whose crash date is missing or cannot be parsed.
errcount = sc.accumulator(0)


def cd_to_timestamp(crash_ping):
    """Yield the latency bucket, in whole days, of one crash ping.

    Latency is the receipt date (2016-05-22, matching the
    ``submission_date`` used to query the pings) minus the calendar date
    parsed from ``payload/crashDate``. The result is clamped: ``-1``
    stands for any crash dated after the receipt date and ``21`` for
    anything more than 20 days old.

    This is a generator so that ``flatMap`` simply drops pings for which
    nothing is yielded. Pings whose crash date is missing (``None``) or
    unparseable yield nothing and are tallied in ``errcount``.
    """
    raw_date = crash_ping['payload/crashDate']
    if raw_date is None:
        # dateutil does not raise ValueError for None, so without this
        # guard a single ping lacking the field would abort the whole job.
        errcount.add(1)
        return
    try:
        cd = dateutil.parser.parse(raw_date).date()
    except ValueError:
        errcount.add(1)
        return
    rd = datetime.date(2016, 5, 22)
    # Clamp into [-1, 21]: the two end values are overflow buckets.
    days = max(-1, min((rd - cd).days, 21))
    yield days


crash_ping_latency = crash_data.flatMap(cd_to_timestamp).countByValue()
# Single-argument form prints the same text on Python 2 and Python 3.
print("errcount %s" % (errcount.value,))
# In[130]: | |
# One row per latency bucket (days) with the number of crash pings in it,
# ordered by latency.
crash_df = pd.DataFrame(
    data=list(crash_ping_latency.items()),
    columns=("Latency", "Count"),
)
crash_df = crash_df.sort_values("Latency")
crash_total = crash_df['Count'].sum()
# Dividing by (total / 100) turns each count into a percentage of all crash pings.
by_pct = crash_df.assign(pct=crash_df["Count"].div(crash_total / 100))
by_pct.plot.bar(
    x="Latency",
    y="pct",
    title="Latency of activity date (local time) against receipt date (UTC) for crash pings",
)
# In[127]: | |
# Counts main pings whose subsession start date cannot be parsed.
main_errcount = sc.accumulator(0)


def cd_to_timestamp(main_ping):
    """Yield ``(latency_days, usage_hours)`` for one main ping.

    ``latency_days`` is the receipt date (2016-05-22, matching the
    ``submission_date`` used to query the pings) minus the calendar date
    parsed from ``payload/info/subsessionStartDate``, clamped so that
    ``-1`` stands for any date after receipt and ``21`` for anything more
    than 20 days old.

    ``usage_hours`` is ``payload/info/subsessionLength`` divided by 3600
    (a missing length counts as 0), capped at 25.

    This is a generator so that ``flatMap`` drops pings for which nothing
    is yielded. Pings with no start date or a negative length are dropped
    without being counted; pings whose start date fails to parse are
    dropped and tallied in ``main_errcount``.
    """
    start_date = main_ping['payload/info/subsessionStartDate']
    if start_date is None:
        return
    hours = (main_ping["payload/info/subsessionLength"] or 0) / 3600.0
    if hours < 0:
        return
    if hours > 25:
        hours = 25
    try:
        cd = dateutil.parser.parse(start_date).date()
    except ValueError:
        # Count against the main-ping accumulator; the original incremented
        # the crash-ping `errcount`, so the total printed below was always 0.
        main_errcount.add(1)
        return
    rd = datetime.date(2016, 5, 22)
    # Clamp into [-1, 21]: the two end values are overflow buckets.
    days = max(-1, min((rd - cd).days, 21))
    yield (days, hours)


# Sum usage hours per latency bucket.
main_ping_latency = main_data.flatMap(cd_to_timestamp).foldByKey(0, add).collect()
# Single-argument form prints the same text on Python 2 and Python 3.
print("errcount %s" % (main_errcount.value,))
# NOTE: the "Count" column holds summed usage hours here, not a ping count.
main_df = pd.DataFrame(data=main_ping_latency, columns=("Latency", "Count")).sort_values("Latency")
main_df
# In[132]: | |
# Total usage hours across all latency buckets. The original read `df`,
# which is never defined; the main-ping frame is `main_df`.
main_total = main_df['Count'].sum()
# Dividing by (total / 100) turns each bucket into a percentage of all usage hours.
main_by_pct = main_df.assign(pct=main_df["Count"].div(main_total / 100))
main_by_pct.plot.bar(
    x="Latency",
    y="pct",
    title="Latency of activity date (local time) against receipt date (UTC) for main pings, by usage-hour",
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment