Created
July 6, 2018 15:28
-
-
Save georgf/130fff0d991865d8d9905e5870d53822 to your computer and use it in GitHub Desktop.
Beta 61 use counter payload size
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
# In[1]:
import copy as cp
import datetime as dt
from functools import reduce

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ujson as json

from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client
from moztelemetry.dataset import Dataset

get_ipython().magic(u'matplotlib inline')
# # Load ping data
# Get a sample of "main" pings from Firefox Beta 61.
# In[2]:
dataset = Dataset.from_source('telemetry')
# In[3]:
dataset.schema
# In[4]:
# NOTE(review): `sc` is not defined in this file; presumably it is the
# SparkContext provided by the notebook environment -- confirm before running
# this outside of that environment.
records = (
    Dataset.from_source('telemetry')
    .where(docType='main')
    .where(appUpdateChannel='beta')
    .where(appVersion=lambda x: x.startswith('61.'))
    .records(sc, sample=0.1)
)
records.count()
# # Explore use counter data
# # Helper functions
# In[15]:
# Look at one ping and keep only the histograms whose name starts with
# "USE_COUNTER2_", to see what the use counter data looks like.
hs = records.first()["payload"]["histograms"]
use_counters = {k: hs[k] for k in hs if k.startswith("USE_COUNTER2_")}
use_counters
# In[16]: | |
def get_from_ping(ping, path):
    """Return the value stored at a "/"-separated *path* inside *ping*.

    The path is split on "/" and each segment is used, in order, as a
    subscript into the current value, starting from *ping* itself
    (e.g. "payload/histograms" -> ping["payload"]["histograms"]).

    Returns None when the path cannot be followed: a key is missing, or an
    intermediate value cannot be subscripted with the segment (for example
    it is None, a list, or a scalar).
    """
    try:
        return reduce(lambda node, key: node[key], path.split("/"), ping)
    except (KeyError, IndexError, TypeError):
        # Path segments are always strings, so subscripting None, a list or
        # a scalar raises TypeError; treat that like a missing key.
        return None
def get_payload_size(ping):
    """Return the length of the JSON-serialized use counter histograms.

    Only the entries of ping["payload"]["histograms"] whose name starts with
    "USE_COUNTER2_" are serialized. A ping without a histograms section is
    treated as having no use counters, so it yields the length of an empty
    JSON object rather than raising.
    """
    # The ping is only read here, so no defensive copy is needed.
    hs = get_from_ping(ping, "payload/histograms") or {}
    use_counters = {
        name: value
        for name, value in hs.items()
        if name.startswith("USE_COUNTER2_")
    }
    return len(json.dumps(use_counters))
# In[17]:
# Sanity check the helper on a single ping.
get_payload_size(records.first())
# # Get Beta size data
# In[18]:
# One serialized use counter size per sampled ping, collected to the driver.
sizes = pd.Series(records.map(get_payload_size).collect())
# In[23]:
# Dividing by 1024 gives the "kb" unit used in the plot label below.
(sizes / 1024).describe(percentiles=[0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999])
# In[24]:
(sizes / 1024).hist()
plt.xlabel('Size in kb')
plt.ylabel('Frequency')
plt.title('Beta use counter payload size')
# In[ ]: | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment