Skip to content

Instantly share code, notes, and snippets.

@georgf
Created July 6, 2018 15:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save georgf/130fff0d991865d8d9905e5870d53822 to your computer and use it in GitHub Desktop.
Save georgf/130fff0d991865d8d9905e5870d53822 to your computer and use it in GitHub Desktop.
Beta 61 use counter payload size
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# coding: utf-8
# In[1]:
import datetime as dt
import ujson as json
import pandas as pd
import numpy as np
import copy as cp
import matplotlib.pyplot as plt
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client
from moztelemetry.dataset import Dataset
# Enable the inline matplotlib backend for this notebook session.
# run_line_magic() is the supported replacement for the deprecated
# InteractiveShell.magic() call.
get_ipython().run_line_magic('matplotlib', 'inline')
# # Load ping data
# Get a 10% sample of main pings from Firefox Beta 61.
# In[2]:
# Handle on the 'telemetry' source, kept around to look at its schema.
dataset = Dataset.from_source('telemetry')
# In[3]:
dataset.schema
# In[4]:
# Main pings from the beta channel whose app version starts with '61.',
# requested with sample=0.1 and materialized through the Spark context `sc`.
records = (
    Dataset.from_source('telemetry')
    .where(docType='main')
    .where(appUpdateChannel='beta')
    .where(appVersion=lambda version: version.startswith('61.'))
    .records(sc, sample=0.1)
)
records.count()
# # Explore use counter data
# # Helper functions
# In[15]:
# Take the first ping and keep only the histograms whose name carries the
# use counter prefix, to see what the data looks like.
hs = records.first()["payload"]["histograms"]
use_counters = dict(
    (name, hs[name]) for name in hs if name.startswith("USE_COUNTER2_")
)
use_counters
# In[16]:
def get_from_ping(ping, path):
    """Look up a nested value in *ping* by a "/"-separated key path.

    Each path segment is used as a subscript on the result of the previous
    one, starting from ``ping``; for example ``"payload/histograms"`` reads
    ``ping["payload"]["histograms"]``.

    Returns the value found, or ``None`` when any step of the path cannot
    be resolved.
    """
    # reduce is only a builtin on Python 2; functools provides it on both
    # Python 2 and Python 3.
    from functools import reduce

    try:
        return reduce(lambda node, key: node[key], path.split("/"), ping)
    except (KeyError, IndexError, TypeError):
        # TypeError covers an intermediate value that is None or otherwise
        # not subscriptable by a string key; treat it like a missing key.
        return None
def get_payload_size(ping):
    """Return the length of the JSON-serialized use counters of *ping*.

    Only the entries of ``payload/histograms`` whose name starts with
    ``USE_COUNTER2_`` are serialized. A ping for which that section cannot
    be found is treated as having no use counters, so the result is the
    length of an empty serialized object.
    """
    # Nothing below mutates the ping, so it is read in place instead of
    # being deep-copied for every record.
    hs = get_from_ping(ping, "payload/histograms") or {}
    use_counters = {k: hs[k] for k in hs if k.startswith("USE_COUNTER2_")}
    return len(json.dumps(use_counters))
# In[17]:
# Try the helper on a single ping first.
get_payload_size(records.first())
# # Get Beta size data
# In[18]:
# Compute the size for every record and gather the results into a Series.
per_ping_sizes = records.map(get_payload_size).collect()
sizes = pd.Series(per_ping_sizes)
# In[23]:
# Summary statistics of the sizes divided by 1024, including tail percentiles.
summary_percentiles = [0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999]
(sizes / 1024).describe(percentiles=summary_percentiles)
# In[24]:
# Histogram of the same scaled sizes; labels are set on the returned axes.
axes = (sizes / 1024).hist()
axes.set_xlabel('Size in kb')
axes.set_ylabel('Frequency')
axes.set_title('Beta use counter payload size')
# In[ ]:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment