Skip to content

Instantly share code, notes, and snippets.

Created May 2, 2017 09:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Dexterp37/c0dd82374b49cf17539ded0e680af585 to your computer and use it in GitHub Desktop.
Save Dexterp37/c0dd82374b49cf17539ded0e680af585 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# coding: utf-8
# # Find histograms with empty keys
# ### Find histograms with empty keys
# In[4]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
from plotly.graph_objs import *
import IPython
from moztelemetry import Dataset, get_pings_properties, get_one_ping_per_client
from pprint import pprint
get_ipython().magic(u'pylab inline')
# In[31]:
# Release channels to analyse, in the order their results are reported.
channels = ["nightly", "aurora", "beta", "release"]

# Per-channel query settings:
#   "fraction"  - sample fraction handed to Dataset.records(sample=...).
#   "build_ids" - predicate applied to the ping's appBuildId; build ids are
#                 compared as strings, so ">=" is a lexicographic comparison
#                 against a YYYYMMDD prefix.
build_info = {
    "nightly": {
        "fraction": 0.1,
        "build_ids": lambda x: x >= "20170422",
    },
    "aurora": {
        "fraction": 0.1,
        "build_ids": lambda x: True,
    },
    "beta": {
        "fraction": 0.1,
        "build_ids": lambda x: True,
    },
    "release": {
        "fraction": 0.003,
        "build_ids": lambda x: x >= "20161104",
    },
}
# Build one sampled set of "main" pings per channel.
# `sc` is not defined in this file; presumably it is the SparkContext that
# the notebook environment provides - verify before running elsewhere.
pings = {}
for c in channels:
    pings[c] = (
        Dataset.from_source("telemetry")
        .where(docType="main")
        .where(appUpdateChannel=c)
        # Submission dates 20170422 through 20170425, both ends included.
        .where(submissionDate=lambda x: "20170422" <= x <= "20170425")
        # Channel-specific build id predicate and sample fraction.
        .where(appBuildId=build_info[c].get("build_ids"))
        .where(sourceVersion="4")
        .records(sc, sample=build_info[c].get("fraction"))
    )
# ... now extract the names of all keyed histograms with empty key strings (from all valid-looking pings).
# In[33]:
def get_keyed_histograms(p):
    """Return the ``payload.keyedHistograms`` dict of ping *p*.

    An empty dict is returned whenever the ping does not have the expected
    shape: *p* is not a dict, ``p["payload"]`` is missing or not a dict, or
    ``p["payload"]["keyedHistograms"]`` is missing or not a dict.
    """
    if not isinstance(p, dict):
        return {}

    # A missing key yields None from .get(), which fails the isinstance
    # check just like any other non-dict value.
    payload = p.get("payload")
    if not isinstance(payload, dict):
        return {}

    keyed_histograms = payload.get("keyedHistograms")
    if not isinstance(keyed_histograms, dict):
        return {}

    return keyed_histograms
# This extracts the names of the keyed histograms that contain an empty key string.
def extract_affected_histograms(p):
    """Return the names of the keyed histograms in ping *p* that have an
    empty-string key.

    The keyed histograms are obtained through ``get_keyed_histograms``, so a
    malformed ping produces an empty list.
    """
    khs = get_keyed_histograms(p)
    # Only dict-shaped histograms are inspected: `"" in kh` is true for every
    # string value (the empty string is a substring of anything) and raises
    # TypeError for non-containers such as None or an int.
    return [
        name
        for name, kh in khs.items()
        if isinstance(kh, dict) and "" in kh
    ]
# For every channel, flat-map its pings through extract_affected_histograms,
# giving one collection of affected histogram names per channel.
extracts = {
    channel: channel_pings.flatMap(extract_affected_histograms)
    for channel, channel_pings in pings.items()
}
# Let's get sorted lists of the hit counts per channel.
# In[34]:
# Per channel: count how often each histogram name occurs, then keep the
# (name, count) pairs ordered by count, highest first.
nameCounts = {
    channel: sorted(
        names.countByValue().items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
    for channel, names in extracts.items()
}
# In[35]:
# Print, for each channel, a table of affected histogram names (index) and
# their hit counts (single column).
for channel in channels:
    ranked = nameCounts[channel]
    column = "# of hits in " + channel
    # The column label is passed to the constructor rather than assigned to
    # df.columns afterwards: a channel with no hits yields a frame with zero
    # columns, and assigning a one-element label list to it raises ValueError.
    df = pd.DataFrame(
        [count for _, count in ranked],
        index=[name for name, _ in ranked],
        columns=[column],
    )
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("\n" + channel + "\n")
    # NOTE(review): the original built df but never displayed it; printing it
    # here assumes that showing the table was the intent - confirm.
    print(df)
# In[ ]:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment