Skip to content

Instantly share code, notes, and snippets.

@bill-mccloskey
Last active February 21, 2017 19:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bill-mccloskey/41130211c988151cbb1bef2735f00953 to your computer and use it in GitHub Desktop.
Save bill-mccloskey/41130211c988151cbb1bef2735f00953 to your computer and use it in GitHub Desktop.
Runnable analysis
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# coding: utf-8
# In[1]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
from plotly.graph_objs import *
from moztelemetry import get_pings_properties, get_one_ping_per_client
from moztelemetry.dataset import Dataset
get_ipython().magic(u'matplotlib inline')
# In[2]:
# Evaluate the schema of the "telemetry" source (a notebook shows it as the
# cell output; as a plain script the value is discarded).
Dataset.from_source("telemetry").schema
# In[3]:
# Select 'main' pings on the nightly channel whose build id starts with
# 20170219 or 20170220. `sc` is not defined in this file; it must already
# exist in the session that runs this script.
pings = (
    Dataset.from_source("telemetry")
    .where(docType='main')
    .where(appBuildId=lambda build_id: build_id.startswith(('20170219', '20170220')))
    .where(appUpdateChannel="nightly")
    .records(sc, sample=1.0)
)
# Keep only the single property the rest of the analysis reads from each ping.
# In[4]:
subset = get_pings_properties(
    pings,
    ["payload/processes/content/keyedHistograms/MAIN_THREAD_RUNNABLE_MS"],
)
# In[5]:
def count(d):
    """Return per-key totals of the keyed MAIN_THREAD_RUNNABLE_MS histogram.

    ``d`` is indexed by the property path
    'payload/processes/content/keyedHistograms/MAIN_THREAD_RUNNABLE_MS'.
    A falsy value under that path (for example ``None``) is treated as
    "no keyed histograms" and yields an empty list.

    Returns a list of ``(key, total)`` tuples, one per histogram key, where
    ``total`` is the sum of every entry in that histogram's ``'values'``
    mapping (0 when the mapping is empty).
    """
    keyed = d['payload/processes/content/keyedHistograms/MAIN_THREAD_RUNNABLE_MS'] or {}
    # One tuple per key; sum() over the mapping's values replaces the manual
    # accumulator loop and returns 0 for an empty mapping.
    return [(key, sum(hist['values'].values())) for key, hist in keyed.items()]
# In[6]:
# Add up the per-ping (key, total) pairs from count() so that each histogram
# key ends up with a single total, then bring the result back as a list.
freq = (
    subset.flatMap(count)
    .reduceByKey(lambda left, right: left + right)
    .collect()
)
# In[7]:
# Order in place, largest total first.
freq.sort(key=lambda pair: pair[1], reverse=True)
# In[8]:
# Totals divided by one million (shown as the cell output in a notebook).
# NOTE(review): with integer totals this is floor division under Python 2 and
# true division under Python 3 -- confirm which is intended.
[(name, v / 1000000) for name, v in freq]
# In[9]:
def aggregate_time(d):
    """Return the ``'sum'`` field of each keyed MAIN_THREAD_RUNNABLE_MS histogram.

    ``d`` is indexed by the property path
    'payload/processes/content/keyedHistograms/MAIN_THREAD_RUNNABLE_MS'.
    A falsy value under that path (for example ``None``) is treated as
    "no keyed histograms" and yields an empty list.

    Returns a list of ``(key, hist['sum'])`` tuples, one per histogram key.
    """
    keyed = d['payload/processes/content/keyedHistograms/MAIN_THREAD_RUNNABLE_MS'] or {}
    # Pair every key with the 'sum' value stored in its histogram.
    return [(key, hist['sum']) for key, hist in keyed.items()]
# In[10]:
# Add up the per-ping (key, sum) pairs from aggregate_time() so that each
# histogram key ends up with a single total, then collect the result as a list.
freq = (
    subset.flatMap(aggregate_time)
    .reduceByKey(lambda left, right: left + right)
    .collect()
)
# In[11]:
# Order in place, largest total first.
freq.sort(key=lambda pair: pair[1], reverse=True)
# In[12]:
# Bare expression: a notebook shows the sorted list as the cell output.
freq
# In[ ]:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment