Skip to content

Instantly share code, notes, and snippets.

@chutten
Created September 29, 2016 14:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chutten/a17b541f6d0af2527dffc4caaf9666c4 to your computer and use it in GitHub Desktop.
Save chutten/a17b541f6d0af2527dffc4caaf9666c4 to your computer and use it in GitHub Desktop.
beta_spinners_frequencies
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# coding: utf-8
# ### Egregious Spinners on Beta 50 and their Frequencies of Occurrence
# In[1]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
from plotly.graph_objs import *
import IPython
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records
# Notebook setup: run the `pylab inline` magic and call figsize(16, 7) so
# later figures use that size.
get_ipython().magic(u'pylab inline')
IPython.core.pylabtools.figsize(16, 7)
# In[2]:
# `sc` is not defined in this file; presumably the SparkContext injected by
# the notebook environment -- TODO confirm. Bare expression: the notebook
# echoes its value as the cell output.
sc.defaultParallelism
# In[3]:
# Load the pings to analyse: Firefox, beta channel, version 50.0, submitted
# on 2016-09-23. NOTE(review): fraction=1 presumably means "no sampling" --
# confirm against the moztelemetry documentation.
pings = get_pings(sc, app="Firefox", channel="beta", version="50.0", submission_date="20160923", fraction=1)
# In[4]:
# Echo how many pings were loaded.
pings.count()
# In[5]:
# Project each ping down to the fields requested here: client id, OS name,
# memory, CPU speed, active add-ons, and three histograms (GC max pause,
# cycle-collector max pause, long tab-switch spinner). Later cells index the
# result by these same path strings.
# NOTE(review): with_processes=True presumably also exposes per-process
# variants of each histogram -- confirm against the moztelemetry docs.
subset = get_pings_properties(pings, ["clientId",
"environment/system/os/name",
"environment/system/memoryMB",
"environment/system/cpu/speedMHz",
"environment/addons/activeAddons",
"payload/histograms/GC_MAX_PAUSE_MS",
"payload/histograms/CYCLE_COLLECTOR_MAX_PAUSE",
"payload/histograms/FX_TAB_SWITCH_SPINNER_VISIBLE_LONG_MS"], with_processes=True)
# In[6]:
# Add-on ids that may be active without disqualifying a ping from the
# "no add-ons" population.
acceptable_addons = {
    "e10srollout@mozilla.org",
    "firefox@getpocket.com",
    "webcompat@mozilla.org",
}


def no_addons(p):
    """Return True if the ping has no active add-ons beyond the accepted ones.

    Args:
        p: mapping holding an "environment/addons/activeAddons" entry, which
            is either None or a mapping keyed by add-on id.

    Returns:
        True when the add-on entry is None, is empty, or contains only ids
        listed in ``acceptable_addons``; False otherwise.
    """
    addons = p["environment/addons/activeAddons"]
    if addons is None or len(addons) == 0:
        return True
    # Explicit subset test: every active id must be an accepted one.
    return set(addons.keys()) <= acceptable_addons
# Keep only the pings accepted by the no_addons predicate.
acceptable = subset.filter(no_addons)
# In[7]:
# From here on `subset` refers to the add-on-filtered pings.
subset = acceptable
# In[8]:
# Number of pings left after the add-on filter (echoed as cell output).
ping_count = subset.count()
ping_count
# In[9]:
# Pings that carry the long tab-switch spinner histogram at all.
spinning_pings = subset.filter(lambda p: p["payload/histograms/FX_TAB_SWITCH_SPINNER_VISIBLE_LONG_MS"] is not None)
spinning_pings.count()
# In[10]:
# Client count for the filtered pings. NOTE(review): get_one_ping_per_client
# presumably keeps a single ping per clientId -- confirm.
client_count = get_one_ping_per_client(subset).count()
client_count
# In[11]:
# Client count restricted to pings that carry the spinner histogram.
spinning_clients_count = get_one_ping_per_client(spinning_pings).count()
spinning_clients_count
# In[12]:
# Property path of the long tab-switch spinner histogram.
hgram = "payload/histograms/FX_TAB_SWITCH_SPINNER_VISIBLE_LONG_MS"


def has_longish_spinner(p):
    """Return True if the ping has a non-zero count in a bucket above 1000.

    Args:
        p: mapping holding the ``hgram`` entry, which is either None or a
            mapping-like object of bucket -> count.

    Returns:
        True when at least one bucket strictly greater than 1000 has a
        count greater than 0; False when the histogram is None or no such
        bucket exists. A bucket keyed exactly 1000 does not qualify.
    """
    histogram = p[hgram]
    if histogram is None:
        return False
    # .items() instead of .iteritems(): available on Python 2 and 3 dicts
    # alike. An explicit loop is used rather than any(); NOTE(review): the
    # notebook's `pylab inline` may shadow the builtin `any` -- confirm.
    for bucket, count in histogram.items():
        if bucket > 1000 and count > 0:
            return True
    return False
# Pings accepted by has_longish_spinner, and how many there are.
longish_spinning_pings = spinning_pings.filter(has_longish_spinner)
longish_spinning_pings.count()
# In[13]:
# Client count for those pings. NOTE(review): get_one_ping_per_client
# presumably keeps a single ping per clientId -- confirm.
longish_spinning_clients = get_one_ping_per_client(longish_spinning_pings)
longish_spinning_clients.count()
# #### Check frequencies of spinners per client, per bucket
# In[14]:
# Emit one ((clientId, bucket), count) pair per histogram bucket of every
# spinning ping, then sum the counts for each (clientId, bucket) key.
client_spinner_freq = spinning_pings .flatMap(lambda p: [((p["clientId"], bucket), p[hgram][bucket]) for bucket in p[hgram].keys()]) .reduceByKey(lambda a,b: a+b)
# In[15]:
# Per-client spinner counts are capped at `limit` so the tail of heavy
# clients collapses into the last histogram bin.
limit = 50

# The bucket list is read from the first spinning ping; one plot is drawn
# per bucket. With no spinning pings there is nothing to plot.
first_ping = spinning_pings.take(1)
buckets = first_ping[0][hgram].keys() if first_ping else []

for bucket in buckets:
    # Per-client totals for this bucket only. `bucket` is bound as a lambda
    # default so the filter does not depend on the loop variable's later
    # value.
    bucket_client_spinner_freq = (
        client_spinner_freq
        .filter(lambda x, bucket=bucket: x[0][1] == bucket)
        .map(lambda x: x[1] if x[1] < limit else limit)  # cap to a max of `limit` spinners
    )
    bucket_spinner_series = pd.Series(bucket_client_spinner_freq.collect())

    # Log-scaled histogram: x = capped spinner count, y = number of clients.
    bucket_spinner_series.hist(bins=limit, log=True, width=1)
    plt.xlim(0, limit)
    plt.title("Spinners of length %d" % bucket)
    plt.xlabel("Number of spinners ( %d total )" % bucket_spinner_series.sum())
    plt.ylabel("Number of clients")
    plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment