Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
input-latency-cc-tabusage
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# coding: utf-8
# In[5]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
from plotly.graph_objs import *
from moztelemetry import get_pings_properties
from moztelemetry.dataset import Dataset
from moztelemetry.histogram import Histogram
from operator import add
from datetime import date, timedelta
get_ipython().magic(u'matplotlib inline')
# In[6]:
from collections import namedtuple
# In[7]:
def ping_filter(p):
    """Return True for pings in this analysis' target population.

    Keeps only subsessions that are: on Windows, non-empty (positive
    subsession length), e10s-enabled, and running no add-ons other than
    system add-ons (so third-party add-ons don't confound the numbers).
    """
    if p.get("environment/system/os/name", None) != "Windows_NT":
        return False
    if p.get("payload/info/subsessionLength", 0) <= 0:
        return False
    if p.get("environment/settings/e10sEnabled", False) != True:
        return False
    # activeAddons may be absent or explicitly null; treat both as "none".
    addons = p.get("environment/addons/activeAddons", {}) or {}
    # .values() rather than the Python-2-only .itervalues(): behaviorally
    # identical here, and forward-compatible with Python 3.
    for addon in addons.values():
        if addon.get("isSystem", False) != True:
            return False
    return True
# In[8]:
pings = Dataset.from_source("telemetry") .where(docType="main") .where(submissionDate=lambda d: "20170428" <= d <= "20170504") .where(appUpdateChannel="nightly") .records(sc)
# In[9]:
# Project each ping down to just the fields this analysis uses.
# with_processes=True splits per-process histograms into separate
# *_parent (chrome) and *_children (content) entries.
data = get_pings_properties(pings, [
    "clientId",
    "environment/system/os/name",
    "environment/settings/e10sEnabled",
    "environment/addons/activeAddons",
    "payload/info/subsessionLength",
    "payload/histograms/INPUT_EVENT_RESPONSE_MS",
    "payload/histograms/GC_MAX_PAUSE_MS",
    "payload/histograms/CYCLE_COLLECTOR_MAX_PAUSE",
    "payload/histograms/GHOST_WINDOWS",
    "payload/histograms/CHECKERBOARD_DURATION",
    "payload/processes/parent/scalars/browser.engagement.max_concurrent_tab_count",
], with_processes=True)
# In[10]:
# Restrict to the target population (Windows, e10s, system add-ons only).
data2 = data.filter(ping_filter)
# In[11]:
def add_nullq(a, b):
    """None-tolerant addition: a missing (None) operand acts as the identity.

    Returns None only when both operands are None.
    """
    if a is None or b is None:
        return a if b is None else b
    return a + b
def max_nullq(a, b):
    """None-tolerant max: a None operand is ignored rather than compared.

    Returns None only when both operands are None.
    """
    if a is None:
        return b
    return a if b is None else max(a, b)
# (field name, ping property path, combiner) triples describing how each
# measurement is merged across a client's subsessions: session length and
# histograms are summed (add_nullq), while the max-concurrent-tab scalar
# takes the weekly maximum (max_nullq).
props = (
    ('session_length', 'payload/info/subsessionLength', add_nullq),
    ('input_event_response_chrome', 'payload/histograms/INPUT_EVENT_RESPONSE_MS_parent', add_nullq),
    ('input_event_response_content', 'payload/histograms/INPUT_EVENT_RESPONSE_MS_children', add_nullq),
    ('gc_max_pause_chrome', 'payload/histograms/GC_MAX_PAUSE_MS_parent', add_nullq),
    ('gc_max_pause_content', 'payload/histograms/GC_MAX_PAUSE_MS_children', add_nullq),
    ('cc_max_pause_chrome', 'payload/histograms/CYCLE_COLLECTOR_MAX_PAUSE_parent', add_nullq),
    ('cc_max_pause_content', 'payload/histograms/CYCLE_COLLECTOR_MAX_PAUSE_children', add_nullq),
    ('ghost_windows', 'payload/histograms/GHOST_WINDOWS', add_nullq),
    ('checkerboard_duration', 'payload/histograms/CHECKERBOARD_DURATION', add_nullq),
    ('max_tab_count', 'payload/processes/parent/scalars/browser.engagement.max_concurrent_tab_count', max_nullq),
)
# One record per ping (later per client) with a named field for each prop.
PingData = namedtuple("PingData", (p for p, h, o in props))
def json_to_pingdata(d):
    """Convert a projected ping dict into a PingData record, in `props` order."""
    values = [d[path] for _name, path, _combine in props]
    return PingData._make(values)
def add_pingdata(a, b):
    """Merge two PingData records field-by-field using each prop's combiner."""
    merged = [combine(x, y) for (_name, _path, combine), x, y in zip(props, a, b)]
    return PingData._make(merged)
# Key each ping by client id so per-client records can be combined below.
data3 = data2.map(lambda p: (p['clientId'], json_to_pingdata(p)))
# In[27]:
data3 = data3.cache()
# In[12]:
# Combine all of each client's subsessions into one aggregate PingData.
data_by_client = data3.reduceByKey(add_pingdata).cache()
# In[13]:
data_by_client.count()
# In[14]:
# Restrict to "real" users: clients with more than two hours of total
# session time over the week.
# NOTE(review): the tuple-unpacking lambda parameter is Python-2-only syntax.
two_hours = 60 * 60 * 2
real_users = data_by_client.filter(lambda (id, d): d.session_length > two_hours)
total_users = real_users.count()
# In[15]:
total_users
# # Ghost window and GC/CC pause times
# In[28]:
def ghostfinder(client_data):
    """Return True if this client's record shows any ghost windows.

    `client_data` is a (clientId, PingData) pair.  A nonzero count in any
    GHOST_WINDOWS bucket past the first (the zero bucket) means ghost
    windows were observed; a missing histogram means none were.

    Rewritten from `def ghostfinder((id, d))` — tuple-unpacking parameters
    are Python-2-only syntax (removed by PEP 3113).  Callers still pass a
    single pair, so the interface is unchanged.
    """
    _id, d = client_data
    return (d.ghost_windows is not None) and ((d.ghost_windows.iloc[1::] > 0).any())
# Fraction of individual subsessions (not users) reporting ghost windows.
ghost_subsessions = data3.filter(ghostfinder).count()
all_subsessions = data3.count()
print "Fraction of subsessions that see ghost windows: {:.1f}%".format(float(ghost_subsessions) / all_subsessions * 100)
# In[16]:
# Same question at user granularity, restricted to real (>2h) users.
ghost_users = real_users.filter(ghostfinder).count()
print "users who experienced ghost windows: {:d} ({:.1f}%)".format(ghost_users, ghost_users / float(total_users) * 100)
def pausefinder(client_data):
    """Return True if the client saw a parent-process GC or CC max pause >150ms.

    `client_data` is a (clientId, PingData) pair.  `truncate(before=150)`
    keeps only histogram buckets at or above 150ms; any nonzero count there
    means at least one long pause was recorded.

    (Rewritten from the Python-2-only tuple-parameter form, PEP 3113;
    callers are unchanged.)
    """
    _id, d = client_data
    if (d.gc_max_pause_chrome is not None) and (d.gc_max_pause_chrome.truncate(before=150) > 0).any():
        return True
    if (d.cc_max_pause_chrome is not None) and (d.cc_max_pause_chrome.truncate(before=150) > 0).any():
        return True
    return False
# Count of real users with at least one >150ms chrome-process GC/CC pause.
chrome_gcpause_users = real_users.filter(pausefinder).count()
print "users who experienced chrome GC/CC max pause time >150ms: {:d} ({:.1f}%)".format(chrome_gcpause_users, chrome_gcpause_users / float(total_users) * 100)
# In[17]:
def max_cc_pause(client_data):
    """Return the largest parent-process GC/CC max-pause bucket with a
    nonzero count for this client, or None if neither histogram is present.

    `(series > 0).sort_index(ascending=False).idxmax()` yields the highest
    bucket label whose count is nonzero.  NOTE(review): if every bucket were
    zero, idxmax() would return the highest bucket label regardless — confirm
    that all-zero histograms can't reach this point.

    Rewritten from the Python-2-only tuple-parameter form; the local that
    shadowed the function's own name is renamed.  Callers are unchanged.
    """
    _id, d = client_data
    best_cc = None
    if d.cc_max_pause_chrome is not None:
        best_cc = (d.cc_max_pause_chrome > 0).sort_index(ascending=False).idxmax()
    best_gc = None
    if d.gc_max_pause_chrome is not None:
        best_gc = (d.gc_max_pause_chrome > 0).sort_index(ascending=False).idxmax()
    # Python 2's max() treats None as smaller than any number; spell that
    # out explicitly so the logic is portable and obvious.
    if best_cc is None:
        return best_gc
    if best_gc is None:
        return best_cc
    return max(best_cc, best_gc)
# Bucket edges for the per-user max-pause distribution: fine steps below
# 200ms, coarser up to 1s, then 500ms steps up to 10s.
# NOTE(review): list + range(...) concatenation is Python-2-only (range is a
# list there); Python 3 would need list(range(...)).
buckets = [0] + range(26, 200, 25) + range(201, 1001, 100) + range(1001, 10001, 500) + [10001]
max_chrome_gccc_pause_by_client = real_users.map(max_cc_pause).histogram(buckets)
print "Distribution of max chrome cc/gc pause time by user (over one week):"
# Walk the buckets from largest to smallest so `cum` is the cumulative
# number of users at or above each bucket.
cum = 0
for i in range(len(buckets) - 2, -1, -1):
    start = buckets[i]
    end = buckets[i+1] - 1
    user_count = max_chrome_gccc_pause_by_client[1][i]
    cum += user_count
    print "{:5d} - {:5d}: {:4d} ({:4.1f}%) cumulative ({:4.1f}%)".format(start, end, user_count, user_count / float(total_users) * 100, cum / float(total_users) * 100)
# *Note: the above distribution is based on the histogram buckets. The GC histogram max is 1000 (one second). The CC histogram max is 10000 (ten seconds). This partly explains the uneven bump of the 1-second bucket for long GCs but short CCs.*
# In[18]:
def pausefinder_content(client_data):
    """Return True if the client saw a content-process CC max pause >2500ms.

    `client_data` is a (clientId, PingData) pair.  Only the cycle-collector
    histogram is checked: the GC max-pause histogram tops out at one second,
    so it cannot express pauses this long.  (Rewritten from the
    Python-2-only tuple-parameter form; callers are unchanged.)
    """
    _id, d = client_data
    if (d.cc_max_pause_content is not None) and (d.cc_max_pause_content.truncate(before=2500) > 0).any():
        return True
    return False
# Count of real users with at least one >2500ms content-process CC pause.
content_gcpause_users = real_users.filter(pausefinder_content).count()
print "users who experienced content CC max pause time >2500ms: {:d} ({:.1f}%)".format(content_gcpause_users, content_gcpause_users / float(total_users) * 100)
# In[19]:
def max_cc_pause_content(client_data):
    """Return the largest content-process GC/CC max-pause bucket with a
    nonzero count for this client, or None if neither histogram is present.

    Content-process counterpart of max_cc_pause; see that function for the
    idxmax() caveat.  Rewritten from the Python-2-only tuple-parameter form
    with the function-name-shadowing local renamed; callers are unchanged.
    """
    _id, d = client_data
    best_cc = None
    if d.cc_max_pause_content is not None:
        best_cc = (d.cc_max_pause_content > 0).sort_index(ascending=False).idxmax()
    best_gc = None
    if d.gc_max_pause_content is not None:
        best_gc = (d.gc_max_pause_content > 0).sort_index(ascending=False).idxmax()
    # Explicit None handling replaces Python 2's None-is-smallest max().
    if best_cc is None:
        return best_gc
    if best_gc is None:
        return best_cc
    return max(best_cc, best_gc)
# Same distribution as above, but for the content process.
max_content_gccc_pause_by_client = real_users.map(max_cc_pause_content).histogram(buckets)
print "Distribution of max content cc/gc pause time by user (over one week):"
# Largest-to-smallest walk so `cum` accumulates users at or above a bucket.
cum = 0
for i in range(len(buckets) - 2, -1, -1):
    start = buckets[i]
    end = buckets[i+1] - 1
    user_count = max_content_gccc_pause_by_client[1][i]
    cum += user_count
    print "{:5d} - {:5d}: {:4d} ({:4.1f}%) cumulative ({:4.1f}%)".format(start, end, user_count, user_count / float(total_users) * 100, cum / float(total_users) * 100)
# Finally let's look at these GC pauses as MTBF. This isn't quite fair, because the measurements in question only record the max pause, not any pause. But it's likely that there's one dominant pause in a GC/CC cycle in general, so it's still interesting.
# In[ ]:
total_hours = float(aggregate_all.session_length / 60 / 60)
# In[25]:
def count_chrome_pauses(client_data):
    """Count this client's parent-process GC+CC max pauses of 150ms or more.

    Sums the counts in all histogram buckets at or above 150ms; a missing
    histogram contributes zero.  (Rewritten from the Python-2-only
    tuple-parameter form; callers are unchanged.)
    """
    _id, d = client_data
    gc_count = 0 if d.gc_max_pause_chrome is None else d.gc_max_pause_chrome.truncate(before=150).sum()
    cc_count = 0 if d.cc_max_pause_chrome is None else d.cc_max_pause_chrome.truncate(before=150).sum()
    return gc_count + cc_count
# Pseudo-MTBF: hours of total usage per long (>150ms) chrome GC/CC pause.
total_chrome_pauses = real_users.map(count_chrome_pauses).reduce(add)
print "Chrome process pseudo-MTBF, chrome GC/CC pauses >150ms: {:.1f} hours".format(total_hours / total_chrome_pauses)
# # Checkerboarding MTBF
# I (bsmedberg) have asserted that checkerboarding happens infrequently-enough that it should not be a main focus of quantum flow efforts. This is based on an analysis of the duration of checkerboarding events from telemetry.
# In[20]:
def add_users(a, b):
    """reduce() combiner: merge two (clientId, PingData) pairs into a single
    aggregate record under the placeholder client id '*'.

    The unused a_client/b_client locals of the original are dropped; only
    the PingData halves of the pairs are needed.
    """
    return ('*', add_pingdata(a[1], b[1]))
aggregate_all = real_users.reduce(add_users)[1]
# In[21]:
# Checkerboarding pseudo-MTBF: walk the aggregate duration histogram from
# the longest bucket down, accumulating event counts, and print the hours
# of usage per checkerboard event at each cutoff >= 150ms.
# NOTE(review): Series.iteritems() is the Python-2-era pandas spelling
# (modern pandas uses .items()).
stotal = 0
for cutoff, count in aggregate_all.checkerboard_duration.sort_index(ascending=False).iteritems():
    if cutoff < 150:
        break
    stotal += count
    print " {:5}ms: {:0.3f} hours".format(cutoff, total_hours / stotal)
# # Tab Usage
#
# Many Firefox developers assume that most users browse with multiple tabs open on a regular basis. The data shows otherwise!
# In[22]:
# Distribution of each real user's maximum concurrent tab count over the week.
# NOTE(review): the tuple-unpacking lambda parameter is Python-2-only syntax.
tab_buckets = [1, 2, 3, 4, 5, 6, 10, 15, 20, 25, 50, 100, 200, 300, 1000]
tab_histogram = real_users.map(lambda (id, d): d.max_tab_count).histogram(tab_buckets)
print "Distribution of the maximum number of tabs users had open over one week:"
for i in range(0, len(tab_buckets) - 1):
    start = tab_buckets[i]
    end = tab_buckets[i+1] - 1
    user_count = tab_histogram[1][i]
    print "{:4d} - {:4d}: {:d} ({:.1f}%)".format(start, end, user_count, user_count / float(total_users) * 100)
# What about *heavy* users? If we define a heavy users as anyone who used Firefox more than 40 hours this week, how does that change the distribution?
# In[23]:
# Repeat the tab-count distribution for heavy users only: clients with more
# than 40 hours of total session time this week.
heavy_users = real_users.filter(lambda (id, d): d.session_length > 40 * 60 * 60)
tab_histogram = heavy_users.map(lambda (id, d): d.max_tab_count).histogram(tab_buckets)
total_heavy_users = heavy_users.count()
print "Distribution of the maximum number of tabs users had open over one week:"
for i in range(0, len(tab_buckets) - 1):
    start = tab_buckets[i]
    end = tab_buckets[i+1] - 1
    user_count = tab_histogram[1][i]
    print "{:4d} - {:4d}: {:d} ({:.1f}%)".format(start, end, user_count, user_count / float(total_heavy_users) * 100)
# Conclusion: Heavy user tab usage isn't that much different from the average user.
# *Caveat: this is nightly, whose users are completely atypical.*
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment