Created
November 12, 2018 03:25
-
-
Save mattwoodrow/6743c06bf847c8adbe68a1ef95adaf34 to your computer and use it in GitHub Desktop.
wr-content-frame-time-thresholds
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# In[1]: | |
import ujson as json | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
import numpy as np | |
from moztelemetry.dataset import Dataset | |
get_ipython().magic(u'matplotlib inline') | |
# Inspect the dimensions the 'telemetry' dataset can be filtered on:

# In[2]:

Dataset.from_source('telemetry').schema

# Build the Dataset of 'main' pings from the nightly channel whose
# submissionDate string compares greater than '20181100':

# In[3]:

telemetry_source = Dataset.from_source('telemetry')
main_pings = telemetry_source.where(docType='main')
# Alternative filters used in earlier runs:
#   .where(appBuildId='20180721100146')
#   .where(submissionDate='20180925')
recent_main_pings = main_pings.where(submissionDate=lambda x: x > '20181100')
pings_dataset = recent_main_pings.where(appUpdateChannel="nightly")
# Select only the properties we need. `sample=1.` keeps every matching ping;
# pass a smaller fraction (e.g. 0.1 for a 10% sample) to iterate faster:

# In[4]:

# Alias -> path inside the ping for each property we want to keep.
selected_properties = dict(
    buildId='application.buildId',
    frame_time='payload.processes.gpu.histograms.CONTENT_FRAME_TIME.values',
    frame_time_sum='payload.processes.gpu.histograms.CONTENT_FRAME_TIME.sum',
    frame_time_svg='payload.processes.gpu.histograms.CONTENT_FRAME_TIME_WITH_SVG.values',
    frame_time_svg_sum='payload.processes.gpu.histograms.CONTENT_FRAME_TIME_WITH_SVG.sum',
    experiments='environment.experiments',
    osName='environment.system.os.name',
    gfx='environment.system.gfx',
)
selected_pings = pings_dataset.select('clientId', **selected_properties)
pings = selected_pings.records(sc, sample=1.)

# In[5]:

#pings = (
#    pings_dataset
#    .records(sc, sample=0.01)
#)
#pings.take(1)
#

# In[6]:

pings.count()

# In[7]:

pings.take(4)
# In[8]: | |
# We add two extra steps. The first rewrites the ping to have some | |
# information more easily accessible (like the primary adapter), | |
# and the second step removes any pings that don't have adapter | |
# information. | |
def rewrite_ping(p):
    """Expose the primary graphics adapter of a ping as ``p['adapter']``.

    The first entry of ``p['gfx']['adapters']`` is stored under the
    ``'adapter'`` key. The ping is modified in place and returned.

    Returns ``None`` when the ping has no usable adapter information:
    ``gfx`` missing or ``None``, or ``adapters`` missing or empty.
    """
    # `gfx` may be absent or explicitly None; treat both as "no adapters"
    # instead of failing with AttributeError on None.get(...).
    gfx = p.get('gfx') or {}
    adapters = gfx.get('adapters')
    if not adapters:
        return None
    adapter = adapters[0]
    p['adapter'] = adapter
    # Convert the version to a tuple of integers.
    #if 'driverVersion' in adapter:
    #    p['driverVersion'] = [int(n) for n in adapter['driverVersion'].split('.') if n.isdigit()]
    return p
def filter_ping(p):
    """Return True when ``p`` is a ping that carries an ``'adapter'`` entry.

    ``rewrite_ping`` returns ``None`` for pings without adapter information,
    so ``None`` is rejected here rather than raising TypeError on the
    membership test.
    """
    return p is not None and 'adapter' in p
#rpings = pings.map(rewrite_ping).filter(filter_ping) | |
#rpings = rpings.cache() | |
#rpings.count() | |
# To prevent pseudoreplication, let's consider only a single submission for each client. As this step requires a distributed shuffle, it should always be run only after extracting the attributes of interest with *Dataset.select()*. | |
# In[9]: | |
#subset = ( | |
# rpings | |
# .map(lambda p: (p['clientId'], p)) | |
# .reduceByKey(lambda p1, p2: p1) | |
# .map(lambda p: p[1]) | |
#) | |
# Cache the selected pings so the cells below can be re-run interactively
# without re-reading the dataset each time:

# In[10]:

cached = pings.cache()

# How many pings are we looking at?

# In[11]:

cached.count()

# In[12]:

# Pings enrolled in the WebRender pref-flip experiment (any branch).
wrExperiment = cached.filter(
    lambda p: "experiments" in p
    and p["experiments"]
    and "prefflip-webrender-v1-2-1492568" in p["experiments"]
)
wrExperiment.count()

# In[13]:

# Keep only pings that report a wrQualified graphics feature.
cached = cached.filter(
    lambda p: "features" in p["gfx"] and "wrQualified" in p["gfx"]["features"]
)
cached.count()

# In[14]:

wrQualified = cached.filter(
    lambda p: p["gfx"]["features"]["wrQualified"]["status"] == "available"
)
wrQualified.count()
# Restrict to pings that report exactly one monitor.
wrQualified = wrQualified.filter(lambda p: len(p["gfx"]["monitors"]) == 1)
# In[15]:

def _compositor(p):
    """Return the compositor reported in the ping's gfx features."""
    return p["gfx"]["features"]["compositor"]


def _experiment_branch(p):
    """Return the ping's branch of the WebRender pref-flip experiment."""
    return p["experiments"]["prefflip-webrender-v1-2-1492568"]["branch"]


# Experiment pings, this time drawn from the wrQualified-reporting subset.
wrExperiment = cached.filter(
    lambda p: "experiments" in p
    and p["experiments"]
    and "prefflip-webrender-v1-2-1492568" in p["experiments"]
)
wrExperiment.map(_compositor).countByValue()

# In[16]:

# Keep qualified hardware with exactly one monitor refreshing at 60.
wrExperiment = wrExperiment.filter(
    lambda p: p["gfx"]["features"]["wrQualified"]["status"] == "available"
    and len(p["gfx"]["monitors"]) == 1
    and p["gfx"]["monitors"][0]["refreshRate"] == 60
)

# In[17]:

wrExperiment.map(_experiment_branch).countByValue()

# In[18]:

treatment = wrExperiment.filter(lambda p: _experiment_branch(p) == "enabled")
control = wrExperiment.filter(lambda p: _experiment_branch(p) == "disabled")
treatment.count(), control.count()

# In[19]:

treatment.map(_compositor).countByValue()

# In[20]:

wrQualified.take(1)

# In[21]:

# Only compare pings whose compositor matches what their branch asked for.
wrEnabled = treatment.filter(lambda p: _compositor(p) == "webrender")
wrDisabled = control.filter(lambda p: _compositor(p) == "d3d11")
wrEnabled.count(), wrDisabled.count()
# In[22]:

# Down-sample the control cohort to roughly the size of the treatment cohort.
# Each count() is a Spark action, so both are evaluated once and reused
# instead of being recomputed for every sample.
enabled_count = wrEnabled.count()
disabled_count = wrDisabled.count()
if disabled_count:
    # Clamp to 1.0: a larger fraction has no meaning when sampling without
    # replacement (it happens when treatment outnumbers control).
    sample_fraction = min(1.0, enabled_count / (disabled_count * 1.0))
else:
    # Empty control cohort: nothing to sample, and avoid dividing by zero.
    sample_fraction = 1.0

# Three independent samples of the full control cohort; wrDisabled itself is
# rebound last so that all three are drawn from the same unsampled RDD.
wrDisabled2 = wrDisabled.sample(False, sample_fraction)
wrDisabled3 = wrDisabled.sample(False, sample_fraction)
wrDisabled = wrDisabled.sample(False, sample_fraction)

# In[23]:

wrDisabled3.count(), wrDisabled.count(), wrDisabled2.count(), wrEnabled.count()
# In[24]: | |
def aggregate_series(s1, s2):
    """Add two series label-wise, treating a missing series as absent.

    When either argument is ``None`` the other one is returned unchanged
    (so two ``None`` inputs give ``None``). Otherwise labels present in only
    one series contribute 0 from the other.
    """
    if s1 is None or s2 is None:
        return s2 if s1 is None else s1
    return s1.add(s2, fill_value=0)
def roundDict(x):
    """Collapse a histogram ``{bucket: count}`` into whole hundreds.

    Bucket keys are converted to ``int`` and visited in ascending order.
    The count of each bucket is credited to the *next* bucket's key divided
    by 100 (integer division), provided that next key is at least 100.
    Counts whose following bucket starts below 100 are dropped.

    Works on both Python 2 and Python 3: ``dict.items()`` replaces the
    Python-2-only ``iteritems()`` and ``//`` keeps the result keys integral.

    NOTE(review): the count of the highest bucket is never added to the
    result because no bucket follows it -- confirm this is intended.

    Returns a dict mapping the hundreds value (int) to the summed count.
    """
    int_x = {int(k): v for k, v in x.items()}
    d = {}
    lastValue = 0
    for key, value in sorted(int_x.items()):
        if key >= 100:
            rounded = key // 100
            # Credit the previous bucket's count to this bucket's hundreds.
            d[rounded] = d.get(rounded, 0) + lastValue
        lastValue = value
    return d
# In[25]:

def _aggregate_frame_histogram(rdd, field):
    """Sum the `field` histograms of all pings in `rdd` into one sorted Series.

    Pings whose `field` is empty or missing a value are skipped; each
    remaining histogram is collapsed with roundDict and the results are
    added together with aggregate_series.
    """
    combined = (
        rdd
        .filter(lambda p: p[field])
        .map(lambda p: pd.Series(roundDict(p[field])))
        .reduce(aggregate_series)
    )
    combined.index = [int(i) for i in combined.index]
    return combined.sort_index()


aggregated_enabled_svg = _aggregate_frame_histogram(wrEnabled, 'frame_time_svg')
aggregated_enabled = _aggregate_frame_histogram(wrEnabled, 'frame_time')
aggregated_disabled = _aggregate_frame_histogram(wrDisabled, 'frame_time')
# In[26]:

aggregated_disabled

# In[27]:

aggregated_enabled_svg

# In[28]:

aggregated_enabled

# In[29]:

# Frames without SVG = all frames minus the frames recorded with SVG.
# fill_value=0 keeps frame counts that never appear in the SVG series;
# a plain `-` would turn them into NaN and silently drop them from later sums.
aggregated_enabled_no_svg = aggregated_enabled.sub(aggregated_enabled_svg, fill_value=0)
aggregated_enabled_no_svg
# In[30]: | |
def percent_slow_frames(dataset1, dataset2):
    """Bar-plot the share of content paints per frame count for two cohorts.

    For each series, every entry except the first one (by position) is
    expressed as a percentage of that series' total, including the first
    entry. ``dataset1`` is plotted as 'enabled' and ``dataset2`` as
    'disabled'.

    Returns the matplotlib axes of the plot.
    """
    # Totals are computed once (not once per element) and forced to float so
    # integer-typed series do not hit floor division on Python 2.
    total_enabled = float(dataset1.sum())
    total_disabled = float(dataset2.sum())
    df = pd.DataFrame()
    df['enabled'] = 100 * dataset1.iloc[1:] / total_enabled
    df['disabled'] = 100 * dataset2.iloc[1:] / total_disabled
    p = df.plot(kind='bar', figsize=(15, 7))
    p.set_xlabel("frames")
    p.set_ylabel("percentage of content paints that happen in x frames")
    return p
def relative_change_slow_frames(dataset1, dataset2):
    """Bar-plot the relative change in slow-frame share between two cohorts.

    Each series is converted to fractions of its own total, skipping the
    first entry by position. The plot shows, per frame count,
    ``100 * (enabled - disabled) / disabled`` with ``dataset1`` as enabled
    and ``dataset2`` as disabled.

    Returns the matplotlib axes of the plot, like percent_slow_frames.
    """
    # Totals are computed once (not once per element) and forced to float so
    # integer-typed series do not hit floor division on Python 2.
    total_enabled = float(dataset1.sum())
    total_disabled = float(dataset2.sum())
    percent_enabled = dataset1.iloc[1:] / total_enabled
    percent_disabled = dataset2.iloc[1:] / total_disabled
    relative_change = 100 * (percent_enabled - percent_disabled) / percent_disabled
    p = relative_change.plot(kind='bar', figsize=(15, 7))
    p.set_xlabel("frames")
    p.set_ylabel("% change in content paints of x frames from enabling WebRender")
    return p
# Share of slow content paints: WebRender cohort (all frames, frames with
# SVG, frames without SVG) against the control cohort.

# In[31]:

percent_slow_frames(dataset1=aggregated_enabled, dataset2=aggregated_disabled)

# In[32]:

percent_slow_frames(dataset1=aggregated_enabled_svg, dataset2=aggregated_disabled)

# In[33]:

percent_slow_frames(dataset1=aggregated_enabled_no_svg, dataset2=aggregated_disabled)

# Relative change of those shares, for the same three comparisons.

# In[34]:

relative_change_slow_frames(dataset1=aggregated_enabled, dataset2=aggregated_disabled)

# In[35]:

relative_change_slow_frames(dataset1=aggregated_enabled_svg, dataset2=aggregated_disabled)

# In[36]:

relative_change_slow_frames(dataset1=aggregated_enabled_no_svg, dataset2=aggregated_disabled)
# Compute the percentage of frames that took more than 2 frames (CONTENT_FRAME_TIME score > 200) | |
# In[45]: | |
def percent_frames_slower_than(count, dataset):
    """Return the percentage of paints whose frame-count label is >= ``count``.

    ``dataset`` is a Series indexed by frame count. Rows are selected by
    label rather than by position: the frame-count index can have gaps, and
    positional slicing (``dataset[count-1:]``) then starts at the wrong row
    for larger thresholds. For a gap-free index starting at 1 the result is
    the same as with positional slicing.
    """
    # Compute the total once and as a float, so integer-typed series do not
    # hit floor division on Python 2.
    total = float(dataset.sum())
    slow = dataset[dataset.index >= count]
    return (100 * slow / total).sum()
percent_frames_slower_than(count=2, dataset=aggregated_enabled)

# In[46]:

percent_frames_slower_than(count=2, dataset=aggregated_enabled_svg)

# In[47]:

percent_frames_slower_than(count=2, dataset=aggregated_enabled_no_svg)

# In[48]:

percent_frames_slower_than(count=2, dataset=aggregated_disabled)

# Now try with some other thresholds (each cell shows enabled, disabled):

# In[50]:

(percent_frames_slower_than(count=3, dataset=aggregated_enabled),
 percent_frames_slower_than(count=3, dataset=aggregated_disabled))

# In[51]:

(percent_frames_slower_than(count=4, dataset=aggregated_enabled),
 percent_frames_slower_than(count=4, dataset=aggregated_disabled))

# In[52]:

(percent_frames_slower_than(count=8, dataset=aggregated_enabled),
 percent_frames_slower_than(count=8, dataset=aggregated_disabled))

# In[53]:

(percent_frames_slower_than(count=16, dataset=aggregated_enabled),
 percent_frames_slower_than(count=16, dataset=aggregated_disabled))

# In[ ]:
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment