# wr-content-frame-time-thresholds
# coding: utf-8
# In[1]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from moztelemetry.dataset import Dataset
get_ipython().magic(u'matplotlib inline')
# We can look at the schema of the dataset we are interested in:
# In[2]:
Dataset.from_source('telemetry').schema
# Let's create a Dataset of Telemetry main-ping submissions from the nightly channel, restricted to recent submission dates:
# In[3]:
pings_dataset = (
    Dataset.from_source('telemetry')
    .where(docType='main')
    #.where(appBuildId='20180721100146')
    #.where(submissionDate='20180925')
    .where(submissionDate=lambda x: x > '20181100')
    .where(appUpdateChannel="nightly")
)
# Select only the properties we need, then take the full (100%) sample:
# In[4]:
pings = (
    pings_dataset
    .select(
        'clientId',
        buildId='application.buildId',
        frame_time='payload.processes.gpu.histograms.CONTENT_FRAME_TIME.values',
        frame_time_sum='payload.processes.gpu.histograms.CONTENT_FRAME_TIME.sum',
        frame_time_svg='payload.processes.gpu.histograms.CONTENT_FRAME_TIME_WITH_SVG.values',
        frame_time_svg_sum='payload.processes.gpu.histograms.CONTENT_FRAME_TIME_WITH_SVG.sum',
        experiments='environment.experiments',
        osName='environment.system.os.name',
        gfx='environment.system.gfx')
    .records(sc, sample=1.)
)
# In[5]:
#pings = (
# pings_dataset
# .records(sc, sample=0.01)
#)
#pings.take(1)
#
# In[6]:
pings.count()
# In[7]:
pings.take(4)
# In[8]:
# We add two extra steps: the first rewrites the ping to make some
# information more easily accessible (like the primary adapter), and the
# second removes any pings that lack adapter information.
def rewrite_ping(p):
    # Guard against pings with no gfx section at all.
    adapters = (p.get('gfx') or {}).get('adapters')
    if not adapters:
        return None
    adapter = adapters[0]
    p['adapter'] = adapter
    # Convert the version to a tuple of integers.
    #if 'driverVersion' in adapter:
    #    p['driverVersion'] = [int(n) for n in adapter['driverVersion'].split('.') if n.isdigit()]
    return p

def filter_ping(p):
    # rewrite_ping returns None for pings without adapter information.
    return p is not None and 'adapter' in p
#rpings = pings.map(rewrite_ping).filter(filter_ping)
#rpings = rpings.cache()
#rpings.count()
# To prevent pseudoreplication, let's consider only a single submission for each client. As this step requires a distributed shuffle, it should be run only after extracting the attributes of interest with *Dataset.select()*.
# In[9]:
#subset = (
# rpings
# .map(lambda p: (p['clientId'], p))
# .reduceByKey(lambda p1, p2: p1)
# .map(lambda p: p[1])
#)
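# A minimal sketch of the dedup pattern above (left disabled in this run) on
# a toy RDD, assuming `sc` is the notebook's SparkContext as used earlier.
# reduceByKey keeps one arbitrary record per key:
example = sc.parallelize([('client-a', 1), ('client-a', 2), ('client-b', 3)])
example.reduceByKey(lambda p1, p2: p1).map(lambda kv: kv[1]).collect()  # -> one value per client, e.g. [1, 3]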
# Caching is fundamental as it allows for an iterative, real-time development workflow:
# In[10]:
cached = pings.cache()
# How many pings are we looking at?
# In[11]:
cached.count()
# In[12]:
wrExperiment = (
    cached
    .filter(lambda p: "experiments" in p and p["experiments"])
    .filter(lambda p: "prefflip-webrender-v1-2-1492568" in p["experiments"])
)
wrExperiment.count()
# In[13]:
# Keep only pings that report a WebRender qualification status.
cached = cached.filter(lambda p: "features" in p["gfx"])
cached = cached.filter(lambda p: "wrQualified" in p["gfx"]["features"])
cached.count()
# In[14]:
wrQualified = cached.filter(lambda p: p["gfx"]["features"]["wrQualified"]["status"] == "available")
wrQualified.count()
# Restrict to machines with a single monitor.
wrQualified = wrQualified.filter(lambda p: len(p["gfx"]["monitors"]) == 1)
# In[15]:
wrExperiment = (
    cached
    .filter(lambda p: "experiments" in p and p["experiments"])
    .filter(lambda p: "prefflip-webrender-v1-2-1492568" in p["experiments"])
)
wrExperiment.map(lambda p: p["gfx"]["features"]["compositor"]).countByValue()
# In[16]:
# Restrict to qualified machines with a single 60Hz monitor, so vsync
# intervals (and hence CONTENT_FRAME_TIME scores) are comparable.
wrExperiment = wrExperiment.filter(lambda p: p["gfx"]["features"]["wrQualified"]["status"] == "available")
wrExperiment = wrExperiment.filter(lambda p: len(p["gfx"]["monitors"]) == 1 and p["gfx"]["monitors"][0]["refreshRate"] == 60)
# In[17]:
wrExperiment.map(lambda p: p["experiments"]["prefflip-webrender-v1-2-1492568"]["branch"]).countByValue()
# In[18]:
treatment = wrExperiment.filter(lambda p: p["experiments"]["prefflip-webrender-v1-2-1492568"]["branch"] == "enabled")
control = wrExperiment.filter(lambda p: p["experiments"]["prefflip-webrender-v1-2-1492568"]["branch"] == "disabled")
treatment.count(), control.count()
# In[19]:
treatment.map(lambda p: p["gfx"]["features"]["compositor"]).countByValue()
# In[20]:
wrQualified.take(1)
# In[21]:
# Keep treatment pings that are actually compositing with WebRender, and
# control pings on the default D3D11 compositor.
wrEnabled = treatment.filter(lambda p: p["gfx"]["features"]["compositor"] == "webrender")
wrDisabled = control.filter(lambda p: p["gfx"]["features"]["compositor"] == "d3d11")
wrEnabled.count(), wrDisabled.count()
# In[22]:
# Downsample the control group to roughly the size of the treatment group;
# three independent samples let us eyeball the sampling noise.
wrDisabled2 = wrDisabled.sample(False, wrEnabled.count() / (wrDisabled.count() * 1.0))
wrDisabled3 = wrDisabled.sample(False, wrEnabled.count() / (wrDisabled.count() * 1.0))
wrDisabled = wrDisabled.sample(False, wrEnabled.count() / (wrDisabled.count() * 1.0))
# In[23]:
wrDisabled3.count(), wrDisabled.count(), wrDisabled2.count(), wrEnabled.count()
# In[24]:
def aggregate_series(s1, s2):
    """Sum two series element-wise; if one is None, return the other."""
    if s1 is None:
        return s2
    if s2 is None:
        return s1
    return s1.add(s2, fill_value=0)
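# A quick check of aggregate_series with made-up series (hypothetical values,
# not real telemetry): buckets present in only one input survive thanks to
# fill_value=0.
aggregate_series(pd.Series({1: 2.0, 2: 1.0}), pd.Series({2: 4.0, 3: 7.0}))
# -> index 1: 2.0, 2: 5.0, 3: 7.0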
def roundDict(x):
    """Collapse raw histogram buckets into whole-frame buckets (key // 100).

    Each bucket's count is attributed to the key that closes its range:
    only the last sub-100 bucket's count is carried into the first
    whole-frame bucket, and the final bucket's count is dropped.
    """
    int_x = {int(k): v for k, v in x.items()}
    d = {}
    lastValue = 0
    for key, value in sorted(int_x.items()):
        if key < 100:
            lastValue = value
            continue
        rounded = key // 100
        if rounded in d:
            d[rounded] += lastValue
        else:
            d[rounded] = lastValue
        lastValue = value
    return d
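# A quick sanity check of roundDict with made-up bucket counts (hypothetical
# values, not real telemetry): the count at key 50 is carried into
# whole-frame bucket 1, the count at 120 into bucket 2, and the trailing
# bucket's count (2) is dropped.
roundDict({'50': 3, '120': 5, '230': 2})  # -> {1: 3, 2: 5}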
# In[25]:
aggregated_enabled_svg = (
    wrEnabled
    .filter(lambda p: p['frame_time_svg'])
    .map(lambda p: pd.Series(roundDict(p['frame_time_svg'])))
    .reduce(aggregate_series)
)
aggregated_enabled_svg.index = [int(i) for i in aggregated_enabled_svg.index]
aggregated_enabled_svg = aggregated_enabled_svg.sort_index()

aggregated_enabled = (
    wrEnabled
    .filter(lambda p: p['frame_time'])
    .map(lambda p: pd.Series(roundDict(p['frame_time'])))
    .reduce(aggregate_series)
)
aggregated_enabled.index = [int(i) for i in aggregated_enabled.index]
aggregated_enabled = aggregated_enabled.sort_index()

aggregated_disabled = (
    wrDisabled
    .filter(lambda p: p['frame_time'])
    .map(lambda p: pd.Series(roundDict(p['frame_time'])))
    .reduce(aggregate_series)
)
aggregated_disabled.index = [int(i) for i in aggregated_disabled.index]
aggregated_disabled = aggregated_disabled.sort_index()
# In[26]:
aggregated_disabled
# In[27]:
aggregated_enabled_svg
# In[28]:
aggregated_enabled
# In[29]:
# Subtract the SVG-only histogram from the overall one to get non-SVG paints;
# fill_value=0 avoids NaNs for buckets missing from either series.
aggregated_enabled_no_svg = aggregated_enabled.sub(aggregated_enabled_svg, fill_value=0)
aggregated_enabled_no_svg
# In[30]:
def percent_slow_frames(dataset1, dataset2):
    # Skip the first bucket (fast paints) and express the rest as a
    # percentage of all paints.
    percent_enabled = dataset1[1:].map(lambda x: 100 * x / dataset1.sum())
    percent_disabled = dataset2[1:].map(lambda x: 100 * x / dataset2.sum())
    df = pd.DataFrame()
    df['enabled'] = percent_enabled
    df['disabled'] = percent_disabled
    p = df.plot(kind='bar', figsize=(15, 7))
    p.set_xlabel("frames")
    p.set_ylabel("percentage of content paints that happen in x frames")
    return p

def relative_change_slow_frames(dataset1, dataset2):
    percent_enabled = dataset1[1:].map(lambda x: x / dataset1.sum())
    percent_disabled = dataset2[1:].map(lambda x: x / dataset2.sum())
    p = (100 * (percent_enabled - percent_disabled) / percent_disabled).plot(kind='bar', figsize=(15, 7))
    p.set_xlabel("frames")
    p.set_ylabel("% change in content paints of x frames from enabling WebRender")
    return p
# In[31]:
percent_slow_frames(aggregated_enabled, aggregated_disabled)
# In[32]:
percent_slow_frames(aggregated_enabled_svg, aggregated_disabled)
# In[33]:
percent_slow_frames(aggregated_enabled_no_svg, aggregated_disabled)
# In[34]:
relative_change_slow_frames(aggregated_enabled, aggregated_disabled)
# In[35]:
relative_change_slow_frames(aggregated_enabled_svg, aggregated_disabled)
# In[36]:
relative_change_slow_frames(aggregated_enabled_no_svg, aggregated_disabled)
# Compute the percentage of content paints that took 2 or more frames (CONTENT_FRAME_TIME score >= 200):
# In[45]:
def percent_frames_slower_than(count, dataset):
    # Buckets are indexed from 1 whole frame, so the tail starting at
    # position (count - 1) covers paints that took `count` or more frames.
    percent = dataset[count - 1:].map(lambda x: 100 * x / dataset.sum())
    return percent.sum()

percent_frames_slower_than(2, aggregated_enabled)
# In[46]:
percent_frames_slower_than(2, aggregated_enabled_svg)
# In[47]:
percent_frames_slower_than(2, aggregated_enabled_no_svg)
# In[48]:
percent_frames_slower_than(2, aggregated_disabled)
# Now try with some other thresholds:
# In[50]:
percent_frames_slower_than(3, aggregated_enabled), percent_frames_slower_than(3, aggregated_disabled)
# In[51]:
percent_frames_slower_than(4, aggregated_enabled), percent_frames_slower_than(4, aggregated_disabled)
# In[52]:
percent_frames_slower_than(8, aggregated_enabled), percent_frames_slower_than(8, aggregated_disabled)
# In[53]:
percent_frames_slower_than(16, aggregated_enabled), percent_frames_slower_than(16, aggregated_disabled)