mattwoodrow/wr-content-frame-time-variants.ipynb Secret

## wr-content-frame-time-variants.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              wr-content-frame-time-variants.ipynb
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## wr-content-frame-time-variants.py

# coding: utf-8

# In[51]:

import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from moztelemetry.dataset import Dataset

get_ipython().magic(u'matplotlib inline')


# We can look at the schema of the dataset we are interested in:

# In[52]:

# Evaluated only for its notebook display value: shows the schema attribute of
# the 'telemetry' source, i.e. the dimensions that .where() can filter on below
# (presumably — confirm against the moztelemetry docs).
Dataset.from_source('telemetry').schema


# Let's create a Dataset of nightly-channel 'main' pings submitted after 2018-11-28:

# In[53]:

# Narrow the 'telemetry' source down to the pings this analysis compares.
pings_dataset = (
    Dataset.from_source('telemetry')
    .where(docType='main')  # 'main' pings only
    # The date is compared as a string, which relies on the zero-padded
    # YYYYMMDD form (as in '20181128') sorting chronologically. The comparison
    # is strict, so 2018-11-28 itself is excluded and there is no upper bound.
    .where(submissionDate=lambda x: x > '20181128')
    .where(appUpdateChannel="nightly")  # nightly channel only
)


# Select only the properties we need. Note that sample=1. below keeps every matching ping; pass e.g. sample=0.1 for a 10% sample:

# In[54]:

# Project each ping down to the fields used below. Keyword names become the
# keys the later lambdas index with (p["gfx"], p["experiments"], p['full_paint_time'], ...).
pings = (
    pings_dataset
    .select(
        'clientId',
        buildId='application.buildId',
        # GPU-process histograms; each is later accessed as p[name]['values'].
        frame_time='payload.processes.gpu.histograms.CONTENT_FRAME_TIME',
        frame_time_svg='payload.processes.gpu.histograms.CONTENT_FRAME_TIME_WITH_SVG',
        frame_time_without_resource_upload='payload.processes.gpu.histograms.CONTENT_FRAME_TIME_WITHOUT_RESOURCE_UPLOAD',
        frame_time_without_upload='payload.processes.gpu.histograms.CONTENT_FRAME_TIME_WITHOUT_UPLOAD',
        full_paint_time='payload.processes.gpu.histograms.CONTENT_FULL_PAINT_TIME',
        # Environment data used to pick the experiment branch and compositor.
        experiments='environment.experiments',
        osName='environment.system.os.name',
        gfx='environment.system.gfx')
    # `sc` is not defined in this file — presumably the SparkContext injected
    # by the notebook environment. sample=1. requests the full data set.
    .records(sc, sample=1.)
)


# In[55]:

# `pings` is presumably a Spark RDD (it is used with cache/filter/map/sample
# below). This count runs before .cache() is requested, so it does not benefit
# from the cache.
pings.count()


# Caching is fundamental as it allows for an iterative, real-time development workflow:

# In[56]:

cached = pings.cache()


# How many pings are we looking at?

# In[57]:

cached.count()


# In[58]:

# NOTE(review): this cell looks up the "v1-3" experiment slug, while every
# later cell uses "v1-2". wrExperiment is reassigned in In[61], so this cell
# only contributes the displayed count.
wrExperiment = cached.filter(lambda p: "experiments" in p and p["experiments"]).filter(lambda p: "prefflip-webrender-v1-3-1492568" in p["experiments"])
wrExperiment.count()


# In[59]:

# Keep only pings that report a wrQualified feature entry. From here on
# `cached` names the filtered collection, not the raw cached pings.
# Assumes p["gfx"] is never None — TODO confirm; `in` would raise otherwise.
cached = cached.filter(lambda p: "features" in p["gfx"])
cached = cached.filter(lambda p: "wrQualified" in p["gfx"]["features"])
cached.count()


# In[60]:

# All WebRender-qualified pings, regardless of experiment enrolment. Only used
# for the take(1) inspection in In[66]. The count shown here is taken before
# the single-monitor filter on the following line is applied.
wrQualified = cached.filter(lambda p: p["gfx"]["features"]["wrQualified"]["status"] == "available" )
wrQualified.count()
wrQualified = wrQualified.filter(lambda p: len(p["gfx"]["monitors"]) == 1)


# In[61]:

# Pings enrolled in the "v1-2" experiment, broken down by reported compositor.
wrExperiment = cached.filter(lambda p: "experiments" in p and p["experiments"]).filter(lambda p: "prefflip-webrender-v1-2-1492568" in p["experiments"])
wrExperiment.map(lambda p: p["gfx"]["features"]["compositor"]).countByValue()


# In[62]:

# Restrict the experiment population to qualified, single-monitor pings that
# carry a non-empty full_paint_time histogram. Because of the last filter, the
# frame-time aggregations below are also limited to pings that have
# full_paint_time data.
wrExperiment = wrExperiment.filter(lambda p: p["gfx"]["features"]["wrQualified"]["status"] == "available")
wrExperiment = wrExperiment.filter(lambda p: len(p["gfx"]["monitors"]) == 1)
wrExperiment = wrExperiment.filter(lambda p: p['full_paint_time'])


# In[63]:

# Number of pings per experiment branch.
wrExperiment.map(lambda p: p["experiments"]["prefflip-webrender-v1-2-1492568"]["branch"]).countByValue()


# In[64]:

# Split by branch: "enabled" is the treatment group, "disabled" the control.
treatment = wrExperiment.filter(lambda p: p["experiments"]["prefflip-webrender-v1-2-1492568"]["branch"] == "enabled")
control = wrExperiment.filter(lambda p: p["experiments"]["prefflip-webrender-v1-2-1492568"]["branch"] == "disabled")
treatment.count(), control.count()


# In[65]:

# Check which compositor the treatment branch actually reports.
treatment.map(lambda p: p["gfx"]["features"]["compositor"]).countByValue()


# In[66]:

# Inspect one sample record.
wrQualified.take(1)


# In[67]:

# Keep only pings whose reported compositor matches the branch they are in:
# "webrender" for treatment, "d3d11" for control. Pings reporting any other
# compositor are dropped from both groups.
wrEnabled = treatment.filter(lambda p: p["gfx"]["features"]["compositor"] == "webrender")
wrDisabled = control.filter(lambda p: p["gfx"]["features"]["compositor"] == "d3d11")
wrEnabled.count(), wrDisabled.count()


# In[68]:

# Down-sample the control group towards the size of the treatment group.
# The first argument (False) is presumably withReplacement; the `*1.0` forces
# float division under Python 2.
# NOTE(review): the fraction exceeds 1 whenever wrEnabled is the larger group,
# and no seed is passed, so reruns may select different pings — confirm both
# are acceptable.
wrDisabled = wrDisabled.sample(False, wrEnabled.count()/(wrDisabled.count()*1.0))


# In[69]:

# Sizes after sampling.
wrEnabled.count(), wrDisabled.count()


# In[70]:

def aggregate_series(s1, s2):
    """Combine two partial results into one, tolerating a missing operand.

    If either argument is None the other one is returned unchanged (so two
    Nones give None). Otherwise the result is the index-aligned sum of the
    two series, where a label present in only one of them counts as 0 in
    the other.
    """
    if s1 is None or s2 is None:
        return s2 if s1 is None else s1
    return s1.add(s2, fill_value=0)

def roundDict(x):
    """Collapse a histogram's bucket counts into whole-frame bins.

    `x` maps bucket labels (anything int() accepts) to counts. Buckets are
    visited in ascending numeric order and each bucket's count is credited to
    the bin of the *following* bucket, where a bucket's bin is its label
    floor-divided by 100. Consequences visible in the loop below:

    * a count is discarded when the following bucket is still below 100;
    * the count of the largest bucket is never emitted;
    * the first bucket at or above 100 contributes a bin even if nothing
      precedes it (credited with 0).

    NOTE(review): presumably labels are bucket lower bounds expressed as a
    percentage of one vsync interval, so this shift rounds samples up to a
    whole number of frames — confirm against the histogram definition.

    Returns a dict mapping int bin -> summed count.
    """
    counts_by_bucket = {int(k): v for k, v in x.items()}
    frames = {}
    lastValue = 0
    # .items() and // behave the same on Python 2 and 3; the previous
    # .iteritems() and / only worked as intended on Python 2.
    for key, value in sorted(counts_by_bucket.items()):
        if key >= 100:
            rounded = key // 100
            frames[rounded] = frames.get(rounded, 0) + lastValue
        # Carry this bucket's count forward to whichever bucket comes next.
        lastValue = value
    return frames

def aggregate_frame_time_hist(x, hist):
    """Sum the whole-frame-binned histogram `hist` over every record in `x`.

    Records whose `hist` entry is falsy are skipped. Each remaining record's
    'values' mapping is binned with roundDict, wrapped in a pandas Series and
    folded together with aggregate_series. The combined Series is returned
    with integer labels, sorted ascending.

    NOTE(review): `x` is presumably a Spark RDD of ping dicts; a collection
    with no usable record is not handled here.
    """
    def has_histogram(ping):
        return ping[hist]

    def to_frame_series(ping):
        return pd.Series(roundDict(ping[hist]['values']))

    populated = x.filter(has_histogram)
    per_ping = populated.map(to_frame_series)
    combined = per_ping.reduce(aggregate_series)

    combined.index = [int(label) for label in combined.index]
    return combined.sort_index()

def aggregate_paint_time_hist(x, hist):
    """Sum the raw histogram `hist` over every record in `x`.

    Records whose `hist` entry is falsy are skipped. Each remaining record's
    'values' mapping is turned into a pandas Series as-is (no re-binning) and
    the series are folded together with aggregate_series. The combined Series
    is returned with its labels converted to int and sorted ascending.

    NOTE(review): `x` is presumably a Spark RDD of ping dicts; a collection
    with no usable record is not handled here.
    """
    def has_histogram(ping):
        return ping[hist]

    def to_series(ping):
        return pd.Series(ping[hist]['values'])

    populated = x.filter(has_histogram)
    per_ping = populated.map(to_series)
    combined = per_ping.reduce(aggregate_series)

    combined.index = [int(label) for label in combined.index]
    return combined.sort_index()


# In[71]:

# Per-frame-bin totals. The SVG-only histogram is aggregated for the WebRender
# group only; the control group has just the overall frame_time histogram.
aggregated_enabled = aggregate_frame_time_hist(wrEnabled, 'frame_time')
aggregated_enabled_svg = aggregate_frame_time_hist(wrEnabled, 'frame_time_svg')
aggregated_disabled = aggregate_frame_time_hist(wrDisabled, 'frame_time')


# In[72]:

aggregated_disabled


# In[73]:

aggregated_enabled_svg


# In[74]:

aggregated_enabled


# In[75]:

# Frames without SVG = all frames minus the frames recorded in the WITH_SVG
# histogram. The subtraction aligns on the frame-bin label; fill_value=0 treats
# a bin that is absent from one series as an empty bin. A plain `-` would turn
# such a bin into NaN, which the later .sum() calls skip, silently dropping
# that bin's frames from the non-SVG totals.
aggregated_enabled_no_svg = aggregated_enabled.sub(aggregated_enabled_svg, fill_value=0)
aggregated_enabled_no_svg


# In[76]:

def percent_slow_frames(dataset1, dataset2):
    """Bar-plot each frame bin's share of all paints for two aggregated series.

    `dataset1` is plotted as 'enabled' and `dataset2` as 'disabled'. The first
    entry of each series (by position) is left out of the plot, but the
    percentages are still taken relative to the total of the whole series,
    first entry included. The plotted rows follow `dataset1`'s labels; labels
    that exist only in `dataset2` are not shown.

    Returns the matplotlib axes produced by DataFrame.plot.
    """
    # Compute each total once rather than once per element, and as a float so
    # the division cannot truncate on Python 2 when the counts are integers.
    total_enabled = float(dataset1.sum())
    total_disabled = float(dataset2.sum())

    df = pd.DataFrame()
    # .iloc makes explicit that the first entry is skipped by position.
    df['enabled'] = 100 * dataset1.iloc[1:] / total_enabled
    df['disabled'] = 100 * dataset2.iloc[1:] / total_disabled
    p = df.plot(kind='bar', figsize=(15, 7))
    p.set_xlabel("frames")
    p.set_ylabel("percentage of content paints that happen in x frames")
    return p

def relative_change_slow_frames(dataset1, dataset2):
    """Bar-plot the relative change, per frame bin, from `dataset2` to `dataset1`.

    Each series is first converted to fractions of its own total (the total
    includes the first entry, which is itself left out of the plot). The value
    plotted for a bin is 100 * (enabled - disabled) / disabled, so a bin with
    no paints in `dataset2` yields a non-finite value, and a bin present in
    only one series yields NaN.

    Draws the plot as a side effect and returns None.
    """
    # Totals are computed once, as floats, so that the fractions are not
    # truncated to 0 on Python 2 when the counts are integers.
    share_enabled = dataset1.iloc[1:] / float(dataset1.sum())
    share_disabled = dataset2.iloc[1:] / float(dataset2.sum())
    p = (100*(share_enabled - share_disabled) / share_disabled).plot(kind='bar', figsize=(15, 7))
    p.set_xlabel("frames")
    p.set_ylabel("% change in content paints of x frames from enabling WebRender")


# In[77]:

# Share of paints per frame bin: all WebRender frames vs. the control group.
percent_slow_frames(aggregated_enabled, aggregated_disabled)


# In[78]:

# WebRender frames from the WITH_SVG histogram only. The comparison series is
# still the control group's overall frame_time histogram, because no SVG split
# was aggregated for the control group.
percent_slow_frames(aggregated_enabled_svg, aggregated_disabled)


# In[79]:

# WebRender frames with the SVG frames subtracted out, again vs. all control frames.
percent_slow_frames(aggregated_enabled_no_svg, aggregated_disabled)


# In[80]:

# The same three comparisons, shown as relative change against the control group.
relative_change_slow_frames(aggregated_enabled, aggregated_disabled)


# In[81]:

relative_change_slow_frames(aggregated_enabled_svg, aggregated_disabled)


# In[82]:

relative_change_slow_frames(aggregated_enabled_no_svg, aggregated_disabled)


# Compute the percentage of frames that took more than 2 frames (CONTENT_FRAME_TIME score > 200)

# In[83]:

def percent_frames_slower_than(count, dataset):
    """Return the percentage of `dataset`'s total found from entry `count` onward.

    Entries are selected by position, counting from 1: `count` = 1 covers the
    whole series, `count` = 2 skips the first entry, and so on. The entry at
    position `count` itself is included in the result.

    NOTE(review): `count` equals a frame count only if the series has one
    entry per frame bin starting at 1 with no gaps — confirm for the data
    passed in.
    """
    # Compute the total once rather than once per element, and as a float so
    # the division cannot truncate on Python 2 when the counts are integers.
    total = float(dataset.sum())
    return (100 * dataset.iloc[count-1:] / total).sum()

# Threshold 2 for each aggregated series in turn: all WebRender frames, the
# SVG subset, the non-SVG remainder, and finally the control group.
percent_frames_slower_than(2, aggregated_enabled)


# In[84]:

percent_frames_slower_than(2, aggregated_enabled_svg)


# In[85]:

percent_frames_slower_than(2, aggregated_enabled_no_svg)


# In[86]:

percent_frames_slower_than(2, aggregated_disabled)


# Now try with some other thresholds:

# In[87]:

# Each cell below evaluates to an (enabled, disabled) pair for one threshold.
percent_frames_slower_than(3, aggregated_enabled), percent_frames_slower_than(3, aggregated_disabled)


# In[88]:

percent_frames_slower_than(4, aggregated_enabled), percent_frames_slower_than(4, aggregated_disabled)


# In[89]:

percent_frames_slower_than(8, aggregated_enabled), percent_frames_slower_than(8, aggregated_disabled)


# In[90]:

percent_frames_slower_than(16, aggregated_enabled), percent_frames_slower_than(16, aggregated_disabled)


# Now let's look at frame times with upload time excluded

# In[91]:

# WebRender group only: the same threshold-2 figure, computed from the
# WITHOUT_RESOURCE_UPLOAD histogram.
aggregated_enabled_without_resource_upload = aggregate_frame_time_hist(wrEnabled, 'frame_time_without_resource_upload')
percent_frames_slower_than(2, aggregated_enabled_without_resource_upload)


# In[92]:

# ...and from the WITHOUT_UPLOAD histogram.
aggregated_enabled_without_upload = aggregate_frame_time_hist(wrEnabled, 'frame_time_without_upload')
percent_frames_slower_than(2, aggregated_enabled_without_upload)


# Full paint time records (in ms) the total time for the 'content' phase, at which point we should be able to start compositing.
#
# The goal is to have this finish within 1 vsync interval, so that compositing can start on the next vsync interval.
#
# I'm using a score of >=16 as the 'failure' value, though normalizing to % of vsync (using gfx.monitors[0].refreshRate) and using 100 might be slightly more accurate.

# In[93]:

# Fold the per-ping full-paint-time histograms for each group.
aggregated_enabled_full_paint_time = aggregate_paint_time_hist(wrEnabled, 'full_paint_time')
aggregated_disabled_full_paint_time = aggregate_paint_time_hist(wrDisabled, 'full_paint_time')

# Express every bin as a percentage of its own group's total.
percent_enabled_full_paint_time = (
    100.0 * aggregated_enabled_full_paint_time
    / aggregated_enabled_full_paint_time.sum()
)
percent_disabled_full_paint_time = (
    100.0 * aggregated_disabled_full_paint_time
    / aggregated_disabled_full_paint_time.sum()
)

# Rows follow the 'enabled' series' labels; the running sum turns the per-bin
# percentages into a cumulative distribution before plotting.
df = pd.DataFrame()
df['enabled'] = percent_enabled_full_paint_time
df['disabled'] = percent_disabled_full_paint_time
p = df.cumsum().plot(kind='bar', figsize=(15, 7))
p.set_xlabel("ms")
p.set_ylabel("full paint times")


# In[94]:

def percent_paints_equal_or_slower_than(count, dataset):
    """Return the percentage of `dataset`'s total found at offset `count` or later.

    `count` is a zero-based positional offset into the series, not a label
    lookup: the first `count` entries are skipped and the rest are summed as
    a percentage of the whole series. An offset past the end gives 0.

    NOTE(review): the offset equals a millisecond threshold only if the
    series has exactly one entry per millisecond starting at 0 with no
    missing buckets — confirm against the histogram's bucket layout.
    """
    # Compute the total once rather than once per element, and as a float so
    # the division cannot truncate on Python 2 when the counts are integers.
    total = float(dataset.sum())
    return (100 * dataset.iloc[count:] / total).sum()

# Share of paints at offset 16 or later, WebRender group first, then control.
percent_paints_equal_or_slower_than(16, aggregated_enabled_full_paint_time)


# In[95]:

percent_paints_equal_or_slower_than(16, aggregated_disabled_full_paint_time)


# Local profiling shows that we frequently spend some time between vsync and the start of the transaction (which is used for the start time of this measurement), this is time spent in layout/style flushing, rAF callbacks etc.
#
# This time does contribute to whether we miss the vsync for compositing though, so I think excluding it makes this measurement less useful than it could be in terms of correlating these results to the CONTENT_FRAME_TIME regression.
#
# The time between the vsync tick and the start of the transaction should be roughly consistent between configurations, but we don't have much data on how long it is or what the distribution is.
#
# We can shorten our 16ms window to account for this and get super rough ideas of what the differences are:

# In[96]:

# Each cell evaluates to an (enabled, disabled) pair for a progressively
# smaller window: 14, 12, 10, 8 and 6.
percent_paints_equal_or_slower_than(14, aggregated_enabled_full_paint_time), percent_paints_equal_or_slower_than(14, aggregated_disabled_full_paint_time)


# In[97]:

percent_paints_equal_or_slower_than(12, aggregated_enabled_full_paint_time), percent_paints_equal_or_slower_than(12, aggregated_disabled_full_paint_time)


# In[98]:

percent_paints_equal_or_slower_than(10, aggregated_enabled_full_paint_time), percent_paints_equal_or_slower_than(10, aggregated_disabled_full_paint_time)


# In[99]:

percent_paints_equal_or_slower_than(8, aggregated_enabled_full_paint_time), percent_paints_equal_or_slower_than(8, aggregated_disabled_full_paint_time)


# In[100]:

percent_paints_equal_or_slower_than(6, aggregated_enabled_full_paint_time), percent_paints_equal_or_slower_than(6, aggregated_disabled_full_paint_time)


# In[ ]:

	# coding: utf-8

	# In[51]:

	import ujson as json
	import matplotlib.pyplot as plt
	import pandas as pd
	import numpy as np

	from moztelemetry.dataset import Dataset

	get_ipython().magic(u'matplotlib inline')


	# We can look at the schema of the dataset we are interested in:

	# In[52]:

	Dataset.from_source('telemetry').schema


	# Let's create a Dataset of nightly-channel 'main' pings submitted after 2018-11-28:

	# In[53]:

	pings_dataset = (
	Dataset.from_source('telemetry')
	.where(docType='main')
	.where(submissionDate=lambda x: x > '20181128')
	.where(appUpdateChannel="nightly")
	)


	# Select only the properties we need. Note that sample=1. below keeps every matching ping; pass e.g. sample=0.1 for a 10% sample:

	# In[54]:

	pings = (
	pings_dataset
	.select(
	'clientId',
	buildId='application.buildId',
	frame_time='payload.processes.gpu.histograms.CONTENT_FRAME_TIME',
	frame_time_svg='payload.processes.gpu.histograms.CONTENT_FRAME_TIME_WITH_SVG',
	frame_time_without_resource_upload='payload.processes.gpu.histograms.CONTENT_FRAME_TIME_WITHOUT_RESOURCE_UPLOAD',
	frame_time_without_upload='payload.processes.gpu.histograms.CONTENT_FRAME_TIME_WITHOUT_UPLOAD',
	full_paint_time='payload.processes.gpu.histograms.CONTENT_FULL_PAINT_TIME',
	experiments='environment.experiments',
	osName='environment.system.os.name',
	gfx='environment.system.gfx')
	.records(sc, sample=1.)
	)


	# In[55]:

	pings.count()


	# Caching is fundamental as it allows for an iterative, real-time development workflow:

	# In[56]:

	cached = pings.cache()


	# How many pings are we looking at?

	# In[57]:

	cached.count()


	# In[58]:

	wrExperiment = cached.filter(lambda p: "experiments" in p and p["experiments"]).filter(lambda p: "prefflip-webrender-v1-3-1492568" in p["experiments"])
	wrExperiment.count()


	# In[59]:

	cached = cached.filter(lambda p: "features" in p["gfx"])
	cached = cached.filter(lambda p: "wrQualified" in p["gfx"]["features"])
	cached.count()


	# In[60]:

	wrQualified = cached.filter(lambda p: p["gfx"]["features"]["wrQualified"]["status"] == "available" )
	wrQualified.count()
	wrQualified = wrQualified.filter(lambda p: len(p["gfx"]["monitors"]) == 1)


	# In[61]:

	wrExperiment = cached.filter(lambda p: "experiments" in p and p["experiments"]).filter(lambda p: "prefflip-webrender-v1-2-1492568" in p["experiments"])
	wrExperiment.map(lambda p: p["gfx"]["features"]["compositor"]).countByValue()


	# In[62]:

	wrExperiment = wrExperiment.filter(lambda p: p["gfx"]["features"]["wrQualified"]["status"] == "available")
	wrExperiment = wrExperiment.filter(lambda p: len(p["gfx"]["monitors"]) == 1)
	wrExperiment = wrExperiment.filter(lambda p: p['full_paint_time'])


	# In[63]:

	wrExperiment.map(lambda p: p["experiments"]["prefflip-webrender-v1-2-1492568"]["branch"]).countByValue()


	# In[64]:

	treatment = wrExperiment.filter(lambda p: p["experiments"]["prefflip-webrender-v1-2-1492568"]["branch"] == "enabled")
	control = wrExperiment.filter(lambda p: p["experiments"]["prefflip-webrender-v1-2-1492568"]["branch"] == "disabled")
	treatment.count(), control.count()


	# In[65]:

	treatment.map(lambda p: p["gfx"]["features"]["compositor"]).countByValue()


	# In[66]:

	wrQualified.take(1)


	# In[67]:

	wrEnabled = treatment.filter(lambda p: p["gfx"]["features"]["compositor"] == "webrender")
	wrDisabled = control.filter(lambda p: p["gfx"]["features"]["compositor"] == "d3d11")
	wrEnabled.count(), wrDisabled.count()


	# In[68]:

	wrDisabled = wrDisabled.sample(False, wrEnabled.count()/(wrDisabled.count()*1.0))


	# In[69]:

	wrEnabled.count(), wrDisabled.count()


	# In[70]:

	def aggregate_series(s1, s2):
	"""Function to sum up series; if one is None, return other"""
	if s1 is None:
	return s2
	if s2 is None:
	return s1
	return s1.add(s2, fill_value=0)

	def roundDict(x):
	int_x = {int(k) : v for k, v in x.items()}
	d = {}
	lastValue = 0
	for (key, value) in sorted(int_x.iteritems()):
	if key < 100:
	lastValue = value
	continue
	rounded = key/100
	if rounded in d:
	d[rounded] += lastValue
	else:
	d[rounded] = lastValue
	lastValue = value
	return d

	def aggregate_frame_time_hist(x, hist):
	result = (
	x.filter(lambda p: p[hist])
	.map(lambda p: pd.Series(roundDict(p[hist]['values'])))
	.reduce(aggregate_series)
	)

	result.index = [int(i) for i in result.index]
	return result.sort_index()

	def aggregate_paint_time_hist(x, hist):
	result = (
	x.filter(lambda p: p[hist])
	.map(lambda p: pd.Series(p[hist]['values']))
	.reduce(aggregate_series)
	)

	result.index = [int(i) for i in result.index]
	return result.sort_index()


	# In[71]:

	aggregated_enabled = aggregate_frame_time_hist(wrEnabled, 'frame_time')
	aggregated_enabled_svg = aggregate_frame_time_hist(wrEnabled, 'frame_time_svg')
	aggregated_disabled = aggregate_frame_time_hist(wrDisabled, 'frame_time')


	# In[72]:

	aggregated_disabled


	# In[73]:

	aggregated_enabled_svg


	# In[74]:

	aggregated_enabled


	# In[75]:

	aggregated_enabled_no_svg = aggregated_enabled - aggregated_enabled_svg
	aggregated_enabled_no_svg


	# In[76]:

	def percent_slow_frames(dataset1, dataset2):
	percent_enabled = dataset1[1:].map(lambda x: 100*x/dataset1[0:].sum())
	percent_disabled = dataset2[1:].map(lambda x: 100*x/dataset2[0:].sum())
	df = pd.DataFrame()
	df['enabled'] = percent_enabled
	df['disabled'] = percent_disabled
	p = df.plot(kind='bar', figsize=(15, 7))
	p.set_xlabel("frames")
	p.set_ylabel("percentage of content paints that happen in x frames")
	return p

	def relative_change_slow_frames(dataset1, dataset2):
	percent_enabled = dataset1[1:].map(lambda x: x/dataset1[0:].sum())
	percent_disabled = dataset2[1:].map(lambda x: x/dataset2[0:].sum())
	p = (100*(percent_enabled - percent_disabled) / percent_disabled).plot(kind='bar', figsize=(15, 7))
	p.set_xlabel("frames")
	p.set_ylabel("% change in content paints of x frames from enabling WebRender")


	# In[77]:

	percent_slow_frames(aggregated_enabled, aggregated_disabled)


	# In[78]:

	percent_slow_frames(aggregated_enabled_svg, aggregated_disabled)


	# In[79]:

	percent_slow_frames(aggregated_enabled_no_svg, aggregated_disabled)


	# In[80]:

	relative_change_slow_frames(aggregated_enabled, aggregated_disabled)


	# In[81]:

	relative_change_slow_frames(aggregated_enabled_svg, aggregated_disabled)


	# In[82]:

	relative_change_slow_frames(aggregated_enabled_no_svg, aggregated_disabled)


	# Compute the percentage of frames that took more than 2 frames (CONTENT_FRAME_TIME score > 200)

	# In[83]:

	def percent_frames_slower_than(count, dataset):
	percent = dataset[count-1:].map(lambda x: 100*x/dataset[0:].sum())
	return percent.sum()

	percent_frames_slower_than(2, aggregated_enabled)


	# In[84]:

	percent_frames_slower_than(2, aggregated_enabled_svg)


	# In[85]:

	percent_frames_slower_than(2, aggregated_enabled_no_svg)


	# In[86]:

	percent_frames_slower_than(2, aggregated_disabled)


	# Now try with some other thresholds:

	# In[87]:

	percent_frames_slower_than(3, aggregated_enabled), percent_frames_slower_than(3, aggregated_disabled)


	# In[88]:

	percent_frames_slower_than(4, aggregated_enabled), percent_frames_slower_than(4, aggregated_disabled)


	# In[89]:

	percent_frames_slower_than(8, aggregated_enabled), percent_frames_slower_than(8, aggregated_disabled)


	# In[90]:

	percent_frames_slower_than(16, aggregated_enabled), percent_frames_slower_than(16, aggregated_disabled)


	# Now let's look at frame times with upload time excluded

	# In[91]:

	aggregated_enabled_without_resource_upload = aggregate_frame_time_hist(wrEnabled, 'frame_time_without_resource_upload')
	percent_frames_slower_than(2, aggregated_enabled_without_resource_upload)


	# In[92]:

	aggregated_enabled_without_upload = aggregate_frame_time_hist(wrEnabled, 'frame_time_without_upload')
	percent_frames_slower_than(2, aggregated_enabled_without_upload)


	# Full paint time records (in ms) the total time for the 'content' phase, at which point we should be able to start compositing.
	#
	# The goal is to have this finish within 1 vsync interval, so that compositing can start on the next vsync interval.
	#
	# I'm using a score of >=16 as the 'failure' value, though normalizing to % of vsync (using gfx.monitors[0].refreshRate) and using 100 might be slightly more accurate.

	# In[93]:

	aggregated_enabled_full_paint_time = aggregate_paint_time_hist(wrEnabled, 'full_paint_time')
	aggregated_disabled_full_paint_time = aggregate_paint_time_hist(wrDisabled, 'full_paint_time')

	percent_enabled_full_paint_time = aggregated_enabled_full_paint_time.map(lambda x: 100.0*x/aggregated_enabled_full_paint_time[0:].sum())
	percent_disabled_full_paint_time = aggregated_disabled_full_paint_time.map(lambda x: 100.0*x/aggregated_disabled_full_paint_time[0:].sum())

	df = pd.DataFrame()
	df['enabled'] = percent_enabled_full_paint_time
	df['disabled'] = percent_disabled_full_paint_time
	p = df.cumsum().plot(kind='bar', figsize=(15, 7))
	p.set_xlabel("ms")
	p.set_ylabel("full paint times")


	# In[94]:

	def percent_paints_equal_or_slower_than(count, dataset):
	percent = dataset[count:].map(lambda x: 100*x/dataset[0:].sum())
	return percent.sum()

	percent_paints_equal_or_slower_than(16, aggregated_enabled_full_paint_time)


	# In[95]:

	percent_paints_equal_or_slower_than(16, aggregated_disabled_full_paint_time)


	# Local profiling shows that we frequently spend some time between vsync and the start of the transaction (which is used for the start time of this measurement), this is time spent in layout/style flushing, rAF callbacks etc.
	#
	# This time does contribute to whether we miss the vsync for compositing though, so I think excluding it makes this measurement less useful than it could be in terms of correlating these results to the CONTENT_FRAME_TIME regression.
	#
	# The time between the vsync tick and the start of the transaction should be roughly consistent between configurations, but we don't have much data on how long it is or what the distribution is.
	#
	# We can shorten our 16ms window to account for this and get super rough ideas of what the differences are:

	# In[96]:

	percent_paints_equal_or_slower_than(14, aggregated_enabled_full_paint_time), percent_paints_equal_or_slower_than(14, aggregated_disabled_full_paint_time)


	# In[97]:

	percent_paints_equal_or_slower_than(12, aggregated_enabled_full_paint_time), percent_paints_equal_or_slower_than(12, aggregated_disabled_full_paint_time)


	# In[98]:

	percent_paints_equal_or_slower_than(10, aggregated_enabled_full_paint_time), percent_paints_equal_or_slower_than(10, aggregated_disabled_full_paint_time)


	# In[99]:

	percent_paints_equal_or_slower_than(8, aggregated_enabled_full_paint_time), percent_paints_equal_or_slower_than(8, aggregated_disabled_full_paint_time)


	# In[100]:

	percent_paints_equal_or_slower_than(6, aggregated_enabled_full_paint_time), percent_paints_equal_or_slower_than(6, aggregated_disabled_full_paint_time)


	# In[ ]: