Created
July 3, 2014 16:01
-
-
Save vitillo/47ca30f7c85d62064467 to your computer and use it in GitHub Desktop.
GPU Analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"version": 1, | |
"dimensions": [ | |
{ | |
"field_name": "reason", | |
"allowed_values": ["saved-session"] | |
}, | |
{ | |
"field_name": "appName", | |
"allowed_values": "Firefox" | |
}, | |
{ | |
"field_name": "appUpdateChannel", | |
"allowed_values": ["nightly"] | |
}, | |
{ | |
"field_name": "appVersion", | |
"allowed_values": "33.0a1" | |
}, | |
{ | |
"field_name": "appBuildID", | |
"allowed_values": "*" | |
}, | |
{ | |
"field_name": "submission_date", | |
"allowed_values": ["20140626"] | |
} | |
] | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import numpy | |
import math | |
import scikits.bootstrap as sb | |
def cmf(hist):
    """Return the running (cumulative) sums of ``hist`` as a list.

    Element ``i`` of the result is ``hist[0] + ... + hist[i]``. An empty
    input produces an empty list.
    """
    running = 0
    cumulative = []
    for count in hist:
        running += count
        cumulative.append(running)
    return cumulative
def lower_bound(labels, bin):
    """Return ``labels[bin]`` for a positive ``bin``, otherwise 0.

    Index 0 and negative indices never touch ``labels``; they always map
    to 0.
    """
    if bin > 0:
        return labels[bin]
    return 0
def med_bin(labels, freq):
    """Return the index of the bin that contains the median observation.

    That is the first bin at which the cumulative count reaches half of
    the total count. ``labels`` is accepted for symmetry with the other
    helpers but is not used. Returns ``None`` when ``freq`` is empty.
    """
    halfway = float(sum(freq)) / 2
    running = 0
    for position, count in enumerate(freq):
        running += count
        if running >= halfway:
            return position
    return None
def width(labels, bin):
    """Return the width of bucket ``bin``.

    The width is the distance to the next label. The last bucket has no
    next label and is reported as infinitely wide.
    """
    following = bin + 1
    if following < len(labels):
        return labels[following] - labels[bin]
    return float('inf')
# Median of grouped data, see
# http://www.vitutor.com/statistics/descriptive/median.html
def median(labels, values):
    """Estimate the median of a binned histogram by linear interpolation.

    Applies the grouped-data formula ``L + w * (N/2 - F) / f`` where
    ``L`` is the lower limit of the median bin, ``w`` its width, ``N`` the
    total count, ``F`` the cumulative count of all bins before the median
    bin and ``f`` the count inside the median bin.

    Args:
        labels: Lower limit of each bin; ``labels[i + 1] - labels[i]`` is
            used as the width of bin ``i`` (assumes ascending labels).
        values: Per-bin counts, index-aligned with ``labels``.

    Returns:
        The interpolated median, or the lower limit of the median bin when
        that bin is the last one (its width is treated as infinite).
    """
    median_bin = med_bin(labels, values)
    lower_limit = lower_bound(labels, median_bin)
    w = width(labels, median_bin)
    if w == float('inf'):
        # Nothing to interpolate against in the open-ended last bin.
        return lower_limit

    # Index the cumulative counts directly rather than via lower_bound():
    # that helper maps index 0 to 0, which would drop bin 0's count whenever
    # the median falls in bin 1.
    count_below = cmf(values)[median_bin - 1] if median_bin > 0 else 0
    half_total_freq = float(sum(values)) / 2
    # Guard against dividing by zero (only reachable for an all-zero input).
    median_bin_freq = values[median_bin] if values[median_bin] > 0 else 1
    return lower_limit + w * (half_total_freq - count_below) / median_bin_freq
class DefinitionException(Exception):
    """Raised when histogram bucket parameters are not valid."""


def check_numeric_limits(dmin, dmax, n_buckets):
    """Validate that the three bucket parameters are plain ``int`` values.

    Args:
        dmin: Lower limit of the first non-zero bucket.
        dmax: Upper limit used to space the buckets.
        n_buckets: Total number of buckets.

    Raises:
        DefinitionException: If any argument's type is not exactly ``int``.
            Arguments are checked in the order minimum, maximum, number of
            buckets, and the first failure is reported.
    """
    checks = (
        (dmin, "minimum"),
        (dmax, "maximum"),
        (n_buckets, "number of buckets"),
    )
    for value, description in checks:
        # Exact type match on purpose: floats, strings and bools are rejected.
        if type(value) is not int:
            raise DefinitionException(description + " is not a number")
def exponential_buckets(dmin, dmax, n_buckets):
    """Return ``n_buckets`` integer bucket bounds with exponential spacing.

    Bucket 0 is always 0 and bucket 1 is ``dmin``. Each later bound is
    placed so that the remaining bounds would be evenly spaced in log space
    up to ``dmax``, rounded to the nearest integer, and forced to be at
    least one greater than the previous bound.

    The arguments are validated with ``check_numeric_limits`` first.
    """
    check_numeric_limits(dmin, dmax, n_buckets)
    log_dmax = math.log(dmax)
    bounds = [0] * n_buckets
    bound = dmin
    bounds[1] = bound
    for index in range(2, n_buckets):
        log_bound = math.log(bound)
        log_step = (log_dmax - log_bound) / (n_buckets - index)
        candidate = int(math.floor(math.exp(log_bound + log_step) + 0.5))
        # Rounding can collapse neighbouring bounds; keep them strictly
        # increasing by advancing at least one unit.
        bound = max(candidate, bound + 1)
        bounds[index] = bound
    return bounds
def clean(s):
    """Return ``normalize(s)`` with every comma removed."""
    text = normalize(s)
    return text.replace(",", "")
def normalize(s):
    """Coerce ``s`` to a byte string.

    ``unicode`` objects are encoded as UTF-8 (encoding errors are ignored);
    anything else goes through ``str()``.
    """
    if type(s) != unicode:
        return str(s)
    return s.encode('utf8', 'ignore')
def bootstrap_resample(X, n=None):
    """Draw ``n`` elements of ``X`` uniformly at random, with replacement.

    ``X`` is indexed with an integer numpy array, so it needs to support
    that kind of indexing (e.g. be a numpy array). ``n`` defaults to
    ``len(X)``.
    """
    population = len(X)
    sample_size = population if n is None else n
    picks = numpy.floor(numpy.random.rand(sample_size) * population).astype(int)
    return X[picks]
def percentile_method(X, reps=1000, method=numpy.average, alpha=0.1):
    """Bootstrap percentile confidence interval for a statistic of ``X``.

    Args:
        X: Sample data; converted to a numpy array.
        reps: Number of bootstrap resamples to draw.
        method: Statistic computed on each resample.
        alpha: Significance level as a fraction; the returned interval has
            nominal coverage ``1 - alpha`` (0.1 gives the 5th and 95th
            percentiles).

    Returns:
        The lower and upper percentile of the bootstrap statistics, as
        returned by ``numpy.percentile``.
    """
    X = numpy.array(X)
    samples_statistic = [method(bootstrap_resample(X)) for _ in range(reps)]
    # numpy.percentile takes percentages in [0, 100], so the fractional
    # significance level is split into two equal tails and scaled by 100.
    tail = 100.0 * alpha / 2
    return numpy.percentile(samples_statistic, [tail, 100.0 - tail])
# Bucket bounds specific to the FX_TAB_ANIM_ANY_FRAME_INTERVAL_MS histogram:
# 50 exponential buckets built from a minimum of 7 and a maximum of 500.
ta_buckets = exponential_buckets(dmin=7, dmax=500, n_buckets=50)
def map(k, d, v, cx):
    """Emit one submission's tab-animation histogram under GPU-based keys.

    The histogram is written five times, under keys of decreasing
    specificity (vendor/device/driver/second-GPU flag down to the global
    "ALL" key), so the reducer can aggregate at every level.

    Args:
        k: Unused.
        d: Unused.
        v: JSON text of a submission; must contain "info" and "histograms".
        cx: Output context providing ``write(key, value)``.
    """
    payload = json.loads(v)
    info = payload['info']

    # The doubled backslash spells out the literal backslash that this
    # placeholder contains instead of relying on an unrecognised escape.
    missing = "N\\A"
    vendor_id = info.get('adapterVendorID', missing)
    device_id = info.get('adapterDeviceID', missing)
    gpu2active = info.get('isGPU2Active', False)
    driver = info.get('adapterDriverVersion', missing)

    ta_hist = payload['histograms'].get('FX_TAB_ANIM_ANY_FRAME_INTERVAL_MS', None)
    if ta_hist is None:
        return

    # Skip submissions whose bucket counts are all zero.
    if sum(ta_hist[:len(ta_buckets)]) == 0:
        return

    # Strip commas once per field; the reducer output is comma separated.
    vendor_id = clean(vendor_id)
    device_id = clean(device_id)
    driver = clean(driver)
    gpu2active = clean(gpu2active)

    cx.write((vendor_id, device_id, driver, gpu2active), ta_hist)
    cx.write((vendor_id, device_id, driver, "ALL"), ta_hist)
    cx.write((vendor_id, device_id, "ALL", "ALL"), ta_hist)
    cx.write((vendor_id, "ALL", "ALL", "ALL"), ta_hist)
    cx.write(("ALL", "ALL", "ALL", "ALL"), ta_hist)
def setup_reduce(cx):
    """Configure the reduce context to separate output fields with a comma."""
    separator = ","
    cx.field_separator = separator
def reduce(k, v, cx):
    """Aggregate the histograms emitted for key ``k`` and write the median.

    Args:
        k: Key tuple emitted by ``map``.
        v: List of histograms emitted under ``k``; they are summed
            element-wise.
        cx: Output context providing ``write(key, value)``.

    Writes ``k[0]`` as the key and, as the value, the remaining key fields
    followed by the estimated median and the number of aggregated
    histograms, joined by commas.
    """
    # Aggregate histograms
    hist = numpy.array(v[0])
    for partial_hist in v[1:]:
        hist += numpy.array(partial_hist)

    # The median is estimated straight from the bucket counts, so there is no
    # need to expand the histogram into one list entry per observation.
    # NOTE(review): the last five entries are excluded from the counts -
    # presumably trailing summary fields of the histogram; confirm against
    # the payload format.
    estimate = median(ta_buckets, hist[:-5])

    # CI calculated with bootstrap isn't really useful, so don't compute it
    record = k + (str(estimate), str(len(v)))
    cx.write(record[0], ",".join(record[1:]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment