Skip to content

Instantly share code, notes, and snippets.

@jmaher
Created December 21, 2021 23:47
Show Gist options
  • Save jmaher/dfc293fffa60ddfb416e8541f0de6585 to your computer and use it in GitHub Desktop.
Save jmaher/dfc293fffa60ddfb416e8541f0de6585 to your computer and use it in GitHub Desktop.
import json
import math
import os
import requests
# Headers sent with every request made by fetch_json: ask the server for a
# JSON response and identify this client with the 'ouija' user agent.
DEFAULT_REQUEST_HEADERS = {
    "Accept": "application/json",
    "User-Agent": "ouija",
}
def fetch_json(url):
    """Issue a GET request for *url* and return the decoded JSON body.

    The request is sent with DEFAULT_REQUEST_HEADERS and a 30 second timeout.
    ``raise_for_status()`` is called on the response before the body is
    decoded, so an HTTP error status raises rather than being parsed.
    """
    reply = requests.get(url, timeout=30, headers=DEFAULT_REQUEST_HEADERS)
    reply.raise_for_status()
    return reply.json()
def getPerfData(branch, revision):
    """Return the Treeherder performance summary for *revision* on *branch*.

    The decoded JSON is cached in ``<branch>_<revision>.json`` in the current
    working directory. When that file already exists its contents are
    returned and no request is made; otherwise the summary is fetched with
    fetch_json, written to the cache file, and returned.
    """
    cache_path = "%s_%s.json" % (branch, revision)

    # Cache miss: download the summary and remember it for the next run.
    if not os.path.exists(cache_path):
        summary_url = "https://treeherder.mozilla.org/api/performance/summary" \
            "/?repository=%s&framework=13&interval=1209600&no_subtests=true&revision=%s" % (branch, revision)
        payload = fetch_json(summary_url)
        with open(cache_path, 'w') as cache_file:
            json.dump(payload, cache_file)
        return payload

    # Cache hit: serve the stored copy.
    with open(cache_path, 'r') as cache_file:
        return json.load(cache_file)
def filterPerfData(data):
    """Return the entries of *data* worth analysing, in their original order.

    An entry is kept only when its 'values' list holds at least five items
    and its 'name' contains the substring 'fission' (fission is all we care
    about).
    """
    def is_wanted(entry):
        samples = entry['values']
        # Missing/empty value lists and short series are discarded first.
        if not samples or len(samples) < 5:
            return False
        return 'fission' in entry['name']

    return [entry for entry in data if is_wanted(entry)]
def avg(values):
    """Return the arithmetic mean of *values* as a float.

    *values* must support ``len()``. Raises ZeroDivisionError when it is
    empty.
    """
    # Convert to float *before* dividing so the quotient is never truncated,
    # even where "/" on two integers performs integer division.
    return float(sum(values)) / len(values)
def stddev(values):
    """Return the sample standard deviation of *values*.

    The squared deviations from the mean (as computed by avg) are summed and
    divided by ``len(values) - 1`` before taking the square root, so a
    single-element input raises ZeroDivisionError.
    """
    mean = avg(values)
    squared_error = 0
    for sample in values:
        deviation = sample - mean
        squared_error += deviation * deviation
    variance = squared_error / (len(values) - 1)
    return math.sqrt(variance)
def getttest(base, new):
    """Return a t statistic for the shift in mean from *base* to *new*.

    The value is ``(avg(new) - avg(base)) / sqrt(sb^2/nb + sn^2/nn)`` where
    ``sb``/``sn`` are the sample standard deviations and ``nb``/``nn`` the
    lengths of the two series. The sign follows the direction of the shift.

    When the denominator is exactly zero (neither series has any spread) the
    division is impossible and the fixed value 10 is returned instead.
    """
    # this is simplified due to len(base|new) > 1
    base_mean, new_mean = avg(base), avg(new)
    base_sd, new_sd = stddev(base), stddev(new)

    # Standard error of the difference between the two means.
    std_err = math.sqrt(base_sd * base_sd / len(base) + new_sd * new_sd / len(new))

    # Zero spread on both sides: return the fixed fallback rather than divide.
    return 10 if std_err == 0 else (new_mean - base_mean) / std_err
def summarizePerfData(data):
    """Build a per-metric summary dict keyed by each entry's 'name'.

    For every name the first entry seen wins; later entries with the same
    name are ignored. Each summary holds:

    * 'values' - the entry's raw value list
    * 'avg'    - mean of the values
    * 'stddev' - standard deviation expressed as a percentage of the mean
    * 'lib'    - the entry's 'lower_is_better' flag
    """
    summary = {}
    for entry in data:
        name = entry['name']
        if name in summary:
            continue
        samples = entry['values']
        spread = stddev(samples)
        mean = avg(samples)
        summary[name] = {
            'values': samples,
            'avg': mean,
            'stddev': (spread / mean) * 100,
            'lib': entry['lower_is_better'],
        }
    return summary
def getSummary(branch, rev):
    """Fetch, filter and summarise the performance data for *rev* on *branch*.

    This chains getPerfData -> filterPerfData -> summarizePerfData and
    returns the resulting per-metric summary dict.
    """
    return summarizePerfData(filterPerfData(getPerfData(branch, rev)))
def getRegressionData(branch, before, after):
    """Compare the *before* and *after* revisions on *branch*, metric by metric.

    Only metrics present in both summaries are compared. Returns a dict
    mapping each metric name to:

    * 'regression' - percentage change of the average relative to *before*.
      Positive means the average went up, which is a regression for
      lower-is-better metrics.
    * 'noise'      - before-stddev% minus after-stddev%; positive means the
      *after* data is less noisy.
    * 'meaningful' - whether the change is large and confident enough to
      report (see the thresholds in the body).
    """
    regressions = {}
    bdata = getSummary(branch, before)
    adata = getSummary(branch, after)

    # find diff of values and stddevpct
    for metric, base in bdata.items():
        if metric not in adata:
            continue
        new = adata[metric]

        delta = new['avg'] - base['avg']
        pctval = (delta / base['avg']) * 100

        # this is the difference in noise percentage- ideally lower noise
        noise = base['stddev'] - new['stddev']

        # used for confidence / ismeaningful- only looking for high confidence
        ttest = abs(getttest(base['values'], new['values']))

        # An improvement is a drop for lower-is-better metrics and a rise for
        # higher-is-better ones. Checking only "lower and dropped" would
        # misreport every higher-is-better improvement as a regression.
        if base['lib']:
            newIsBetter = delta < 0
        else:
            newIsBetter = delta > 0

        # Size of the change as a ratio >= 1, whichever direction it went.
        ratio = base['avg'] / new['avg']
        if ratio < 1:
            ratio = 1 / ratio

        # meaningful helps us filter data quickly:
        #   < 2% change or |t| < 3  -> never meaningful
        #   3 <= |t| < 5            -> meaningful only if it is not an improvement
        #   |t| >= 5                -> always meaningful
        if ratio < 1.02 or ttest < 3:
            meaningful = False
        elif ttest < 5:
            meaningful = not newIsBetter
        else:
            meaningful = True

        regressions[metric] = {'regression': pctval,
                               'noise': noise,
                               'meaningful': meaningful}
    return regressions
# Script body: compute regression data for two before/after revision pairs on
# the same branch and print, per metric, how far the second pair's detected
# change is from the first pair's.
branch = 'try'

# moonshots
before = '3a3b2003a298fce78f75ca936783e65b59e126d6'
after = '102f6a4387632957e21ee1e7c47a7c87db25b236'
basedata = getRegressionData(branch, before, after)

azure_before = '24514a27ef4638a7a87605da8275bb52c5985b85'
azure_after = 'c2e59e62232ac6c0a99066ceed30f0314c479d81'
newdata = getRegressionData(branch, azure_before, azure_after)

# dict.keys() is a view without .sort() on Python 3; sorted() returns an
# ordered list of the metric names on any Python version.
metrics = sorted(basedata)
for metric in metrics:
    # Metrics that are missing from the second data set, or not meaningful in
    # either data set, are only listed by name.
    if metric not in newdata or \
            not newdata[metric]['meaningful'] or \
            not basedata[metric]['meaningful']:
        print(metric)
        continue

    newstddev = newdata[metric]['noise']
    basestddev = basedata[metric]['noise']

    # the closer to zero the better; if moonshots detect a 4% regression, we should be detecting something similar
    delta = newdata[metric]['regression'] - basedata[metric]['regression']
    print("%s,%s,%s" % (metric, delta, (newstddev > basestddev)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment