Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@jeffcarp
Last active July 13, 2017 00:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeffcarp/f1fb015e38f50e82d30b8c69b67faa74 to your computer and use it in GitHub Desktop.
Save jeffcarp/f1fb015e38f50e82d30b8c69b67faa74 to your computer and use it in GitHub Desktop.
Analyze WPT Export latency
#!/usr/bin/python3
# Warning: this file is kind of a mess at the moment
import json
import csv
import requests
import re
import time
import subprocess
from dateutil import parser
import numpy
# from text_histogram import histogram
# JSON cache of the fetched PR list: written by fetch_and_write_q2_prs and
# read back by get_local_q2_prs.
PR_FILE = 'q2prs.json'
# JSON list of latencies read by analzye_mins (which treats them as minutes).
MINS_FILE = 'mins.json'
# Hard-coded path of the local Chromium checkout; used as the working
# directory for every git command this script runs.
CHROMIUM_DIR = '/usr/local/google/home/jeffcarp/chromium/src'
def fetch_all_q2_prs():
    """Fetch every merged ``chromium-export`` PR of w3c/web-platform-tests.

    Queries the GitHub issue-search API once to learn the total number of
    matches, then walks the result pages and collects each page's ``items``.
    Note that the query itself carries no date filter, despite the "q2" in
    the function name.

    Paging stops early when a response has no ``items`` key; the response is
    printed so the reason is visible.

    Returns:
        list: the PR objects exactly as returned by the search API.

    Raises:
        KeyError: if the first response carries no ``total_count``.
    """
    print('Fetching all PRs')
    base_url = 'https://api.github.com/search/issues?q=repo:w3c/web-platform-tests%20type:pr%20label:chromium-export%20is:merged'
    # Without a timeout a stalled connection would block the script forever.
    timeout_seconds = 30
    r = requests.get(base_url, timeout=timeout_seconds)
    data = r.json()
    total = data['total_count']
    print(total, 'total PRs')
    page_size = 50
    # Integer ceiling division: exactly as many pages as are needed to cover
    # `total` results, with no trailing empty page when `total` is a multiple
    # of `page_size`.
    last_page = (total + page_size - 1) // page_size
    prs = []
    for page in range(1, last_page + 1):
        print('Fetching page', page)
        r = requests.get(
            '{}&page={}&per_page={}'.format(base_url, page, page_size),
            timeout=timeout_seconds)
        data = r.json()
        if 'items' not in data:
            # Best effort: keep what has been fetched so far.
            print('no items in data:', data)
            break
        prs.extend(data['items'])
    print('Fetched', len(prs), 'PRs')
    return prs
def fetch_and_write_q2_prs():
    """Download the PR list and cache it as JSON in PR_FILE."""
    # Fetch before opening the file so the cache is only truncated once the
    # download has succeeded.
    fetched_prs = fetch_all_q2_prs()
    with open(PR_FILE, 'w') as cache_file:
        json.dump(fetched_prs, cache_file)
def get_local_q2_prs():
    """Return the PR list previously cached as JSON in PR_FILE."""
    with open(PR_FILE) as cache_file:
        cached_prs = json.load(cache_file)
    return cached_prs
def issue_url_from_body(body):
    """Return the URL from the ``Review-Url:`` footer of a PR body.

    Args:
        body: the PR description text (may be None or empty).

    Returns:
        The text following ``Review-Url: `` on its line, with surrounding
        whitespace removed, or None when the body has no such footer.
    """
    # `$` with MULTILINE matches at every end of line *and* at the end of the
    # text, so a footer on the last line (no trailing newline) is found too.
    m = re.search(r'Review-Url: (.+)$', body or '', re.MULTILINE)
    if m is None:
        return None
    print(m.groups())
    return m.group(1).strip()
def get_sha_from_grep(url):
    """Return the hash of the first commit whose message matches *url*.

    Runs ``git log --all --format=%H -1 --grep=<url>`` inside CHROMIUM_DIR
    and takes the first line of its output.

    Args:
        url: text to look for in commit messages.

    Returns:
        The 40-character commit hash as ``bytes`` (raw git output), or None
        when git printed nothing that looks like a full hash.
    """
    # NOTE(review): git treats the --grep value as a pattern and matches it
    # anywhere in the message, so a URL that is a prefix of another URL would
    # match that one too -- confirm this is acceptable for the footers used.
    cmd = ['git', 'log', '--all', '--format=%H', '-1', '--grep=%s' % url]
    # The context manager closes the pipe; communicate() drains stdout while
    # waiting, which avoids blocking on a full pipe buffer.
    with subprocess.Popen(cmd, cwd=CHROMIUM_DIR, stdout=subprocess.PIPE) as p:
        print("the commandline is {}".format(p.args))
        print("the commandline is {}".format(' '.join(p.args)))
        output, _ = p.communicate()
    lines = output.splitlines()
    sha = lines[0].strip() if lines else b''
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if len(sha) != 40:
        print('NO GOOD SHA')
        return None
    return sha
# Footers that link an exported PR back to its Chromium code review.
# Raw strings: the original non-raw literals relied on invalid escape
# sequences such as `\/` and `\:`.
_RIETVELD_ISSUE_RE = re.compile(r'https://codereview\.chromium\.org/(.+)\n')
_GERRIT_ISSUE_RE = re.compile(
    r'Reviewed-on: https://chromium-review\.googlesource\.com/(.+)\n')


def _review_url_from_pr_body(body):
    """Return the Chromium review URL (Rietveld first, then Gerrit) in *body*.

    Raises:
        ValueError: if the body links to neither kind of review.
    """
    found = _RIETVELD_ISSUE_RE.search(body)
    if found:
        issue = found.group(1)
        print('Found Rietveld issue', issue)
        return 'https://codereview.chromium.org/%s' % issue
    print('PROBABLY A GERRIT PR, SEARCHING')
    found = _GERRIT_ISSUE_RE.search(body)
    if not found:
        print('Could not get issue number from Gerrit CL!')
        raise ValueError(
            'PR body references neither a Rietveld nor a Gerrit review')
    issue = found.group(1).strip()
    if not issue:
        raise ValueError('No issue supplied!')
    print('Found Gerrit issue', issue)
    return 'https://chromium-review.googlesource.com/%s' % issue


def _as_utc(moment):
    """Return *moment* as an aware UTC datetime (naive input is taken as UTC)."""
    from datetime import timezone
    if moment.tzinfo is None:
        return moment.replace(tzinfo=timezone.utc)
    return moment.astimezone(timezone.utc)


def _commit_time_utc(sha):
    """Return the committer date of *sha* in CHROMIUM_DIR as aware UTC.

    Raises:
        ValueError: if git prints no date or the date cannot be parsed.
    """
    cmd = ['git', 'show', '-s', '--format=%ci', sha]
    with subprocess.Popen(cmd, cwd=CHROMIUM_DIR, stdout=subprocess.PIPE) as proc:
        output, _ = proc.communicate()
    first_line = output.splitlines()[:1]
    stamp = first_line[0].decode('utf-8', 'replace').strip() if first_line else ''
    if not stamp:
        raise ValueError('git show printed no date for %r' % (sha,))
    return _as_utc(parser.parse(stamp))


def _write_latency_percentiles(latencies_by_month):
    """Write per-month 50th/90th percentiles to export-latencies.csv, newest month first."""
    with open('export-latencies.csv', 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['Month', '50th percentile', '90th percentile'])
        writer.writeheader()
        for month in sorted(latencies_by_month, reverse=True):
            diffs = latencies_by_month[month]
            writer.writerow({
                'Month': month,
                '50th percentile': numpy.percentile(diffs, 50),
                '90th percentile': numpy.percentile(diffs, 90)
            })


def get_and_print_prs():
    """Measure Chromium-commit -> PR-closed latency and write a monthly CSV.

    For every PR cached in PR_FILE this finds the linked Chromium review URL
    in the PR body, greps the Chromium checkout for the commit mentioning it,
    and computes the minutes between that commit's committer date and the
    PR's ``closed_at``. Latencies are grouped by the UTC ``YYYY-MM`` of the
    commit and their 50th/90th percentiles are written to
    ``export-latencies.csv``.

    PRs are skipped (with a printed note) when no usable commit hash is
    found, when the commit date cannot be read, or when the latency is
    negative.

    Raises:
        ValueError: if a PR body links to neither a Rietveld nor a Gerrit
            review.
    """
    prs = get_local_q2_prs()
    print('Number of PRs in Q2:', len(prs))
    latencies_by_month = {}
    skipped = []
    for index, pr in enumerate(prs):
        print('PR number', index)
        print('PR URL:', 'https://github.com/w3c/web-platform-tests/pull/%s' % pr['number'])
        # Both timestamps are normalised to aware UTC datetimes. Dropping the
        # offsets instead (as was done before) compares the committer's local
        # wall-clock time against GitHub's UTC time and skews every latency
        # by the committer's UTC offset.
        pr_closed_at = _as_utc(parser.parse(pr['closed_at']))
        review_url = _review_url_from_pr_body(pr['body'] or '')
        sha = get_sha_from_grep(review_url)
        if not sha or len(sha) != 40:
            print('SKIPPING!')
            skipped.append(sha)
            continue
        print('found SHA', sha)
        try:
            commit_time = _commit_time_utc(sha)
        except (ValueError, OverflowError) as e:
            print(e)
            print('Mistakes were made')
            continue
        print('pr_closed_at', pr_closed_at)
        print('commit_time', commit_time)
        mins_difference = (pr_closed_at - commit_time).total_seconds() / 60
        print('mins diff:', mins_difference)
        if mins_difference < 0:
            print('NEGATIVE, SKIPPING')
            skipped.append(sha)
            continue
        print('MONTH')
        datekey = commit_time.strftime('%Y-%m')
        latencies_by_month.setdefault(datekey, []).append(mins_difference)
        print('Done\n\n')
    print('Skipped', len(skipped), 'PRs (no usable SHA or negative latency)')
    # NOTE: the flat list of latencies used to be dumped to MINS_FILE at this
    # point, but that code was disabled; analzye_mins still reads that file.
    _write_latency_percentiles(latencies_by_month)
def analzye_mins():
    """Print summary statistics for the latencies stored in MINS_FILE.

    Loads a JSON list of latencies (treated as minutes) and prints the
    average, how many exceed 24 hours, how many are under 35 minutes, and
    the 50th/90th percentiles. A text histogram is drawn only when a
    ``histogram`` callable is available at module level (its import is
    commented out at the top of the file).
    """
    with open(MINS_FILE) as f:
        min_differences = json.load(f)
    total = len(min_differences)
    if not total:
        # Nothing to average or rank; avoids dividing by zero below.
        print('No latencies found in', MINS_FILE)
        return
    out_of_sla = [mins for mins in min_differences if mins > 24 * 60]
    in_sla = [mins for mins in min_differences if mins < 35]
    average = sum(min_differences) / total
    print('Average PR creation to merge latency:', average, 'minutes', '(', average / 60, 'hours)')
    print(len(out_of_sla), '/', total, 'PRs out of 24H SLA -', len(out_of_sla) / total)
    print(len(in_sla), '/', total, 'PRs inside 35m SLA -', len(in_sla) / total)
    # Look the plotting helper up dynamically: calling it unconditionally
    # raised NameError whenever its import was left commented out.
    plot_histogram = globals().get('histogram')
    if callable(plot_histogram):
        plot_histogram(min_differences)
    else:
        print('(histogram skipped: no histogram function is imported)')
    print('50th percentile', numpy.percentile(min_differences, 50))
    print('90th percentile', numpy.percentile(min_differences, 90))
def main():
    """Script entry point: run the per-PR latency analysis on the cached PRs."""
    # The other stages are switched off; uncomment the one you need:
    #   fetch_and_write_q2_prs()   (re-download the PR list into PR_FILE)
    #   analzye_mins()             (print summary statistics from MINS_FILE)
    get_and_print_prs()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment