Skip to content

Instantly share code, notes, and snippets.

@cgoldberg
Created April 9, 2015 18:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cgoldberg/8fa5fd1ed4e620efd633 to your computer and use it in GitHub Desktop.
Save cgoldberg/8fa5fd1ed4e620efd633 to your computer and use it in GitHub Desktop.
Analyze the slowest pages using onload-event beacon data.
#!/usr/bin/env python
import collections
import re
from operator import itemgetter
import numpy
# Path of the beacon log that the script reads.
DATA_FILE = 'perflog-everything-onload.csv'
# Maximum number of rows printed in each results table.
NUM_RESULTS = 50
# Captures the text of the "page" field in a row.  The doubled quotes are
# matched literally; presumably the log stores JSON inside a CSV-quoted
# column -- TODO confirm against the actual data file.
PAGE_REGEX = re.compile(r'""page"": ""(.+?)""')
# Captures the text of the "value" field in a row.  The script converts it to
# an int and divides by 1000.0 (presumably milliseconds to seconds -- TODO
# confirm the unit).
ONLOAD_REGEX = re.compile(r'""value"": ""(.+?)""')
def count_pages(filename):
    """Count the rows and the distinct pages found in a beacon log.

    Every line of *filename* is searched with ``PAGE_REGEX`` and the captured
    page is recorded once per distinct value.  A line in which the pattern
    does not match (for example a header or blank line) still counts towards
    the row total but contributes no page, instead of raising
    ``AttributeError`` on the missing match object.

    Args:
        filename: Path of the log file to scan.

    Returns:
        A ``(total_rows, unique_page_count)`` tuple.
    """
    total_rows = 0
    unique_pages = set()
    # Iterate the file lazily so a large log is never held in memory at once.
    with open(filename) as f:
        for row in f:
            total_rows += 1
            match = PAGE_REGEX.search(row)
            if match is None:
                # search() returns None when the row carries no page field.
                continue
            unique_pages.add(match.group(1))
    return total_rows, len(unique_pages)
def _load_page_times(filename):
    """Group the onload timings found in *filename* by page.

    Each line is searched with ``PAGE_REGEX`` and ``ONLOAD_REGEX``.  The
    captured value is converted to an int and divided by 1000.0 (presumably
    milliseconds to seconds -- TODO confirm the unit) before being stored.

    Args:
        filename: Path of the log file to read.

    Returns:
        A ``defaultdict(list)`` mapping each page to its list of timings, in
        file order.

    Raises:
        ValueError: If a captured value is not an integer literal.
    """
    page_times = collections.defaultdict(list)
    with open(filename) as f:
        # Stream the file instead of loading every line up front.
        for row in f:
            page_match = PAGE_REGEX.search(row)
            value_match = ONLOAD_REGEX.search(row)
            # A row lacking either field (header, blank or truncated line)
            # cannot be attributed to a page, so skip it rather than crash
            # on a None match.
            if page_match is None or value_match is None:
                continue
            onload_timer = int(value_match.group(1)) / 1000.0
            page_times[page_match.group(1)].append(onload_timer)
    return page_times


def _print_top_results(results, sort_index):
    """Print the top ``NUM_RESULTS`` rows of *results* as a tab-separated table.

    Args:
        results: List of ``(num_requests, percentile_95_time, url)`` tuples.
        sort_index: Tuple position to rank by, highest first (0 ranks by
            number of requests, 1 by 95th-percentile time).
    """
    # A single-argument print(...) produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    print('\n')
    print('views\ttime (95th)\turl')
    print('-' * 50)
    ranked = sorted(results, key=itemgetter(sort_index), reverse=True)
    for num_requests, percentile_95_time, url in ranked[:NUM_RESULTS]:
        # Round to two decimals, then print as a float so trailing zeros
        # are dropped (e.g. 1.50 is shown as 1.5).
        timer = float('{0:.2f}'.format(percentile_95_time))
        print('{}\t{}\t\t{}'.format(num_requests, timer, url))


if __name__ == '__main__':
    page_times = _load_page_times(DATA_FILE)

    # One (views, 95th-percentile time, url) tuple per distinct page.
    results = [
        (len(onload_times), numpy.percentile(onload_times, 95), url)
        for url, onload_times in page_times.items()
    ]

    # Totals are derived from the rows parsed above, so the log is read
    # only once and the counts always describe the analysed data.
    total_requests = sum(len(times) for times in page_times.values())
    unique_pages = len(page_times)
    print('analyzing {} requests.'.format(total_requests))
    print('found {} unique pages.'.format(unique_pages))

    _print_top_results(results, 0)  # most-viewed pages
    _print_top_results(results, 1)  # slowest pages by 95th percentile
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment