Last active
February 16, 2019 14:14
Star
You must be signed in to star a gist
Analyse SQL dumps of raw EventLogging NavigationTiming data to compute the median and 95th percentile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import csv | |
import json | |
import sys | |
import datetime | |
def get_data( filename, from_ts, to_ts, metric, wiki=None, http2=None, anon=None, lazy = None ):
    """Collect the integer values of one metric column from a tab-separated dump.

    The first line of the file is a header row naming the columns. A data
    row contributes its metric value to the result only when all of the
    following hold:

    * ``event_action`` is ``'view'`` and ``event_namespaceId`` is ``'0'``;
    * the metric value is not ``'0'``, ``'1'`` or ``'NULL'`` (presumably
      placeholders for "not recorded" -- confirm against the schema);
    * ``from_ts < timestamp < to_ts``, compared as strings, so both bounds
      are exclusive and all timestamps must share one fixed-width format;
    * every optional filter that was supplied matches.

    Args:
        filename: Path of the tab-separated dump to read.
        from_ts: Exclusive lower timestamp bound (string).
        to_ts: Exclusive upper timestamp bound (string).
        metric: Header name of the column to sample. Surrounding double
            quotes are ignored, so '"event_loadEventEnd"' and
            'event_loadEventEnd' are equivalent.
        wiki: If set, keep only rows whose ``wiki`` column equals it.
        http2: If set, keep only rows whose ``event_isHttp2`` equals it.
        anon: If set, keep only rows whose ``event_isAnon`` equals it.
        lazy: If set, keep only rows whose ``event_lazyLoadImages`` equals it.

    Returns:
        A list of ints, in file order. An empty file yields ``[]``.

    Raises:
        ValueError: If the header lacks a column that is needed, or a
            selected metric value is not an integer.
    """
    # Callers pass the header name as it appears in the raw file, quotes
    # included; the csv reader strips those quotes, so compare bare names.
    metric_name = metric.strip('"')

    # Always-on filters first, then whichever optional ones were requested.
    expected_values = [('event_action', 'view'), ('event_namespaceId', '0')]
    optional = (
        ('wiki', wiki),
        ('event_isHttp2', http2),
        ('event_lazyLoadImages', lazy),
        ('event_isAnon', anon),
    )
    expected_values.extend((name, value) for name, value in optional if value)

    # The csv module wants a binary handle on Python 2 and a text handle
    # opened with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        handle = open(filename, newline='')
    else:
        handle = open(filename, 'rb')

    sample = []
    with handle as csvfile:
        reader = csv.reader(csvfile, delimiter='\t', quoting=csv.QUOTE_MINIMAL)

        # Parsing the header with the same reader as the data keeps quote
        # and line-ending handling consistent (a raw split() would leave the
        # newline glued to the last column name).
        headings = next(reader, None)
        if headings is None:
            return sample

        positions = dict((name, index) for index, name in enumerate(headings))
        needed = ['timestamp', metric_name] + [name for name, _ in expected_values]
        missing = [name for name in needed if name not in positions]
        if missing:
            raise ValueError(
                '%s: missing column(s): %s' % (filename, ', '.join(missing))
            )

        ts_col = positions['timestamp']
        metric_col = positions[metric_name]
        checks = [(positions[name], value) for name, value in expected_values]
        min_width = 1 + max(positions[name] for name in needed)

        for cols in reader:
            if len(cols) < min_width:
                # Truncated or blank row: report it and move on.
                print('!')
                continue

            ts = cols[ts_col]
            if ts == 'timestamp':
                # A repeated header row (e.g. concatenated dumps).
                continue

            val = cols[metric_col]
            if val in ('0', '1', 'NULL'):
                continue
            if not from_ts < ts < to_ts:
                continue
            if all(cols[index] == value for index, value in checks):
                sample.append(int(val))

    return sample
def data( filenames, from_ts, to_ts, colNum, wiki=None, http2=None, lazy = None, label='', anon=None ):
    """Print one wikitable row summarising a metric across several dumps.

    The samples returned by get_data() for every file in ``filenames`` are
    pooled, and a row holding the label, the sample count, the 95th
    percentile and the median is printed. Nothing is printed when the pooled
    sample is empty.

    Note that ``colNum`` is forwarded to get_data() as its ``metric``
    argument, i.e. it is a column header name rather than a numeric index.
    The remaining keyword arguments are forwarded to get_data() unchanged.
    """
    values = []
    for path in filenames:
        values += get_data(
            path, from_ts, to_ts, colNum,
            wiki=wiki, http2=http2, anon=anon, lazy=lazy
        )

    if not values:
        return

    arr = np.array(values)
    p95 = np.percentile(arr, 95)
    p50 = np.percentile(arr, 50)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print('|-\n| %s || %s || %s || %s' % (label, len(values), p95, p50))
def table_open():
    """Return the wikitext that opens the results table, header row included."""
    opening_lines = [
        '{| class="wikitable"',
        '|-',
        '! Label !! Sample Size !! 95th percentile !! median',
    ]
    return '\n'.join(opening_lines)
def table_close():
    """Return the wikitext that closes a table started by table_open()."""
    closing_markup = '|}'
    return closing_markup
# Script entry: compare a metric before and after a change.
#
# Command-line arguments:
#   1. path of the tab-separated dump
#   2. timestamp of the change, formatted %Y%m%d%H%M%S
#   3. number of days to include on each side of the change
#
# Parenthesised single-argument print is used throughout so the script runs
# unchanged on Python 2 and Python 3.

# get arguments
try:
    filename = sys.argv[1]
    changedAt = sys.argv[2]
    days = sys.argv[3]
    dt = datetime.datetime.strptime(changedAt, '%Y%m%d%H%M%S')
    delta = datetime.timedelta(days=int(days))
except (IndexError, ValueError):
    # Either an argument is missing, or the timestamp / day count does not
    # parse. Show the usage line and exit non-zero so callers see the failure.
    print('Usage: `python navtimingcsv.py filename.tsv 20160713001400 8`')
    sys.exit(1)

wiki = None
files = [ filename ]
fromTs = (dt - delta).strftime('%Y%m%d%H%M%S')
endTs = (dt + delta).strftime('%Y%m%d%H%M%S')

print("\nFully loaded")
event = '"event_loadEventEnd"'
print(table_open())
# One before/after pair for the whole sample, then one pair per segment.
for suffix, filters in (
    ('', {}),
    (' (anons)', {'anon': '1'}),
    (' (http2)', {'http2': '1'}),
    (' (http1)', {'http2': '0'}),
):
    data( files, fromTs, changedAt, event, wiki, label='Before change' + suffix, **filters )
    data( files, changedAt, endTs, event, wiki, label='After change' + suffix, **filters )
print(table_close())

# The remaining metrics are reported for the whole sample only.
for title, column in (
    ('First paint', '"event_firstPaint"'),
    ('DomInteractive', '"event_domInteractive"'),
):
    print("\n" + title)
    print(table_open())
    data( files, fromTs, changedAt, column, label='Before change' )
    data( files, changedAt, endTs, column, label='After change' )
    print(table_close())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment