Skip to content

Instantly share code, notes, and snippets.

@jdlrobson
Last active February 16, 2019 14:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jdlrobson/ce4059100085ad051b1f5144f197e582 to your computer and use it in GitHub Desktop.
Save jdlrobson/ce4059100085ad051b1f5144f197e582 to your computer and use it in GitHub Desktop.
Analyse SQL dumps of raw EventLogging NavigationTiming data for the median and 95th percentile
import numpy as np
import csv
import json
import sys
import datetime
def get_data(filename, from_ts, to_ts, metric, wiki=None, http2=None, anon=None, lazy=None):
    """Collect the integer values of one metric column from a tab-separated dump.

    The first row of the file is treated as the header. A data row
    contributes its metric value to the result only when all of these hold:

    * its ``timestamp`` lies strictly between ``from_ts`` and ``to_ts``
      (plain string comparison, so both bounds must use the same fixed-width
      format as the file);
    * ``event_action`` is ``'view'`` and ``event_namespaceId`` is ``'0'``;
    * the metric value is not ``'0'``, ``'1'`` or ``'NULL'``;
    * every optional filter that was supplied matches exactly.

    Args:
        filename: path of the tab-separated dump to read.
        from_ts: exclusive lower timestamp bound.
        to_ts: exclusive upper timestamp bound.
        metric: heading of the column to sample; surrounding double quotes
            are optional (``'"event_firstPaint"'`` and ``'event_firstPaint'``
            are equivalent).
        wiki: if truthy, keep only rows whose ``wiki`` column equals it.
        http2: if truthy, keep only rows whose ``event_isHttp2`` equals it.
        anon: if truthy, keep only rows whose ``event_isAnon`` equals it.
        lazy: if truthy, keep only rows whose ``event_lazyLoadImages``
            equals it.

    Returns:
        A list of ints, in file order. Empty for an empty file.

    Raises:
        ValueError: if a column needed for the requested filters is absent
            from the header, or a surviving metric value is not an integer.
    """
    delim = '\t'

    def unquote(name):
        # Headings may or may not carry literal double quotes; compare bare.
        return name.strip().strip('"')

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        handle = open(filename, 'r', newline='')
    else:
        handle = open(filename, 'rb')

    sample = []
    with handle as csvfile:
        reader = csv.reader(csvfile, delimiter=delim, quoting=csv.QUOTE_MINIMAL)
        header = next(reader, None)
        if header is None:
            return sample

        # Parsing the header through the csv reader strips the line ending,
        # so a column in last position is found like any other.
        positions = dict((unquote(name), idx) for idx, name in enumerate(header))

        def column(name):
            if name not in positions:
                raise ValueError('%s: no %r column in header' % (filename, name))
            return positions[name]

        ts_col = column('timestamp')
        metric_col = column(unquote(metric))

        # (column index, required value) pairs a row must satisfy exactly.
        required = [
            (column('event_action'), 'view'),
            (column('event_namespaceId'), '0'),
        ]
        optional = (
            ('event_isHttp2', http2),
            ('event_lazyLoadImages', lazy),
            ('event_isAnon', anon),
            ('wiki', wiki),
        )
        for name, wanted in optional:
            if wanted:
                required.append((column(name), wanted))

        width = 1 + max([ts_col, metric_col] + [idx for idx, _ in required])

        for cols in reader:
            if len(cols) < width:
                # Truncated or blank row: flag it on stdout and move on.
                print('!')
                continue
            ts = cols[ts_col]
            if ts == 'timestamp':
                # A repeated header row inside the data.
                continue
            if not (from_ts < ts < to_ts):
                continue
            val = cols[metric_col]
            if val in ('0', '1', 'NULL'):
                continue
            if any(cols[idx] != wanted for idx, wanted in required):
                continue
            sample.append(int(val))
    return sample
def data(filenames, from_ts, to_ts, colNum, wiki=None, http2=None, lazy=None, label='', anon=None):
    """Print one wikitext table row summarising a metric across several dumps.

    The samples returned by ``get_data`` for every file are pooled, and a
    row holding the label, the sample size, the 95th percentile and the
    median is written to stdout. Nothing is printed when no row of any
    file passes the filters.

    Args:
        filenames: iterable of dump paths to pool together.
        from_ts: exclusive lower timestamp bound, passed to ``get_data``.
        to_ts: exclusive upper timestamp bound, passed to ``get_data``.
        colNum: despite its name, this is the heading of the metric column;
            it is forwarded as ``get_data``'s ``metric`` argument.
        wiki, http2, lazy, anon: optional filters forwarded to ``get_data``.
        label: text for the first cell of the row.
    """
    sample = []
    for filename in filenames:
        sample.extend(get_data(filename, from_ts, to_ts, colNum,
                               wiki=wiki, http2=http2, anon=anon, lazy=lazy))
    if not sample:
        return
    p95, median = np.percentile(np.array(sample), [95, 50])
    # A parenthesised single argument prints the same on Python 2 and 3.
    print('|-\n| %s || %s || %s || %s' % (label, len(sample), p95, median))
def table_open():
    """Return the wikitext that opens the results table, header row included."""
    column_titles = ['Label', 'Sample Size', '95th percentile', 'median']
    preamble = '{| class="wikitable"\n|-\n! '
    return preamble + ' !! '.join(column_titles)
def table_close():
    """Return the wikitext token that terminates a table."""
    closing_markup = '|}'
    return closing_markup
def _print_table(title, files, metric, windows, variants=None, wiki=None):
    """Print a titled wikitext table with one row per variant and time window.

    ``windows`` is a sequence of ``(from_ts, to_ts, label)`` tuples and
    ``variants`` a sequence of ``(label_suffix, filter_kwargs)`` tuples; the
    filter kwargs are forwarded to ``data``. Without ``variants`` a single
    unfiltered variant is used.
    """
    if variants is None:
        variants = (('', {}),)
    print("\n" + title)
    print(table_open())
    for suffix, filters in variants:
        for start, end, label in windows:
            data(files, start, end, metric, wiki, label=label + suffix, **filters)
    print(table_close())


def main(argv):
    """Compare metrics for the same number of days before and after a change.

    ``argv`` follows the ``sys.argv`` layout: program name, dump filename,
    change timestamp in ``%Y%m%d%H%M%S`` form, and the number of days to
    look at on either side of that timestamp. Three wikitext tables are
    printed to stdout (fully loaded, first paint, DOM interactive).

    On missing or malformed arguments the process exits with a non-zero
    status and the usage text on stderr.
    """
    usage = 'Usage: `python navtimingcsv.py filename.tsv 20160713001400 8`'
    # get arguments
    try:
        filename = argv[1]
        changedAt = argv[2]
        days = argv[3]
        dt = datetime.datetime.strptime(changedAt, '%Y%m%d%H%M%S')
        delta = datetime.timedelta(days=int(days))
    except IndexError:
        sys.exit(usage)
    except ValueError as err:
        # Unparseable timestamp or day count.
        sys.exit('%s\n%s' % (err, usage))

    wiki = None
    files = [filename]
    fromTs = (dt - delta).strftime('%Y%m%d%H%M%S')
    endTs = (dt + delta).strftime('%Y%m%d%H%M%S')

    windows = (
        (fromTs, changedAt, 'Before change'),
        (changedAt, endTs, 'After change'),
    )
    load_variants = (
        ('', {}),
        (' (anons)', {'anon': '1'}),
        (' (http2)', {'http2': '1'}),
        (' (http1)', {'http2': '0'}),
    )

    _print_table('Fully loaded', files, '"event_loadEventEnd"', windows,
                 variants=load_variants, wiki=wiki)
    _print_table('First paint', files, '"event_firstPaint"', windows)
    _print_table('DomInteractive', files, '"event_domInteractive"', windows)


if __name__ == '__main__':
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment