Skip to content

Instantly share code, notes, and snippets.

@mw2q
Created January 2, 2020 23:58
post processing dbt2 mix.log with python
#!/usr/bin/env python
import sys
from math import ceil, floor, fsum
def average(values):
count = float(len(values))
if count == 0:
return 0
return fsum(values) / float(len(values))
def percentile_90(values):
index = 0.9 * float(len(values))
findex = int(floor(index))
cindex = int(ceil(index))
values.sort()
try:
if findex == cindex:
return values[findex - 1]
return (values[findex - 1] + values [cindex - 1]) / 2.0
except:
return 0
if len(sys.argv) != 2:
print('usage: %s <mix.log>' % sys.argv[0])
exit(1)
filename = sys.argv[1]
try:
f = open(filename, 'r')
except:
print('cannot open file: %s' % filename)
exit(1)
rampup_start = None
rampup_end = None
steadystate_start = None
steadystate_end = None
d = 'd'
n = 'n'
o = 'o'
p = 'p'
s = 's'
error_count = 0
response_time = dict()
rollback_count = dict()
transaction_count = dict()
for key in [d, n, o, p, s]:
response_time[key] = list()
rollback_count[key] = 0
transaction_count[key] = 0
total_transaction_count = 0
for line in f:
c = line.strip().split(',')
if len(c) != 5:
print('unknown number of columns: %d' % len(c))
print('exiting')
exit(2)
if rampup_start is None:
rampup_start = int(c[0])
if c[1] == 'START':
rampup_end = int(c[0])
steadystate_start = int(c[0])
# Keep resetting the counts in the event there are multiple START
# markers from multiple drivers.
for key in [d, n, o, p, s]:
response_time[key] = list()
rollback_count[key] = 0
transaction_count[key] = 0
continue
if c[1] == 'TERMINATED' and steadystate_end is None:
steadystate_end = int(c[0])
break
if c[2] == 'C':
transaction_count[c[1]] += 1
response_time[c[1]].append(float(c[3]))
total_transaction_count += 1
elif c[2] == 'R':
rollback_count[c[1]] += 1
transaction_count[c[1]] += 1
total_transaction_count += 1
elif c[2] == 'E':
error_count += 1
if rampup_end is None:
print('START marker not found')
exit(3)
if steadystate_end is None:
print('TERMINATED marker not found')
exit(3)
transaction_name = dict()
transaction_name[d] = 'Delivery'
transaction_name[n] = 'New Order'
transaction_name[o] = 'Order Status'
transaction_name[p] = 'Payment'
transaction_name[s] = 'Stock Level'
print(' Response Time (s)')
print(' Transaction % Average : 90th % Total Rollbacks %')
print('------------ ----- --------------------- ----------- --------------- -----')
for key in [d, n, o, p, s]:
try:
mix_percentage = float(transaction_count[key]) / \
float(total_transaction_count) * 100.0
except:
mix_percentage = 0
try:
rollback_percentage = float(rollback_count[key]) / \
float(transaction_count[key]) * 100.0
except:
rollback_percentage = 0
print('%12s %5.2f %9.3f : %9.3f %11d %15d %5.2f' % (
transaction_name[key],
mix_percentage,
average(response_time[key]),
percentile_90(response_time[key]),
transaction_count[key] - rollback_count[key],
rollback_count[key],
rollback_percentage))
print('------------ ----- --------------------- ----------- --------------- -----')
duration = float(steadystate_end - steadystate_start)
print()
print('%0.2f new-order transactions per minute (NOTPM)' % \
(float(transaction_count[n]) / (duration / 60.0)))
print('%0.1f minute duration' % (duration / 60.0))
print('%d total unknown errors' % error_count)
print('%0.1f seconds(s) ramping up' % (float(rampup_end - rampup_start)))
Copy link

ghost commented Jan 10, 2020

Thanks for sharing the code! Gave me something to experiment with.
I benchmarked this on a GNU/Linux system using hyperfine './dbt2-post-process.py ../mix.log' --export-markdown:

Command Mean [s] Min [s] Max [s] Relative
./dbt2-post-process.py ../mix.log 19.045 ± 0.747 18.269 20.349 1.00

I spiked an effort in Rust here: https://github.com/efx/dbt2-post-process too.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment