Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Script to extract statistics about MySQL queries in a .pcap.
#!/usr/bin/env python3
'''
Script to extract statistics about MySQL queries in a .pcap.
'''
import sys
from subprocess import run
import json, csv, re
args = sys.argv[1:]
if len(args) != 2:
print('Usage: script.py <mysql_packets.pcap> [time|size]', file=sys.stderr)
exit(2)
fname, sort = args
keys = {
'time': lambda q: q['response_end'],
'size': lambda q: q['transferred_size'],
}
if sort not in keys:
print('Invalid sort key.', file=sys.stderr)
exit(2)
fields = [ 'frame.time_relative', 'tcp.srcport', 'mysql.packet_length', 'mysql.query' ]
data = run([ 'tshark', '-r', fname, '-Tjson', *('-e' + f for f in fields) ], \
check=True, text=True, capture_output=True)
packets = json.loads(data.stdout)
def get_unique(x):
assert len(x) == 1
return x[0]
def parse_packet(x):
x = x['_source']['layers']
r = {}
r['time'] = float(get_unique(x['frame.time_relative']))
r['is_server'] = int(get_unique(x['tcp.srcport'])) == 3306
if 'mysql.packet_length' in x: r['packet_lengths'] = list(map(int, x['mysql.packet_length']))
if 'mysql.query' in x: r['query'] = get_unique(x['mysql.query'])
return r
packets = [parse_packet(x) for x in packets]
packets = [x for x in packets if 'packet_lengths' in x]
queries = []
while packets:
query = packets.pop(0)
assert not query['is_server']
response_frames = []
while packets and packets[0]['is_server']:
response_frames.append(packets.pop(0))
assert len(response_frames) > 0
size = sum(x for packet in response_frames for x in packet['packet_lengths'])
queries.append({
'query': query['query'],
'transferred_size': size,
'start': query['time'],
'response_start': response_frames[0]['time'] - query['time'],
'response_end': response_frames[-1]['time'] - query['time'],
})
for q in sorted(queries, key=keys[sort]):
print('{:7d} {:4.1f}ms {}'.format( q['transferred_size'], q['response_end']*1e3,
re.sub(r'\s+', ' ', q['query'][:100].strip()) + ('...' if len(q['query']) > 100 else '') ))
print('\n{} queries, {:.1f}ms response time, {:.0f}kb transferred'.format( \
len(queries), sum(q['response_end'] for q in queries) * 1e3, sum(q['transferred_size'] for q in queries) / 1024 ))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment