Skip to content

Instantly share code, notes, and snippets.

@danielrmeyer
Created April 27, 2016 17:34
Show Gist options
  • Save danielrmeyer/66843ff78dc530cc52d22a412601ec8a to your computer and use it in GitHub Desktop.
Save danielrmeyer/66843ff78dc530cc52d22a412601ec8a to your computer and use it in GitHub Desktop.
Convert a recorded HTTP interaction from tcpwatch to a JSON object.
# -*- coding: utf-8 -*-
import os
import glob
import codecs
import re
import json
# Directory that holds the tcpwatch recording: "recording" under the
# current working directory.
path2recording = os.path.join(os.getcwd(), 'recording')

# Every "*.request" file found directly inside the recording directory.
requests = glob.glob(os.path.join(path2recording, "*.request"))

# URL patterns for static assets that should be dropped from the output.
# Raw strings keep the "\." escapes intact without triggering Python's
# invalid-escape-sequence warning.
# NOTE: the patterns are applied with re.search and are not anchored at the
# end, so e.g. the ".js" pattern also matches a URL containing ".json".
blacklist = [
    r""".*\.js""", r""".*\.css""", r""".*\.gif""",
    r""".*\.jpeg""", r""".*\.jpg""", r""".*\.ico""",
    r""".*\.woff""", r""".*\.(t|o)tf""", r""".*\.png""",
]

# URL patterns a request must match to be kept at all.
whitelist = [r"""http://mydomain.com/.*"""]
def _parse_headers(header_lines):
    '''
    Build a dict from raw "Name: value" header lines.
    A header name that occurs more than once keeps its last value.
    '''
    headers = {}
    for line in header_lines:
        if not line:
            continue
        # Split on the first colon only, so values that themselves contain
        # ": " (URLs, timestamps, ...) are kept whole.
        name, _, value = line.partition(':')
        headers[name] = value.strip()
    return headers


def _parse_form(raw_form):
    '''
    Split an "a=1&b=2" style body into a dict.
    Values are stored exactly as recorded (no URL-decoding is applied).
    '''
    form_data = {}
    for part in raw_form.split('&'):
        if not part:
            continue
        # Only the first "=" separates key from value; a part without "="
        # becomes a key with an empty value.
        key, _, value = part.partition('=')
        form_data[key] = value
    return form_data


def process_request(path2req):
    '''
    Parse one watch[0-9]+.request file from a tcpwatch recording.

    path2req: path to the request file. The file name up to its first "."
    with "watch" removed becomes the request id.

    Returns: dict with the keys 'id', 'method', 'url' and 'headers'.
    POST requests additionally get 'form_data', parsed from the first
    line of the body.

    Raises: Exception when the method is not POST, GET or CONNECT.
    '''
    # basename copes with whatever path separator the platform uses.
    file_name = os.path.basename(path2req)
    req_id = file_name.split('.')[0].replace('watch', '')

    # codecs.open reads the underlying file in binary mode, so the "\r\n"
    # line endings the parsing below relies on are not translated.
    with codecs.open(path2req, "r", encoding='utf-8') as f:
        raw_req = f.read()

    # The FIRST blank line separates the request head from the body.
    head, _, body = raw_req.partition('\r\n\r\n')
    head_lines = head.split('\r\n')
    request_line = head_lines[0].split(' ')

    processed_req = {'id': req_id}
    processed_req['method'] = request_line[0]
    processed_req['url'] = request_line[1]

    method = processed_req['method']
    if method not in ('POST', 'GET', 'CONNECT'):
        raise Exception("Method %s with request id %s not implemented." % (processed_req['method'], processed_req['id']))

    processed_req['headers'] = _parse_headers(head_lines[1:])
    if method == 'POST':
        # Only the first body line is treated as form data; a trailing
        # line break or an empty body must not break parsing.
        raw_form = body.split('\r\n', 1)[0]
        processed_req['form_data'] = _parse_form(raw_form)
    return processed_req
# Parse every recorded request file into its dictionary form, in the order
# the files were listed.
processed_requests = list(map(process_request, requests))
def in_patt_list(url, pl):
    '''
    Check whether the url matches any pattern in the pattern list pl.
    pl could be a whitelist or blacklist; each entry is a regular
    expression applied with re.search, so a match anywhere in the url counts.
    Returns: bool (False when nothing matches or pl is empty)
    '''
    return any(re.search(patt, url) is not None for patt in pl)
# Step 1: keep only the requests whose URL matches a whitelist pattern.
whitelisted_requests = [
    req for req in processed_requests
    if in_patt_list(req['url'], whitelist)
]

# Step 2: from those, discard anything whose URL matches a blacklist pattern.
final_requests = [
    req for req in whitelisted_requests
    if not in_patt_list(req['url'], blacklist)
]

# Step 3: write the surviving requests as one JSON array to recording.json
# in the current working directory.
with codecs.open('recording.json', "w", encoding='utf-8') as out_file:
    json.dump(final_requests, out_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment