Created
April 27, 2016 17:34
-
-
Save danielrmeyer/66843ff78dc530cc52d22a412601ec8a to your computer and use it in GitHub Desktop.
Convert a recorded HTTP interaction from tcpwatch into a JSON object.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import os | |
import glob | |
import codecs | |
import re | |
import json | |
# Directory holding the tcpwatch capture: "recording" under the current
# working directory.
path2recording = os.path.join(os.getcwd(), 'recording')

# glob.glob() returns matches in arbitrary order; sort them so requests are
# processed (and later written out) in a reproducible, file-name order.
requests = sorted(glob.glob(os.path.join(path2recording, "*.request")))

# Regular expressions for URLs to drop from the output. Raw strings keep the
# "\." escape intact for the regex engine without relying on Python passing
# an unrecognised string escape through (which triggers a warning).
# NOTE(review): in_patt_list applies these with re.search and they are not
# anchored at the end, so r".*\.js" also matches e.g. ".json" URLs -- confirm
# that this is intended.
blacklist = [
    r".*\.js", r".*\.css", r".*\.gif",
    r".*\.jpeg", r".*\.jpg", r".*\.ico",
    r".*\.woff", r".*\.(t|o)tf", r".*\.png",
]

# Regular expressions for URLs to keep in the output.
whitelist = [r"http://mydomain.com/.*"]
def _parse_headers(header_lines):
    '''
    Build a dict from raw "Name: value" header lines.

    Each line is split on the first colon only, so values that themselves
    contain colons (URLs, host:port, timestamps) are kept whole. Lines
    without a colon are ignored. A repeated header name keeps the last value.
    '''
    headers = {}
    for line in header_lines:
        name, sep, value = line.partition(':')
        if not sep:
            # Blank or malformed line: there is no name/value pair to record.
            continue
        headers[name] = value.strip()
    return headers


def process_request(path2req):
    r'''
    Takes path to a watch[0-9]+\.request file from a tcpwatch recording.

    The file is read as UTF-8 text and split into head and body at the first
    blank line (CRLF CRLF). The first head line supplies the method and url,
    the remaining head lines the headers. For POST requests the first line of
    the body is additionally parsed as "k=v" pairs separated by "&".

    Returns: processed request, a dict with keys 'id', 'method', 'url',
    'headers' and, for POST only, 'form_data'.

    Raises:
        ValueError: the request line has no url part.
        Exception: the method is not POST, GET or CONNECT.
    '''
    # basename() copes with whichever path separator the platform uses.
    req_id = os.path.basename(path2req).split('.')[0].replace('watch', '')

    with codecs.open(path2req, "r", encoding='utf-8') as f:
        raw_req = f.read()

    # Split at the FIRST blank line so that a trailing CRLF (or blank lines
    # inside the body) cannot be mistaken for the header/body separator.
    head, _, body = raw_req.partition('\r\n\r\n')
    head_lines = head.split('\r\n')

    request_line = head_lines[0].split(' ')
    if len(request_line) < 2:
        raise ValueError("Malformed request line in request id %s." % req_id)

    processed_req = {
        'id': req_id,
        'method': request_line[0],
        'url': request_line[1],
    }
    method = processed_req['method']

    if method not in ('POST', 'GET', 'CONNECT'):
        raise Exception("Method %s with request id %s not implemented." % (processed_req['method'], processed_req['id']))

    processed_req['headers'] = _parse_headers(head_lines[1:])

    if method == 'POST':
        # Only the first body line is treated as form data.
        raw_form = body.split('\r\n')[0]
        form_data = {}
        for part in raw_form.split('&'):
            if not part:
                # Empty body or stray "&": nothing to record.
                continue
            # Split on the first "=" only; values may contain "=" themselves.
            k, _, v = part.partition('=')
            form_data[k] = v
        processed_req['form_data'] = form_data

    return processed_req
# Parse every recorded request file into its dict form, keeping the order of
# the `requests` list.
processed_requests = list(map(process_request, requests))
def in_patt_list(url, pl):
    '''
    Check if the url is in the pattern list pl.
    pl could be a whitelist or blacklist.

    Each entry of pl is used as a regular expression and tried against url
    with re.search, so a pattern may match anywhere in the url.

    Returns: bool -- True if at least one pattern matches, otherwise False
    (including when pl is empty).
    '''
    # any() stops at the first hit and always yields a real bool, rather than
    # falling off the end of the function and returning None on no match.
    return any(re.search(patt, url) is not None for patt in pl)
# First keep only the requests whose url matches a whitelist pattern ...
whitelisted_requests = list(
    filter(lambda req: in_patt_list(req['url'], whitelist), processed_requests)
)

# ... then discard any of those whose url matches a blacklist pattern.
final_requests = list(
    filter(lambda req: not in_patt_list(req['url'], blacklist), whitelisted_requests)
)

# Serialise the surviving requests as a single JSON array.
with codecs.open('recording.json', "w", encoding='utf-8') as out_file:
    out_file.write(json.dumps(final_requests))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment