Skip to content

Instantly share code, notes, and snippets.

@mdaniel
Last active December 20, 2015 19:19
Show Gist options
  • Save mdaniel/6182528 to your computer and use it in GitHub Desktop.
Save mdaniel/6182528 to your computer and use it in GitHub Desktop.
Convert the .har files saved by Chrome into an OWASP WebScarab session. This is obviously an imperfect conversion, since Chrome omits the response body more often than not.
import base64
import calendar
import json
import os
import re
import sys
import time
import urllib
class Conversation(object):
    """
    One HAR entry (a request/response pair) written out as a WebScarab
    conversation.

    ``write()`` appends a record to ``conversationlog`` and creates
    ``conversations/<num>-request`` and ``conversations/<num>-response``.
    All paths are relative, so output lands in the current working
    directory.  ``urlinfo`` is only appended when ``want_urlinfo`` is set.

    Reference notes on what a Scarab session directory contains::

        conversationlog:
            ### Conversation : 1
            RESPONSE_SIZE: 151
            WHEN: 1375942513708
            COOKIE: JIMBO=/
            METHOD: GET
            STATUS: 200 OK
            URL: http://atesis.local:80/
            ORIGIN: Proxy
        cookies:
            ### Cookie : atesis.local/ JIMBO
            1375942925153 JIMBO=/; Domain=atesis.local; Path=/
        urlinfo
            ### URL : http://atesis.local:80/
            METHODS: GET
            SIGNATURE: GET http://atesis.local:80/ (null)
            STATUS: 200 OK
            CHECKSUM: 5b6d74f1453e20c09d6a20d909779ad7
            ### URL : http://atesis.local:80/fred/
            REFERER: http://atesis.local:80/
        fragments/
        conversations/
            %d-request / %d-response
    """

    def __init__(self, num, entry):
        """
        :param num: zero-based index of the entry; the conversation number
            used in the log and in file names is ``num + 1``.
        :param entry: one decoded HAR entry (a dict) providing at least
            ``request``, ``response`` and ``startedDateTime``.
        """
        self.num = 1 + num
        self.entry = entry
        self.req = entry['request']
        self.resp = entry['response']
        ## this is causing some kind of Scarab collision on load so just omit it
        self.want_urlinfo = False

    def write(self):
        """
        Write this conversation to disk.

        :raises AssertionError: if the request's ``postData.text`` does not
            match the declared ``bodySize``.
        """
        # Make sure the directory is there for every conversation, not just
        # number 1, so a conversation can be written on its own.
        if not os.path.isdir('conversations'):
            os.mkdir('conversations')
        # Chrome frequently leaves the body out; everything below adapts.
        have_content = 'text' in self.resp['content']
        if self.want_urlinfo:
            self._append_urlinfo()
        self._append_conversationlog(have_content)
        self._write_request()
        self._write_response(have_content)

    @staticmethod
    def _utf8(text):
        """Return *text* as UTF-8 bytes; byte strings pass through as-is."""
        if isinstance(text, bytes):
            return text
        return text.encode('utf-8')

    @staticmethod
    def _native_str(text):
        """Return *text* as the interpreter's native ``str`` type.

        On Python 2 this encodes ``unicode`` to UTF-8 so that writing to a
        text-mode file does not fall back to the ASCII codec.
        """
        if isinstance(text, str):
            return text
        return text.encode('utf-8')

    def _started_millis(self):
        """Return ``startedDateTime`` as epoch milliseconds (Java style)."""
        stamp = re.sub(r'Z$', 'UTC', self.entry['startedDateTime'])
        seconds = calendar.timegm(
            time.strptime(stamp, '%Y-%m-%dT%H:%M:%S.%f%Z'))
        # struct_time has no sub-second field, so strptime parses the
        # fraction and then throws it away; recover the milliseconds here.
        # The format above requires the fraction, so the search cannot miss.
        fraction = re.search(r'\.(\d+)', stamp).group(1)
        return seconds * 1000 + int(fraction.ljust(3, '0')[:3])

    def _append_urlinfo(self):
        """Append this conversation's URL record to ``urlinfo``."""
        lines = [
            '### URL : %s\n' % self.req['url'],
            'METHODS: %s\n' % self.req['method'],
            'STATUS: %d %s\n' % (
                self.resp['status'], self.resp['statusText']),
            'SIGNATURE: %s %s (null)\n' % (
                self.req['method'], self.req['url']),
            '\n',
        ]
        with open('urlinfo', 'a') as urlinfo:
            urlinfo.write(self._native_str(''.join(lines)))

    def _append_conversationlog(self, have_content):
        """Append this conversation's summary record to ``conversationlog``."""
        # Without a body the response file is written empty, so log 0.
        resp_size = self.resp['content']['size'] if have_content else 0
        lines = [
            '### Conversation : %d\n' % self.num,
            'RESPONSE_SIZE: %d\n' % resp_size,
            'WHEN: %d\n' % self._started_millis(),
            'METHOD: %s\n' % self.req['method'],
            'STATUS: %d %s\n' % (
                self.resp['status'], self.resp['statusText']),
            'URL: %s\n' % self.req['url'],
            '\n',
        ]
        with open('conversationlog', 'a') as c_log:
            c_log.write(self._native_str(''.join(lines)))

    def _write_request(self):
        """Write ``conversations/<num>-request`` (head plus optional body)."""
        req = self.req
        ## Scarab always thinks it is a proxy request, so the request line
        ## carries the absolute URL.
        head = ['%s %s %s\r\n' % (
            req['method'], req['url'], req['httpVersion'])]
        head.extend(
            '%s: %s\r\n' % (h['name'], h['value']) for h in req['headers'])
        head.append('\r\n')

        body = b''
        if 'postData' in req:
            text = req['postData']['text']
            body = self._utf8(text)
            declared = req['bodySize']
            # HAR uses -1 for "size unknown".  bodySize is a byte count, but
            # the character count is still accepted as it always was.
            if declared >= 0 and declared not in (len(text), len(body)):
                raise AssertionError(
                    'postData.text[%d] != bodySize[%d] for %s' % (
                        len(text), declared, str(self.entry)))

        with open('conversations/%d-request' % self.num, 'wb') as fh:
            fh.write(self._utf8(''.join(head)))
            # Written verbatim: no trailing newline is added to the body.
            fh.write(body)

    def _write_response(self, have_content):
        """Write ``conversations/<num>-response`` (head plus optional body)."""
        resp = self.resp
        head = ['%s %d %s\r\n' % (
            resp['httpVersion'], resp['status'], resp['statusText'])]
        for header in resp['headers']:
            h_name = header['name']
            h_val = header['value']
            lowered = h_name.lower()
            ## eat the C-E and T-E all the time, because har doesn't encode
            ## that way but Scarab will try to interpret them
            if lowered in ('content-encoding', 'transfer-encoding'):
                continue
            if 'content-length' == lowered and not have_content:
                # pretend the server didn't provide content, since we
                # don't have it
                h_val = '0'
            head.append('%s: %s\r\n' % (h_name, h_val))
        head.append('\r\n')

        body = self._response_body() if have_content else b''
        with open('conversations/%d-response' % self.num, 'wb') as fh:
            fh.write(self._utf8(''.join(head)))
            fh.write(body)

    def _response_body(self):
        """Return the response body as bytes, undoing HAR base64 if needed."""
        content = self.resp['content']
        text = content['text']
        if 'encoding' in content:
            # Trust the explicit HAR marker when the exporter supplied one.
            is_base64 = 'base64' == content['encoding'].lower()
        else:
            # No marker: fall back to treating images as base64.
            is_base64 = content.get('mimeType', '').lower().startswith(
                'image/')
        if is_base64:
            return base64.b64decode(self._utf8(text))
        return self._utf8(text)
def main( argv ):
    """
    Convert the HAR file named by ``argv[1]`` into WebScarab conversations.

    Every entry under ``log.entries`` becomes one ``Conversation``, numbered
    in file order, and is written immediately.

    :param argv: argument vector in ``sys.argv`` form (program name first,
        then the path of the ``.har`` file).
    :raises SystemExit: with a usage message if no file name was given.

    Outline of the HAR fields, for reference::

        log
            version : string
            creator
                name : string
                version : string
            pages : list
            entries : list
                request
                    method : string
                    url : string
                    httpVersion : string
                    headers : list
                        name : string
                        value : string
                    queryString : list
                    cookies : list
                    bodySize : number
                response
                    status : number
                    statusText : string
                    httpVersion : string
                    headers : list
                    cookies : list
                    content
                        size : number
                        compression : number
                    redirectURL : string
                    headerSize : number
                    bodySize : number
    """
    if len(argv) < 2:
        prog = argv[0] if argv else sys.argv[0]
        sys.exit('usage: %s <file.har>' % prog)
    # Honour the argv we were handed rather than reaching for sys.argv.
    har_filename = argv[1]
    with open( har_filename, 'rb' ) as fh:
        har = json.load( fh )
    # "page" is a destination that the user saw
    # "entry" is something Chrome loaded
    # Only entries are converted; har['log']['pages'] is not consulted.
    for index, entry in enumerate(har['log']['entries']):
        Conversation(index, entry).write()
# Script entry point: hand the raw command line to main() when this file is
# executed directly rather than imported.
if '__main__' == __name__:
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment