Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
parsehar.py - Reads a har file from the filesystem, converts to CSV, then dumps to stdout.
"""Reads a har file from the filesystem, converts to CSV, then dumps to
stdout.
"""
import argparse
import json
from urlparse import urlparse
def main(harfile_path):
"""Reads a har file from the filesystem, converts to CSV, then dumps to
stdout.
"""
harfile = open(harfile_path)
harfile_json = json.loads(harfile.read())
i = 0
for entry in harfile_json['log']['entries']:
i = i + 1
url = entry['request']['url']
urlparts = urlparse(entry['request']['url'])
size_bytes = entry['response']['bodySize']
size_kilobytes = float(entry['response']['bodySize'])/1024
mimetype = 'unknown'
if 'mimeType' in entry['response']['content']:
mimetype = entry['response']['content']['mimeType']
print '%s,"%s",%s,%s,%s,%s' % (i, url, urlparts.hostname, size_bytes,
size_kilobytes, mimetype)
if __name__ == '__main__':
argparser = argparse.ArgumentParser(
prog='parsehar',
description='Parse .har files into comma separated values (csv).')
argparser.add_argument('harfile', type=str, nargs=1,
help='path to harfile to be processed.')
args = argparser.parse_args()
main(args.harfile[0])
@holterman

This comment has been minimized.

Copy link

@holterman holterman commented Jun 17, 2015

Very usefull code.
Thanks!

@kantandane

This comment has been minimized.

Copy link

@kantandane kantandane commented Sep 14, 2017

Hi, thank you for this code !
What about actually save the csv instead of printing it ?

"""Reads a har file from the filesystem, converts to CSV, then dumps to
stdout.
"""
import argparse
import json
import csv
from urlparse import urlparse


def main(harfile_path):
    """Reads a har file from the filesystem, converts to CSV, then dumps to
    stdout.
    """
    harfile = open(harfile_path)
    harfile_json = json.loads(harfile.read())
    i = 0
    with open(harfile_path[:-3] + '.csv', 'w') as f:
        csv_file = csv.writer(f)
        csv_file.writerow(['id', 'url', 'hostname', 'size (bytes)',
            'size (kilobytes)', 'mimetype'])

        for entry in harfile_json['log']['entries']:
            i = i + 1
            url = entry['request']['url']
            urlparts = urlparse(entry['request']['url'])
            size_bytes = entry['response']['bodySize']
            size_kilobytes = float(entry['response']['bodySize'])/1024
            mimetype = 'unknown'
            if 'mimeType' in entry['response']['content']:
                mimetype = entry['response']['content']['mimeType']

            csv_file.writerow([i, url, urlparts.hostname, size_bytes,
                size_kilobytes, mimetype])

if __name__ == '__main__':
    argparser = argparse.ArgumentParser(
        prog='parsehar',
        description='Parse .har files into comma separated values (csv).')
    argparser.add_argument('harfile', type=str, nargs=1,
                        help='path to harfile to be processed.')
    args = argparser.parse_args()

    main(args.harfile[0])
@octaviogl

This comment has been minimized.

Copy link

@octaviogl octaviogl commented Nov 9, 2018

Very useful, indeed. Thank you very much!
I have also included the time in the csv:
csv_file.writerow(['id', 'url', 'time','hostname', 'size (bytes)', 'size (kilobytes)', 'mimetype'])
...
time = entry['time']
...
csv_file.writerow([i, url, time, urlparts.hostname, size_bytes, size_kilobytes, mimetype])

Best.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment