Last active
August 29, 2016 21:12
-
-
Save dcode/b7acc5bec6b8508261d6 to your computer and use it in GitHub Desktop.
bro2json.py: This is a Python 3 version that assumes utf-8 encoding.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import csv, gzip, json, itertools | |
class BroDictReader:
    """Read a gzip-compressed Bro log and yield each record as a dict.

    The constructor reads lines from the start of the file until it finds
    the ``#fields`` header, recording the ``#separator`` and ``#path``
    headers seen on the way.  It then rewinds the file and exposes every
    line that does not start with the comment prefix through a
    ``csv.DictReader`` keyed by the field names.

    If no ``#fields`` line is found the file is closed immediately,
    ``fields`` stays ``None`` and iterating the reader yields nothing;
    callers can detect this case through ``gzfile.closed``.

    Attributes:
        comment: Prefix that marks header/comment lines.
        gzfile: The underlying text-mode (utf-8) gzip file object.
        fields: Tuple of column names from ``#fields``, or ``None``.
        path: Value of the ``#path`` header, or ``None``.
        seperator: Column delimiter taken from ``#separator`` (the
            attribute's spelling is kept for backward compatibility).
        reader: Iterator producing one dict per data line.
    """

    def __init__(self, filename, comment="#"):
        """Open *filename* and prepare the row reader.

        Args:
            filename: Path of the gzip-compressed log to read.
            comment: Prefix of lines that must not be parsed as data.
        """
        self.comment = comment
        self.fields = None
        self.path = None
        self.seperator = None
        # Empty iterator so that a header-less file iterates as "no rows"
        # instead of failing on a missing attribute.
        self.reader = iter(())
        self.gzfile = gzip.open(filename, mode='rt', encoding='utf-8')
        try:
            self._read_header()
            if not self.fields:
                self.close()
                return
            self.gzfile.seek(0)
            # Only lines that *start* with the comment prefix are metadata;
            # a '#' elsewhere in the line belongs to the data.
            data_lines = (
                line for line in self.gzfile
                if not (self.comment and line.startswith(self.comment))
            )
            # Note, performance prefers Python 3
            self.reader = csv.DictReader(
                data_lines,
                fieldnames=self.fields,
                delimiter=self.seperator,
                # Fields are not quoted in this format, so a quote
                # character must be kept as ordinary data.
                quoting=csv.QUOTE_NONE,
            )
        except Exception:
            # Do not leak the file handle when the header is malformed.
            self.gzfile.close()
            raise

    def _read_header(self):
        """Fill ``seperator``, ``path`` and ``fields`` from header lines."""
        for line in self.gzfile:
            if line.startswith("#separator"):
                # The value is an escaped hex code such as \x09; swapping
                # the backslash for "0" gives "0x09", which int(.., 16)
                # accepts.
                code = line.split()[1].replace("\\", "0")
                self.seperator = chr(int(code, 16))
            elif line.startswith("#path"):
                self.path = line.split(self.seperator)[1].strip()
            elif line.startswith("#fields"):
                names = line.split(self.seperator)[1:]
                self.fields = tuple(name.strip() for name in names)
                break

    def __next__(self):
        """Return the next row as a dict; raise StopIteration at the end."""
        return next(self.reader)

    def next(self):
        """Return the next row (kept for callers using the old name)."""
        return next(self.reader)

    def close(self):
        """Close the underlying gzip file."""
        self.gzfile.close()

    def __iter__(self):
        """Return an iterator over the row dicts."""
        return iter(self.reader)

    def __enter__(self):
        """Return the reader itself for use in a ``with`` statement."""
        return self

    def __exit__(self, type, value, tb):
        """Close the file on leaving the ``with`` block.

        Returns ``None`` so that exceptions raised inside the block are
        never suppressed.
        """
        self.close()
# (old name, new name) pairs applied by hook_func, in this order.
_FIELD_RENAMES = (
    ("id.orig_h", "id_orig_h"),
    ("id.orig_p", "id_orig_p"),
    ("id.resp_h", "id_resp_h"),
    ("id.resp_p", "id_resp_p"),
    ("ts", "timestamp"),
)


def hook_func(row):
    """Rename dotted connection fields and ``ts`` in *row*.

    Each rename is applied independently, so a row that lacks some of the
    fields (for example one without the ``id.*`` columns) still gets the
    remaining ones renamed.  Fields that are absent are simply skipped.

    Args:
        row: Mutable mapping of field name to value; modified in place.

    Returns:
        The same *row* object, after renaming.
    """
    for old_name, new_name in _FIELD_RENAMES:
        if old_name in row:
            row[new_name] = row.pop(old_name)
    return row
def main(infile, outfile):
    """Convert the gzip-compressed Bro log *infile* to gzip-compressed JSON.

    The output is a single JSON array with one object per log row.  Each
    object gets a ``_type`` key holding the log's ``#path`` value and is
    passed through ``hook_func`` before being written.

    If the input has no Bro header a message is printed and nothing is
    written to the (already created) output file.

    Args:
        infile: Path of the gzip-compressed Bro log to read.
        outfile: Path of the gzip-compressed JSON file to write.
    """
    with BroDictReader(infile, '#') as reader, \
            gzip.open(outfile, 'wt', encoding='utf-8') as jsonfile:
        if reader.gzfile.closed:
            print("Input file does not contain a Bro header.")
            return
        jsonfile.write('[')
        for index, row in enumerate(reader):
            # Put the separator *between* rows; a comma after the last row
            # would make the array invalid JSON.
            if index:
                jsonfile.write(',\n')
            row["_type"] = reader.path
            json.dump(hook_func(row), jsonfile)
        jsonfile.write(']')
# Script entry point: bro2json.py <input log .gz> <output json .gz>
if __name__ == '__main__':
    import sys

    # Fail with a usage message rather than a bare IndexError when the two
    # required paths are not supplied.
    if len(sys.argv) < 3:
        sys.exit("usage: {} <infile.log.gz> <outfile.json.gz>".format(
            sys.argv[0]))
    main(infile=sys.argv[1], outfile=sys.argv[2])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment