Skip to content

Instantly share code, notes, and snippets.

@dcode
Last active August 29, 2016 21:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dcode/b7acc5bec6b8508261d6 to your computer and use it in GitHub Desktop.
Save dcode/b7acc5bec6b8508261d6 to your computer and use it in GitHub Desktop.
bro2json.py: This is a Python 3 version that assumes utf-8 encoding.
#!/usr/bin/env python
import csv, gzip, json, itertools
class BroDictReader:
    """Read a gzip-compressed Bro ASCII log and yield each record as a dict.

    The constructor scans the leading header lines for ``#separator``,
    ``#path`` and ``#fields``.  When a ``#fields`` line is found, the file is
    rewound and every line that does not start with the comment prefix is
    parsed into a dict keyed by the declared field names.

    When no ``#fields`` line is found the underlying file is closed
    immediately (callers can detect this through ``gzfile.closed``) and
    iterating the reader yields nothing.

    The object is an iterator and a context manager; leaving the ``with``
    block closes the file.

    Attributes:
        comment: prefix that marks header/comment lines to skip.
        gzfile: the text-mode gzip file object being read.
        fields: tuple of column names from ``#fields``, or None.
        path: value of the ``#path`` header line, or None.
        seperator: single-character field delimiter from ``#separator``,
            or None.  (The misspelt name is kept for compatibility.)
        reader: the underlying row iterator.
    """

    def __init__(self, filename, comment="#"):
        """Open *filename* and parse its header.

        Args:
            filename: path of the gzip-compressed log, decoded as UTF-8.
            comment: prefix of the lines that are not data records.

        Raises:
            OSError: if the file cannot be opened or read.
            ValueError, IndexError: if a header line is malformed.
        """
        self.comment = comment
        self.gzfile = gzip.open(filename, mode='rt', encoding='utf-8')
        self.fields = None
        self.path = None
        self.seperator = None
        # Empty iterator, so that a reader over a header-less file can still
        # be iterated safely instead of raising AttributeError.
        self.reader = iter(())

        try:
            self._parse_header()
            if not self.fields:
                self.close()
                return

            self.gzfile.seek(0)
            # Only lines that *start* with the comment prefix are metadata;
            # a '#' inside a record (e.g. a URI fragment) is ordinary data.
            records = (line for line in self.gzfile
                       if not line.startswith(comment))
            # QUOTE_NONE: the header declares a separator but no quoting
            # scheme, so a field that begins with '"' must not swallow the
            # separators and line breaks that follow it.
            self.reader = csv.DictReader(records,
                                         fieldnames=self.fields,
                                         delimiter=self.seperator,
                                         quoting=csv.QUOTE_NONE)
        except Exception:
            # Do not leak the open file handle when set-up fails.
            self.close()
            raise

    def _parse_header(self):
        """Read header lines up to and including ``#fields``."""
        for line in self.gzfile:
            if line.startswith("#separator"):
                # The value is written as an escape such as "\x09"; turning
                # the backslash into "0" gives "0x09", which int() parses
                # as hexadecimal.
                escaped = line.split()[1]
                self.seperator = chr(int(escaped.replace("\\", "0"), 16))
            elif line.startswith("#path"):
                self.path = line.split(self.seperator)[1].strip()
            elif line.startswith("#fields"):
                names = line.split(self.seperator)[1:]
                self.fields = tuple(name.strip() for name in names)
                break

    def __next__(self):
        """Return the next record; raise StopIteration when exhausted."""
        return next(self.reader)

    # Keep the Python 2 style method name available for existing callers.
    next = __next__

    def close(self):
        """Close the underlying gzip file (safe to call more than once)."""
        self.gzfile.close()

    def __iter__(self):
        """Return an iterator over the remaining records."""
        return iter(self.reader)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        return self.close()
def hook_func(row):
    """Rename the dotted connection-id keys and ``ts`` in *row*.

    Each of ``id.orig_h``, ``id.orig_p``, ``id.resp_h`` and ``id.resp_p`` is
    renamed to its underscore form, and ``ts`` becomes ``timestamp``.  Keys
    that are absent are skipped individually, so a record without the
    ``id.*`` columns still gets its ``ts`` renamed.

    Args:
        row: mutable mapping for one record; it is modified in place.

    Returns:
        The same *row* object, after renaming.
    """
    renames = (
        ('id.orig_h', 'id_orig_h'),
        ('id.orig_p', 'id_orig_p'),
        ('id.resp_h', 'id_resp_h'),
        ('id.resp_p', 'id_resp_p'),
        ('ts', 'timestamp'),
    )
    for old_key, new_key in renames:
        if old_key in row:
            row[new_key] = row.pop(old_key)
    return row
def main(infile, outfile):
    """Convert a gzip-compressed Bro log into a gzip-compressed JSON array.

    Every record read from *infile* gets a ``_type`` key holding the log's
    ``#path`` value, is passed through ``hook_func`` and is written as one
    element of a JSON array, one element per line.

    Args:
        infile: path of the gzip-compressed Bro log to read.
        outfile: path of the gzip-compressed JSON file to write.

    Returns:
        None.  When the input has no Bro header a message is printed and no
        JSON is written.
    """
    with BroDictReader(infile, '#') as reader, \
            gzip.open(outfile, 'wt', encoding='utf-8') as jsonfile:
        # The reader closes its file itself when it finds no header.
        if reader.gzfile.closed:
            print("Input file does not contain a Bro header.")
            return

        jsonfile.write('[')
        for index, row in enumerate(reader):
            # Put the separator *between* elements only: a trailing comma
            # before the closing bracket is not valid JSON.
            if index:
                jsonfile.write(',\n')
            row["_type"] = reader.path
            json.dump(hook_func(row), jsonfile)
        jsonfile.write(']')
if __name__ == '__main__':
    # Command-line entry point: bro2json.py INFILE OUTFILE
    import sys

    # Report missing arguments with a usage message and a non-zero exit
    # status instead of an IndexError traceback.  Extra arguments are
    # ignored.
    if len(sys.argv) < 3:
        sys.exit("usage: {} INFILE.gz OUTFILE.gz".format(sys.argv[0]))
    main(infile=sys.argv[1], outfile=sys.argv[2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment