Skip to content

Instantly share code, notes, and snippets.

@cloudjunky
Created August 14, 2014 06:11
Show Gist options
  • Save cloudjunky/015cdff8bcd3815004d6 to your computer and use it in GitHub Desktop.
Save cloudjunky/015cdff8bcd3815004d6 to your computer and use it in GitHub Desktop.
TShark to Python DPI and save in Mongo
__author__ = 'cloudjunky@gmail.com'
import sys
import hashlib
import pymongo
"""
:usage: tshark -T fields -e ip.src -e tcp.srcport -e ip.dst -e tcp.dstport -e frame.protocols -r capture.pcap | python dpi_parser.py
"""
MONGO_HOST = 'localhost'
MONGO_PORT = 27017
MONGO_DB = 'packetloop'
MONGO_COLLECTION = 'dpis'


def _parse_line(line):
    """Turn one line of tshark field output into a flow record.

    The line is expected to hold five tab-separated fields: ip.src,
    tcp.srcport, ip.dst, tcp.dstport and frame.protocols.

    Returns a dict with the keys hash, src, sport, dst, dport, flow and
    proto (plus app when further layers follow proto), or None when the
    line is malformed, is not TCP, or carries nothing above TCP.
    """
    fields = line.strip().split('\t')
    if len(fields) != 5:
        return None
    src, sport, dst, dport, proto = fields

    try:
        flow = (src, int(sport)), (dst, int(dport))
    except ValueError:
        # Empty or non-numeric port, e.g. a non-TCP packet.
        return None

    layers = proto.split(':')
    if 'tcp' not in layers:
        return None

    # Find the layers above TCP by searching for 'tcp' instead of assuming
    # a fixed index: the position shifts with the encapsulation (for
    # example eth:ip:tcp:http versus eth:ethertype:ip:tcp:http).
    above_tcp = layers[layers.index('tcp') + 1:]
    if not above_tcp:
        # Bare TCP segment (handshake, ACK, ...): no application data.
        return None

    # The digest is taken over str(flow) so keys stay identical to the
    # ones this script has always produced; encode() makes it work on
    # Python 3 as well as Python 2.
    flow_hash = hashlib.sha1(str(flow).encode('utf-8')).hexdigest()

    # sport/dport are stored as the original strings, matching the
    # documents already written by earlier runs.
    record = dict(hash=flow_hash, src=src, sport=sport,
                  dst=dst, dport=dport, flow=flow, proto=above_tcp[0])
    if above_tcp[1:]:
        record['app'] = above_tcp[1:]
    return record


def parse_flows(lines):
    """Collect one record per distinct (src, sport) -> (dst, dport) flow.

    lines is any iterable of tshark output lines. Only the first line
    seen for a flow that carries application data is kept; later lines
    of the same flow are ignored. Returns a dict mapping the flow's SHA-1
    hex digest to its record.
    """
    flows = {}
    for line in lines:
        record = _parse_line(line)
        if record is not None:
            # setdefault keeps the first record seen for a flow.
            flows.setdefault(record['hash'], record)
    return flows


def store_flows(flows, host=MONGO_HOST, port=MONGO_PORT):
    """Insert every flow record into the packetloop.dpis collection.

    Does nothing when flows is empty. The client is always closed, even
    if the insert fails.
    """
    documents = list(flows.values())
    if not documents:
        # insert_many rejects an empty list, so there is nothing to do.
        return

    # MongoClient replaces pymongo.Connection, which newer PyMongo
    # releases no longer provide.
    client = pymongo.MongoClient(host, port)
    try:
        client[MONGO_DB][MONGO_COLLECTION].insert_many(documents)
    finally:
        client.close()


def main():
    """Read tshark output from stdin and save the DPI results in Mongo."""
    print("Processing stdin.....")
    print(sys.argv)
    flows = parse_flows(sys.stdin)
    print("Found application data in {} flows".format(len(flows)))
    print("Writing to Mongo")
    store_flows(flows)
    print("Done!")


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment