Skip to content

Instantly share code, notes, and snippets.

@arashbm
Last active October 30, 2018 19:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arashbm/f8caa41c74d174f0a424d518513f6b53 to your computer and use it in GitHub Desktop.
Save arashbm/f8caa41c74d174f0a424d518513f6b53 to your computer and use it in GitHub Desktop.
PCAP JSON Packet and Flows
import sys
import fileinput
# Maximum age of a flow: a packet joins an existing flow only while it arrives
# less than max_dt after that flow's first packet (60 seconds per the usage
# notes).
max_dt = 60
# Flows currently being accumulated, keyed by the tuple
# (proto, src, dst, srcport, dstport). Each value is a dict with the entries
# "start_time", "end_time", "packet" (packet count) and "length" (summed
# packet lengths).
active_flows = {}
def print_flow(key, info):
    """Write one flow to stdout as a single space-separated row.

    ``key`` is the flow's 5-tuple (proto, src, dst, srcport, dstport) and
    ``info`` is the dict of accumulated statistics for that flow, holding
    the entries "start_time", "end_time", "length" and "packet".

    Columns are emitted in the order
    proto src dst srcport dstport start_time end_time length packets.
    """
    # Unpacking (rather than splatting ``key`` directly) keeps the check
    # that the key has exactly five components.
    proto, src, dst, srcport, dstport = key
    stats = [
        info[field]
        for field in ("start_time", "end_time", "length", "packet")
    ]
    print(proto, src, dst, srcport, dstport, *stats)
# Header row for the flow table written to stdout.
print("proto src dst srcport dstport start_time end_time length packets")

for line in fileinput.input():
    # The first line of each input file is the column header, not a packet.
    if fileinput.isfirstline():
        continue

    # Skip blank lines (e.g. a trailing empty line) instead of failing on
    # the 8-way unpacking below.
    if not line.strip():
        continue

    # One packet per row: time_epoch ip proto src dst srcport dstport plength.
    time, ipv, proto, src, dst, srcport, dstport, length = line.split()
    time = float(time)
    length = int(length)
    key = (proto, src, dst, srcport, dstport)

    # Single lookup; stored values are always dicts, so None means "absent".
    flow = active_flows.get(key)
    if flow is None:
        print("new flow", file=sys.stderr)
        active_flows[key] = {"start_time": time, "packet": 1, "length": length, "end_time": time}
    elif time - flow["start_time"] < max_dt:
        # Packet falls inside the flow's window (measured from its first
        # packet): fold it into the running totals.
        print("still active", file=sys.stderr)
        flow["packet"] += 1
        flow["length"] += length
        flow["end_time"] = time
    else:
        # Window exceeded: emit the finished flow and start a new one with
        # the same key, seeded by this packet.
        print("expired flow", file=sys.stderr)
        print_flow(key, flow)
        active_flows[key] = {"start_time": time, "packet": 1, "length": length, "end_time": time}

# Input exhausted: emit every flow that was never superseded.
for key, info in active_flows.items():
    print_flow(key, info)
import sys
import json
# Header row; one space-separated packet row follows per reported packet.
print("time_epoch ip proto src dst srcport dstport plength")

# Parse the whole JSON export up front. The context manager closes the file
# handle as soon as parsing is done instead of leaving it to the garbage
# collector.
with open(sys.argv[1]) as json_file:
    packets = json.load(json_file)

for packet in packets:
    layers = packet["_source"]["layers"]

    # Network layer: only IPv6 and IPv4 packets are reported.
    # NOTE(review): ipv6.plen looks like the IPv6 payload length while ip.len
    # looks like the IPv4 total length (header included), so the "plength"
    # column may not be comparable across IP versions -- confirm intent.
    if "ipv6" in layers:
        ip_version = "v6"
        src = layers["ipv6"]["ipv6.src"]
        dst = layers["ipv6"]["ipv6.dst"]
        plen = layers["ipv6"]["ipv6.plen"]
    elif "ip" in layers:
        ip_version = "v4"
        src = layers["ip"]["ip.src"]
        dst = layers["ip"]["ip.dst"]
        plen = layers["ip"]["ip.len"]
    else:
        continue

    # Transport layer: only TCP and UDP packets are reported. The field
    # names share the layer name as prefix ("tcp.srcport", "udp.srcport").
    if "tcp" in layers:
        proto = "tcp"
    elif "udp" in layers:
        proto = "udp"
    else:
        continue
    srcport = layers[proto][proto + ".srcport"]
    dstport = layers[proto][proto + ".dstport"]

    # The "eth" layer is never used in the output, so it is not looked up;
    # packets without one no longer abort the run with a KeyError.
    time_epoch = layers["frame"]["frame.time_epoch"]
    print(time_epoch, ip_version, proto, src, dst, srcport, dstport, plen)

Use these scripts to turn a tshark JSON file (tshark -r blaah.pcap -T json > packets.json) into packet and flow CSV files.

Usage

parse_pcap_json.py produces a space-separated file with one packet per row.

python3 parse_pcap_json.py packets.json > packets.csv

filter_flows.py produces a space-separated file with one flow per row. It consumes the packet CSV file generated by parse_pcap_json.py. The maximum duration of a flow defaults to 60 seconds and can be changed in the script (max_dt).

python3 filter_flows.py packets.csv > flows.csv
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment