Skip to content

Instantly share code, notes, and snippets.

@pramos
Last active August 2, 2021 19:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pramos/70c34fbe60893912a7467fb2784fa267 to your computer and use it in GitHub Desktop.
Save pramos/70c34fbe60893912a7467fb2784fa267 to your computer and use it in GitHub Desktop.
Extracting URLs from pcap file with Scapy using rdpcap()
import sys
import re
from scapy.all import *
from memory_profiler import profile
@profile
def get_url_from_payload(payload):
    """Return ``<host><request target>`` plus a newline for an HTTP GET request.

    The payload must begin with a GET request line and contain the complete
    header section (terminated by a blank line). No URL scheme is prepended:
    the result is the ``Host`` header value followed by the request target.

    Args:
        payload: Raw bytes of a TCP segment payload.

    Returns:
        The URL as ``str``, terminated by ``"\\n"``.

    Raises:
        ValueError: If the header section is incomplete, the payload does not
            start with a GET request line, or the headers are not valid UTF-8
            (``UnicodeDecodeError`` is a ``ValueError`` subclass).
        KeyError: If the request carries no ``Host`` header.
    """
    # Only the header section matters; it ends at the first blank line.
    header_end = payload.index(b"\r\n\r\n")
    head = payload[:header_end].decode("utf8")
    request_line, _, header_block = head.partition("\r\n")

    # Anchor on the request line itself so that a "GET " occurring later in
    # the payload (e.g. inside a POST body) is never mistaken for a request,
    # and accept any HTTP/x.y version rather than 1.1 only.
    request = re.fullmatch(r"GET (?P<target>.*) HTTP/\d\.\d", request_line)
    if request is None:
        raise ValueError("payload does not start with an HTTP GET request line")
    url_path = request.group("target")

    # Header field names are case-insensitive and the whitespace after the
    # colon is optional, so compare normalised names instead of relying on a
    # literal "Host: " prefix.
    for line in header_block.split("\r\n"):
        name, separator, value = line.partition(":")
        if separator and name.strip().lower() == "host":
            return value.strip() + url_path + "\n"
    raise KeyError("Host")
@profile
def parse_pcap(pcap_path, urls_file):
    """Extract the URLs of HTTP GET requests in a capture and write them out.

    The capture is read with ``rdpcap`` and walked session by session (as
    returned by ``sessions()``). For every packet with TCP destination port 80
    whose payload parses as a GET request, one URL line is appended to
    *urls_file*.

    Args:
        pcap_path: Path of the capture file to read.
        urls_file: Path of the output file; it is created or truncated and
            written in binary mode, one encoded URL per line.
    """
    pcap_flow = rdpcap(pcap_path)
    sessions = pcap_flow.sessions()
    # The context manager guarantees the output is flushed and closed even if
    # something unexpected is raised while packets are being processed.
    with open(urls_file, "wb") as urls_output:
        for session_packets in sessions.values():
            for packet in session_packets:
                # Skip anything that is not TCP traffic towards port 80.
                if not packet.haslayer(TCP) or packet[TCP].dport != 80:
                    continue
                payload = bytes(packet[TCP].payload)
                try:
                    url = get_url_from_payload(payload)
                except (ValueError, KeyError):
                    # Expected for most segments: no complete GET request
                    # with a Host header in this payload. Anything else is a
                    # real error and is allowed to propagate.
                    continue
                urls_output.write(url.encode())
def main(arguments):
    """Run the extractor from a command-line argument vector.

    Expected form: ``<program> --pcap <capture file> --output <urls file>``,
    with the options in exactly that order.

    Args:
        arguments: Full argument vector, including the program name at
            index 0 (i.e. ``sys.argv``).

    Returns:
        ``0`` after the capture was processed, ``2`` when the arguments do
        not match the expected form (a usage message is printed to stderr
        instead of failing silently).
    """
    valid = (
        len(arguments) == 5
        and arguments[1] == "--pcap"
        and arguments[3] == "--output"
    )
    if not valid:
        program = arguments[0] if arguments else "extract_urls"
        print(
            "usage: {} --pcap <capture file> --output <urls file>".format(program),
            file=sys.stderr,
        )
        return 2

    parse_pcap(arguments[2], arguments[4])
    return 0


if __name__ == "__main__":
    # Propagate the status so that wrong usage is visible to the calling shell.
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment