# linuskmr/multipart_extractor.py

## multipart_extractor.py
import re
import os


# Captured multipart form data that was extracted via Wireshark
CAPTURE_FILE = 'extracted_multipart.bin'

# Directory that receives the extracted files
EXTRACTED_DIR = b'extracted/'


def find_boundary(multipart: bytes) -> bytes:
    """Return the first delimiter line of *multipart*, or ``b''`` if none exists.

    Multipart parts are separated by a delimiter line like
    ``-----------------------------1881080412979790561529271372``.
    The delimiter is read once from the data instead of being guessed anew for
    every part, so dashes and digits inside a part's binary content cannot be
    mistaken for it, and boundaries that are not purely numeric work as well.
    """
    boundary_match = re.search(rb'(?:\A|\r\n)(--[^\r\n]+)\r\n', multipart)
    if boundary_match is None:
        return b''
    # Whitespace after the boundary is padding, not part of the delimiter
    return boundary_match.group(1).rstrip(b' \t')


def split_multipart(multipart: bytes) -> list:
    """Split *multipart* into a list of ``(headers, content)`` byte pairs.

    ``headers`` is the raw header block of a part (without the blank line that
    ends it) and ``content`` is the part's payload without the CRLF that
    precedes the next delimiter. Parts without a header/content separator are
    skipped. A capture that ends without a closing delimiter still yields its
    last part, holding everything up to the end of the data.
    """
    boundary = find_boundary(multipart)
    if not boundary:
        return []

    parts = []
    # The CRLF in front of a delimiter line belongs to the delimiter, not to
    # the content before it. Prepending one lets the very first delimiter be
    # matched by the same pattern as all the following ones.
    segments = (b'\r\n' + multipart).split(b'\r\n' + boundary)

    # segments[0] is whatever precedes the first delimiter (normally nothing)
    for segment in segments[1:]:
        if segment.startswith(b'--'):
            # Closing delimiter ("<boundary>--"): no further parts follow
            break

        # Drop the remainder of the delimiter line
        _, line_break, part = segment.partition(b'\r\n')

        # A blank line separates the headers from the content
        headers_str, separator, content = part.partition(b'\r\n\r\n')
        if line_break and separator:
            parts.append((headers_str, content))

    return parts


def extract_filename(headers_str: bytes) -> bytes:
    """Return the bare file name announced in *headers_str*, or ``b''``.

    Only the final path component is returned: the name comes from captured
    (untrusted) traffic, and a value such as ``../../x`` must not be able to
    place the file outside of the output directory. ``b''`` is returned when
    the headers carry no ``filename="..."`` parameter (an ordinary form field)
    or when no usable name remains after stripping the directories.
    """
    filename_match = re.search(rb'filename="([^"]*)"', headers_str)
    if filename_match is None:
        return b''

    # Some clients send Windows style paths; treat both separators alike
    filename = os.path.basename(filename_match.group(1).replace(b'\\', b'/'))
    if filename in (b'.', b'..'):
        return b''
    return filename


def main() -> None:
    """Write every file contained in ``CAPTURE_FILE`` to ``EXTRACTED_DIR``."""
    with open(CAPTURE_FILE, 'rb') as capture:
        multipart = capture.read()

    boundary = find_boundary(multipart)
    print(f'{boundary=}')

    for headers_str, content in split_multipart(multipart):
        headers: list = headers_str.splitlines()
        print(f'{headers=}')

        filename = extract_filename(headers_str)
        print(f'{filename=}')
        if not filename:
            # Ordinary form field without a file name: nothing to write
            continue

        # Write the content bytes to a file
        os.makedirs(EXTRACTED_DIR, exist_ok=True)
        with open(os.path.join(EXTRACTED_DIR, filename), 'wb') as target:
            target.write(content)


if __name__ == '__main__':
    main()