Created
October 22, 2022 09:43
-
-
Save linuskmr/1a6b03e8ac5f71e433ae55ed84f2e88c to your computer and use it in GitHub Desktop.
HTTP Multipart Form-Data Extractor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import re
from typing import Iterable, Iterator, Tuple
# Extract the uploaded files from a captured multipart/form-data request body
# (e.g. one exported from Wireshark) and write them to the `extracted/` folder.
#
# Multipart parts are separated by a delimiter line like
# -----------------------------1881080412979790561529271372
# and the body is terminated by the same delimiter followed by `--`.


def _safe_filename(raw_name: bytes) -> bytes:
    """Reduce a client-supplied filename to its final path component.

    The name comes from captured network traffic and is therefore untrusted:
    directory components (POSIX or Windows style) are dropped so that a name
    like ``../../x`` cannot escape the output directory.

    Returns an empty bytes object when no usable name remains.
    """
    name = os.path.basename(raw_name.replace(b'\\', b'/'))
    if name in (b'.', b'..'):
        return b''
    return name


def extract_parts(multipart: bytes) -> Iterator[Tuple[bytes, bytes]]:
    """Yield ``(filename, content)`` for every file part in *multipart*.

    The delimiter is detected once, from the first line that looks like
    ``-----...<digits>``. Parts that carry no (or an empty) ``filename``
    attribute -- i.e. ordinary form fields -- are skipped. If the capture is
    truncated and the last part has no closing delimiter, everything up to
    the end of the data is treated as that part's content.
    """
    boundary_match = re.search(rb'(-+\d+)\r\n', multipart)
    if boundary_match is None:
        return
    boundary = boundary_match.group(1)
    print(f'{boundary=}')
    # The CRLF in front of a delimiter belongs to the delimiter, not to the
    # preceding part's content.
    part_terminator = b'\r\n' + boundary

    position = boundary_match.end()
    while True:
        # Extract headers
        headers_end = multipart.find(b'\r\n\r\n', position)
        if headers_end == -1:
            return
        headers_str = multipart[position:headers_end]
        headers: list = headers_str.splitlines()
        print(f'{headers=}')

        # The part is everything between the blank line and the next delimiter
        content_start = headers_end + 4
        content_end = multipart.find(part_terminator, content_start)
        truncated = content_end == -1
        if truncated:
            content_end = len(multipart)
        content: bytes = multipart[content_start:content_end]

        # Extract filename; form fields without one are not files
        filename_match = re.search(rb'filename="([^"]*)"', headers_str)
        filename = _safe_filename(filename_match.group(1)) if filename_match else b''
        if filename:
            print(f'{filename=}')
            yield filename, content

        if truncated:
            return
        position = content_end + len(part_terminator)
        # The closing delimiter is followed by `--`: no more parts
        if multipart.startswith(b'--', position):
            return
        # Skip the rest of the delimiter line
        line_end = multipart.find(b'\r\n', position)
        if line_end == -1:
            return
        position = line_end + 2


def save_parts(parts: Iterable[Tuple[bytes, bytes]], extracted_dir: bytes = b'extracted/') -> None:
    """Write each ``(filename, content)`` pair into *extracted_dir*.

    The directory is created if it does not exist. A later part with the same
    filename overwrites an earlier one.
    """
    os.makedirs(extracted_dir, exist_ok=True)
    for filename, content in parts:
        # Write the content bytes to a file
        with open(os.path.join(extracted_dir, filename), 'wb') as file:
            file.write(content)


def main() -> None:
    """Read ``extracted_multipart.bin`` and extract its files to ``extracted/``."""
    # Read captured multipart form data that was extracted via Wireshark
    with open('extracted_multipart.bin', 'rb') as file:
        multipart = file.read()
    save_parts(extract_parts(multipart))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment