Skip to content

Instantly share code, notes, and snippets.

@UserUnknownFactor
Created July 4, 2024 13:54
Show Gist options
  • Save UserUnknownFactor/653c6e07df920d2a253997b1b1860ccc to your computer and use it in GitHub Desktop.
Save UserUnknownFactor/653c6e07df920d2a253997b1b1860ccc to your computer and use it in GitHub Desktop.
Split a file into two by binary signature
import argparse, shutil, os
def _canonical_path(path):
    """Return a normalised absolute form of *path* for equality comparison."""
    return os.path.normcase(os.path.realpath(path))


def _copy_exact(src, dst, count, chunk_size=1024 * 1024):
    """Copy at most *count* bytes from *src* to *dst* in bounded chunks."""
    remaining = count
    while remaining > 0:
        piece = src.read(min(chunk_size, remaining))
        if not piece:
            break  # source ended early; nothing more to copy
        dst.write(piece)
        remaining -= len(piece)


def find_and_dump(file_path, signature, ext1, ext2):
    """Split a file in two at the first occurrence of a byte signature.

    The file is scanned in fixed-size chunks. When the signature is found,
    everything before it is written to ``<base><ext2>`` and everything from
    the signature onwards (signature included) is written to
    ``<base><ext1>``, where ``<base>`` is ``file_path`` with its extension
    removed.

    Args:
        file_path: Path to the file to search.
        signature: Non-empty byte signature to search for.
        ext1: Extension of the output file receiving the signature and all
            content after it. It is appended verbatim to the base name, so
            include the leading dot.
        ext2: Extension of the output file receiving the content before the
            signature. Appended verbatim as well.

    Returns:
        True if the signature was found and both parts were dumped,
        False if the signature does not occur in the file.

    Raises:
        ValueError: If ``signature`` is empty, or if an output path would
            coincide with the input file or with the other output file
            (writing would destroy data that still has to be read/kept).
    """
    if not signature:
        raise ValueError("signature must not be empty")

    with open(file_path, 'rb') as f:
        buffer_size = 4096
        print(f"signature: {signature} buffer: {buffer_size}")

        # Number of trailing bytes carried into the next window. One byte
        # less than the signature is enough to catch a match that straddles
        # a chunk boundary; a complete match would already have been found.
        overlap = len(signature) - 1
        carry = b''
        window_start = 0  # file offset of the first byte of the window

        for chunk in iter(lambda: f.read(buffer_size), b''):
            window = carry + chunk
            pos = window.find(signature)
            if pos != -1:
                file_pos = window_start + pos

                # Output names reuse the input's base name with new extensions.
                base, _ = os.path.splitext(file_path)
                before_file_path = base + ext2
                after_file_path = base + ext1

                # Opening an output with 'wb' truncates it immediately, so it
                # must never alias the input or the other output.
                source_key = _canonical_path(file_path)
                before_key = _canonical_path(before_file_path)
                after_key = _canonical_path(after_file_path)
                if before_key == source_key or after_key == source_key:
                    raise ValueError(
                        f"Output file would overwrite the input file: {file_path}"
                    )
                if before_key == after_key:
                    raise ValueError(
                        f"Both output files resolve to the same path: {before_file_path}"
                    )

                # Dump content before the signature (streamed, not slurped).
                with open(before_file_path, 'wb') as before_file:
                    f.seek(0)
                    _copy_exact(f, before_file, file_pos)
                print(f"Content before signature dumped to: {before_file_path}")

                # Dump the signature and everything after it.
                with open(after_file_path, 'wb') as after_file:
                    f.seek(file_pos)
                    shutil.copyfileobj(f, after_file)
                print(f"Content after signature dumped to: {after_file_path}")
                return True

            # Keep the tail of the *whole window* (not just the last chunk) so
            # signatures longer than one chunk are still matched.
            keep = min(overlap, len(window))
            window_start += len(window) - keep
            carry = window[len(window) - keep:]

    # Signature not found
    print(f"Signature not found in {file_path}")
    return False
if __name__ == "__main__":
    # Command-line entry point: parse arguments, build the byte signature and
    # split the given file with find_and_dump().
    parser = argparse.ArgumentParser(description='Find a byte signature and dump content.')
    parser.add_argument('file', help='Path to the file to search.')
    parser.add_argument('-s', '--signature', default=None, help='Byte signature to search for (e.g., "DE AD BE EF").')
    parser.add_argument('-b', '--beforeext', default='.bin', help='Extension of the first file.')
    parser.add_argument('-a', '--aftertext', default='.xp3', help='Extension of the second file.')
    args = parser.parse_args()

    # Convert signature string to bytes object. Without -s the built-in
    # default is used (presumably the XP3 archive header magic -- it starts
    # with b'XP3' and pairs with the '.xp3' default extension).
    if not args.signature:
        signature_bytes = b'XP3\r\n\x20\x0A\x1A\x8B\x67\x01'
    else:
        try:
            signature_bytes = bytes.fromhex(args.signature.replace(" ", ""))
        except ValueError as exc:
            # Report malformed hex as a usage error instead of a traceback.
            parser.error(f"invalid hex signature {args.signature!r}: {exc}")

    found = find_and_dump(args.file, signature_bytes, args.aftertext, args.beforeext)
    # Propagate the outcome to the shell: 0 when the file was split, 1 when
    # the signature was not found.
    raise SystemExit(0 if found else 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment