Last active
October 14, 2024 05:42
-
-
Save nstarke/f23b018c4e0fd4c8f8ca1cd67dc2b61c to your computer and use it in GitHub Desktop.
Find Compressed Data without Compression Header
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# | |
# find-compressed-data.py | |
# | |
# A small script to bruteforce embedded compressed data that might not have a header | |
# Useful for raw binary firmware images that do not contain a standard | |
# binary header (ELF, PE, MACH-O). | |
# | |
# I included a limit on size at 16KB because this has a tendency to create | |
# lots of small files, which are generally false positives. | |
# | |
# I usually run this over every firmware image I need to analyze. | |
# | |
# Usage: python find-compressed-data.py "filename.bin" | |
# | |
import zlib | |
import sys | |
import lzma | |
import bz2 | |
import zipfile | |
import threading | |
import copy | |
LIMIT = 1024 * 16 | |
def do_bz2(compressed_data):
    """Brute-force every offset of *compressed_data* for a bzip2 stream.

    For each offset the remainder of the buffer is handed to
    ``bz2.decompress``.  When that succeeds and yields more than ``LIMIT``
    bytes, the offset is printed and the *decompressed* payload is written
    to ``./result-bz2-<offset>.bin.bz2`` (the file holds plain data despite
    its suffix).

    Args:
        compressed_data: bytes-like object holding the raw image to scan.
    """
    # A memoryview lets every offset be tried without copying the tail of
    # the buffer each time.
    view = memoryview(compressed_data)
    for i in range(len(view)):
        try:
            unzipped = bz2.decompress(view[i:])
        except (OSError, ValueError, MemoryError):
            # OSError: not a bzip2 stream; ValueError: truncated stream;
            # MemoryError: decoding garbage exhausted memory.  All of them
            # simply mean "nothing usable at this offset".
            continue
        if len(unzipped) <= LIMIT:
            # Small results are almost always false positives.
            continue
        print ('BZ2: Offset Found', i)
        out_path = './result-bz2-' + str(i) + '.bin.bz2'
        try:
            with open(out_path, 'wb') as result:
                result.write(unzipped)
        except OSError as ex:
            # Keep scanning, but do not hide the failure.
            print('BZ2: could not write', out_path, '-', ex, file=sys.stderr)
def do_lzma(compressed_data):
    """Brute-force every offset of *compressed_data* for an LZMA/XZ stream.

    For each offset the remainder of the buffer is handed to
    ``lzma.decompress``.  When that succeeds and yields more than ``LIMIT``
    bytes, the offset is printed and the *decompressed* payload is written
    to ``./result-lzma-<offset>.bin.lzma`` (the file holds plain data
    despite its suffix).

    Args:
        compressed_data: bytes-like object holding the raw image to scan.
    """
    # A memoryview lets every offset be tried without copying the tail of
    # the buffer each time.
    view = memoryview(compressed_data)
    for i in range(len(view)):
        try:
            unzipped = lzma.decompress(view[i:])
        except (lzma.LZMAError, MemoryError):
            # LZMAError: invalid or truncated stream; MemoryError: decoding
            # garbage exhausted memory.  Either way there is nothing usable
            # at this offset.
            continue
        if len(unzipped) <= LIMIT:
            # Small results are almost always false positives.
            continue
        print ('LZMA: Offset Found', i)
        out_path = './result-lzma-' + str(i) + '.bin.lzma'
        try:
            with open(out_path, 'wb') as result:
                result.write(unzipped)
        except OSError as ex:
            # Keep scanning, but do not hide the failure.
            print('LZMA: could not write', out_path, '-', ex, file=sys.stderr)
def do_zlib(compressed_data):
    """Brute-force every offset of *compressed_data* for a raw deflate stream.

    For each offset the remainder of the buffer is handed to
    ``zlib.decompress`` with ``-zlib.MAX_WBITS`` (headerless deflate, so no
    zlib/gzip header is required).  When that succeeds and yields more than
    ``LIMIT`` bytes, the offset is printed and the *decompressed* payload is
    written to ``./result-gz-<offset>.bin.gz`` (the file holds plain data
    despite its suffix; it is not a gzip archive).

    Args:
        compressed_data: bytes-like object holding the raw image to scan.
    """
    # A memoryview lets every offset be tried without copying the tail of
    # the buffer each time.
    view = memoryview(compressed_data)
    for i in range(len(view)):
        try:
            unzipped = zlib.decompress(view[i:], -zlib.MAX_WBITS)
        except (zlib.error, MemoryError):
            # zlib.error: invalid or truncated stream; MemoryError: decoding
            # garbage exhausted memory.  Either way there is nothing usable
            # at this offset.
            continue
        if len(unzipped) <= LIMIT:
            # Small results are almost always false positives.
            continue
        print ('GZIP: Offset found', i)
        out_path = './result-gz-' + str(i) + '.bin.gz'
        try:
            with open(out_path, 'wb') as result:
                result.write(unzipped)
        except OSError as ex:
            # Keep scanning, but do not hide the failure.
            print('GZIP: could not write', out_path, '-', ex, file=sys.stderr)
def main():
    """Read the file named on the command line and scan it with every decoder.

    The zlib, LZMA and bz2 scanners each run in their own thread over the
    same in-memory copy of the file.  Exits with a usage message when no
    file name is given.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: python find-compressed-data.py "filename.bin"')

    with open(sys.argv[1], 'rb') as image_file:
        compressed_data = image_file.read()

    # bytes objects are immutable, so the scanners can safely share one
    # buffer; no per-thread copy is needed.
    scanners = (do_zlib, do_lzma, do_bz2)
    workers = [
        threading.Thread(target=scanner, args=(compressed_data,))
        for scanner in scanners
    ]
    for worker in workers:
        worker.start()
    # Wait explicitly so the script ends only once every scan has finished.
    for worker in workers:
        worker.join()


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Sure, it produces files that are kilobytes in size, but even though those files are named
.gz
, the header is not rebuilt, so they are identified as many different things and are not supported by gzip
, and often they are not even compressed (for example, it produced an HTML file that I was able to read directly in a browser). So I guess its method of finding compressed bits actually finds far more than compressed data. Or I am mistaken, and it decompresses the data and writes out each piece it finds separately. But in that case I don't understand the extension.