nstarke/find-compressed-data.py

## find-compressed-data.py
#!/usr/bin/env python3

#
# find-compressed-data.py
#
# A small script to bruteforce embedded compressed data that might not have a header
# Useful for raw binary firmware images that do not contain a standard
# binary header (ELF, PE, MACH-O).
#
# I included a limit on size at 16KB because this has a tendency to create
# lots of small files, which are generally false positives.
#
# I usually run this over every firmware image I need to analyze.
#
# Usage: python find-compressed-data.py "filename.bin"
#

import zlib
import sys
import lzma
import bz2
import zipfile
import threading
import copy

# Size threshold in bytes (16 KiB): a decompressed candidate is only reported
# and written to disk when its length is strictly greater than this value.
LIMIT = 1024 * 16

def do_bz2(compressed_data, limit=None):
    """Brute-force search ``compressed_data`` for embedded bzip2 streams.

    Every byte offset is tried as the start of a bzip2 stream.  For each
    offset whose decompressed output is larger than ``limit`` bytes, the
    offset is printed and the decompressed bytes are written to
    ``./result-bz2-<offset>.bin.bz2`` in the current working directory.

    Args:
        compressed_data: Bytes-like object holding the raw image to scan.
        limit: Size threshold in bytes; only results strictly larger than
            this are kept.  Defaults to the module-level ``LIMIT``.
    """
    if limit is None:
        limit = LIMIT
    # A memoryview lets every offset be tried without copying the tail of
    # the buffer on each iteration.
    view = memoryview(compressed_data)
    for i in range(len(view)):
        try:
            unzipped = bz2.decompress(view[i:])
        except (OSError, ValueError):
            # No valid / complete bzip2 stream starts here; keep scanning.
            continue
        if len(unzipped) > limit:
            print('BZ2: Offset Found', i)
            # NOTE: the file holds the *decompressed* bytes; the historical
            # ".bz2" suffix is kept so existing tooling finds the same names.
            with open('./result-bz2-' + str(i) + '.bin.bz2', 'wb') as result:
                result.write(unzipped)

def do_lzma(compressed_data, limit=None):
    """Brute-force search ``compressed_data`` for embedded LZMA/XZ streams.

    Every byte offset is tried as the start of a stream, using
    ``lzma.decompress`` with its default format handling.  For each offset
    whose decompressed output is larger than ``limit`` bytes, the offset is
    printed and the decompressed bytes are written to
    ``./result-lzma-<offset>.bin.lzma`` in the current working directory.

    Args:
        compressed_data: Bytes-like object holding the raw image to scan.
        limit: Size threshold in bytes; only results strictly larger than
            this are kept.  Defaults to the module-level ``LIMIT``.
    """
    if limit is None:
        limit = LIMIT
    # A memoryview lets every offset be tried without copying the tail of
    # the buffer on each iteration.
    view = memoryview(compressed_data)
    for i in range(len(view)):
        try:
            unzipped = lzma.decompress(view[i:])
        except (lzma.LZMAError, MemoryError):
            # No valid stream starts here.  MemoryError is tolerated as well
            # because a bogus header may request an unreasonably large
            # allocation; that is a false positive, not a fatal condition.
            continue
        if len(unzipped) > limit:
            print('LZMA: Offset Found', i)
            # NOTE: the file holds the *decompressed* bytes; the historical
            # ".lzma" suffix is kept so existing tooling finds the same names.
            with open('./result-lzma-' + str(i) + '.bin.lzma', 'wb') as result:
                result.write(unzipped)

def do_zlib(compressed_data, limit=None):
    """Brute-force search ``compressed_data`` for embedded raw deflate data.

    Every byte offset is tried as the start of a headerless deflate stream
    (``wbits=-zlib.MAX_WBITS``).  For each offset whose decompressed output
    is larger than ``limit`` bytes, the offset is printed and the
    decompressed bytes are written to ``./result-gz-<offset>.bin.gz`` in the
    current working directory.

    Args:
        compressed_data: Bytes-like object holding the raw image to scan.
        limit: Size threshold in bytes; only results strictly larger than
            this are kept.  Defaults to the module-level ``LIMIT``.
    """
    if limit is None:
        limit = LIMIT
    # A memoryview lets every offset be tried without copying the tail of
    # the buffer on each iteration.
    view = memoryview(compressed_data)
    for i in range(len(view)):
        try:
            unzipped = zlib.decompress(view[i:], -zlib.MAX_WBITS)
        except zlib.error:
            # No valid / complete deflate stream starts here; keep scanning.
            continue
        if len(unzipped) > limit:
            print('GZIP: Offset found', i)
            # NOTE: the file holds the *decompressed* bytes; the historical
            # ".gz" suffix is kept so existing tooling finds the same names.
            with open('./result-gz-' + str(i) + '.bin.gz', 'wb') as result:
                result.write(unzipped)

# Script driver: read the image named on the command line and scan it for
# deflate, LZMA and bzip2 data concurrently, one thread per format.
if len(sys.argv) < 2:
    # Fail with a usage hint instead of a bare IndexError traceback.
    sys.exit('Usage: python find-compressed-data.py "filename.bin"')

# Read everything up front so the handle is closed before the long-running
# scans start; a separate name avoids rebinding the handle to its contents.
with open(sys.argv[1], 'rb') as image_file:
    compressed_data = image_file.read()

# bytes objects are immutable, so all scanners can share the same buffer
# (copy.copy() on bytes hands back the very same object anyway).
thread_zlib = threading.Thread(target=do_zlib, args=(compressed_data,))
thread_lzma = threading.Thread(target=do_lzma, args=(compressed_data,))
thread_bz2 = threading.Thread(target=do_bz2, args=(compressed_data,))
thread_zlib.start()
thread_lzma.start()
thread_bz2.start()
	#!/usr/bin/env python3

	#
	# find-compressed-data.py
	#
	# A small script to bruteforce embedded compressed data that might not have a header
	# Useful for raw binary firmware images that do not contain a standard
	# binary header (ELF, PE, MACH-O).
	#
	# I included a limit on size at 16KB because this has a tendency to create
	# lots of small files, which are generally false positives.
	#
	# I usually run this over every firmware image I need to analyze.
	#
	# Usage: python find-compressed-data.py "filename.bin"
	#

	import zlib
	import sys
	import lzma
	import bz2
	import zipfile
	import threading
	import copy

	# Size threshold in bytes (16 KiB): a decompressed candidate is only reported
	# and written to disk when its length is strictly greater than this value.
	LIMIT = 1024 * 16

	def do_bz2(compressed_data, limit=None):
	    """Brute-force search ``compressed_data`` for embedded bzip2 streams.
	
	    Every byte offset is tried as the start of a bzip2 stream.  For each
	    offset whose decompressed output is larger than ``limit`` bytes, the
	    offset is printed and the decompressed bytes are written to
	    ``./result-bz2-<offset>.bin.bz2`` in the current working directory.
	
	    Args:
	        compressed_data: Bytes-like object holding the raw image to scan.
	        limit: Size threshold in bytes; only results strictly larger than
	            this are kept.  Defaults to the module-level ``LIMIT``.
	    """
	    if limit is None:
	        limit = LIMIT
	    # A memoryview lets every offset be tried without copying the tail of
	    # the buffer on each iteration.
	    view = memoryview(compressed_data)
	    for i in range(len(view)):
	        try:
	            unzipped = bz2.decompress(view[i:])
	        except (OSError, ValueError):
	            # No valid / complete bzip2 stream starts here; keep scanning.
	            continue
	        if len(unzipped) > limit:
	            print('BZ2: Offset Found', i)
	            # NOTE: the file holds the *decompressed* bytes; the historical
	            # ".bz2" suffix is kept so existing tooling finds the same names.
	            with open('./result-bz2-' + str(i) + '.bin.bz2', 'wb') as result:
	                result.write(unzipped)

	def do_lzma(compressed_data, limit=None):
	    """Brute-force search ``compressed_data`` for embedded LZMA/XZ streams.
	
	    Every byte offset is tried as the start of a stream, using
	    ``lzma.decompress`` with its default format handling.  For each offset
	    whose decompressed output is larger than ``limit`` bytes, the offset is
	    printed and the decompressed bytes are written to
	    ``./result-lzma-<offset>.bin.lzma`` in the current working directory.
	
	    Args:
	        compressed_data: Bytes-like object holding the raw image to scan.
	        limit: Size threshold in bytes; only results strictly larger than
	            this are kept.  Defaults to the module-level ``LIMIT``.
	    """
	    if limit is None:
	        limit = LIMIT
	    # A memoryview lets every offset be tried without copying the tail of
	    # the buffer on each iteration.
	    view = memoryview(compressed_data)
	    for i in range(len(view)):
	        try:
	            unzipped = lzma.decompress(view[i:])
	        except (lzma.LZMAError, MemoryError):
	            # No valid stream starts here.  MemoryError is tolerated as well
	            # because a bogus header may request an unreasonably large
	            # allocation; that is a false positive, not a fatal condition.
	            continue
	        if len(unzipped) > limit:
	            print('LZMA: Offset Found', i)
	            # NOTE: the file holds the *decompressed* bytes; the historical
	            # ".lzma" suffix is kept so existing tooling finds the same names.
	            with open('./result-lzma-' + str(i) + '.bin.lzma', 'wb') as result:
	                result.write(unzipped)

	def do_zlib(compressed_data, limit=None):
	    """Brute-force search ``compressed_data`` for embedded raw deflate data.
	
	    Every byte offset is tried as the start of a headerless deflate stream
	    (``wbits=-zlib.MAX_WBITS``).  For each offset whose decompressed output
	    is larger than ``limit`` bytes, the offset is printed and the
	    decompressed bytes are written to ``./result-gz-<offset>.bin.gz`` in the
	    current working directory.
	
	    Args:
	        compressed_data: Bytes-like object holding the raw image to scan.
	        limit: Size threshold in bytes; only results strictly larger than
	            this are kept.  Defaults to the module-level ``LIMIT``.
	    """
	    if limit is None:
	        limit = LIMIT
	    # A memoryview lets every offset be tried without copying the tail of
	    # the buffer on each iteration.
	    view = memoryview(compressed_data)
	    for i in range(len(view)):
	        try:
	            unzipped = zlib.decompress(view[i:], -zlib.MAX_WBITS)
	        except zlib.error:
	            # No valid / complete deflate stream starts here; keep scanning.
	            continue
	        if len(unzipped) > limit:
	            print('GZIP: Offset found', i)
	            # NOTE: the file holds the *decompressed* bytes; the historical
	            # ".gz" suffix is kept so existing tooling finds the same names.
	            with open('./result-gz-' + str(i) + '.bin.gz', 'wb') as result:
	                result.write(unzipped)

	# Script driver: read the image named on the command line and scan it for
	# deflate, LZMA and bzip2 data concurrently, one thread per format.
	if len(sys.argv) < 2:
	    # Fail with a usage hint instead of a bare IndexError traceback.
	    sys.exit('Usage: python find-compressed-data.py "filename.bin"')
	
	# Read everything up front so the handle is closed before the long-running
	# scans start; a separate name avoids rebinding the handle to its contents.
	with open(sys.argv[1], 'rb') as image_file:
	    compressed_data = image_file.read()
	
	# bytes objects are immutable, so all scanners can share the same buffer
	# (copy.copy() on bytes hands back the very same object anyway).
	thread_zlib = threading.Thread(target=do_zlib, args=(compressed_data,))
	thread_lzma = threading.Thread(target=do_lzma, args=(compressed_data,))
	thread_bz2 = threading.Thread(target=do_bz2, args=(compressed_data,))
	thread_zlib.start()
	thread_lzma.start()
	thread_bz2.start()