# platomav/Binary_Text_Extract.py

## Binary_Text_Extract.py
#!/usr/bin/env python3
#coding=utf-8

"""
Binary Text Extract
Binary ASCII/Unicode Extractor
Copyright (C) 2021 Plato Mavropoulos
Based on https://gist.github.com/williballenthin/8e3913358a7996eab9b96bd57fc59df2 by Willi Ballenthin
"""

# Tool name and version. Printed as a banner right away and also used for the
# console window title and the crash-report message.
title = 'Binary ASCII/Unicode Extractor v1.0'

print('\n' + title)

import sys

# Detect Python version
# This check runs before the remaining imports and aborts with exit code 1
# on any interpreter older than 3.7.
sys_ver = sys.version_info
if sys_ver < (3,7) :
	sys.stdout.write('\n\nError: Python >= 3.7 required, not %d.%d!\n' % (sys_ver[0], sys_ver[1]))
	# Pick the prompt builtin by major version: raw_input on Python 2, input otherwise
	(raw_input if sys_ver[0] <= 2 else input)('\nPress enter to exit') # pylint: disable=E0602
	sys.exit(1)

import os
import re
import ctypes
import argparse
import traceback
import collections

# Pause after any unexpected Python exception
# https://stackoverflow.com/a/781074 by Torsten Marek
def show_exception_and_exit(exc_type, exc_value, tb) :
	"""Exception hook: report the crash, pause, then exit with code 1.

	A KeyboardInterrupt only prints a blank line. Any other exception prints
	a crash notice followed by the full traceback and then waits until Enter
	is pressed. In both cases the process ends through sys.exit(1).
	"""
	interrupted = exc_type is KeyboardInterrupt

	if not interrupted :
		print('\nError: %s crashed, please report the following:\n' % title)
		traceback.print_exception(exc_type, exc_value, tb)
		input('\nPress enter to exit')
	else :
		print('\n')

	sys.exit(1)

# Route every uncaught exception through the pause-able handler
sys.excepthook = show_exception_and_exit

# Label the console/shell window with the tool title
user_os = sys.platform
if user_os == 'win32' :
	# Windows: set the title through the kernel32 console API
	ctypes.windll.kernel32.SetConsoleTitleW(title)
elif user_os == 'darwin' or user_os.startswith('linux') or 'bsd' in user_os :
	# Linux, macOS and BSD: write the title inside a terminal escape sequence
	sys.stdout.write('\x1b]2;' + title + '\x07')

# Command line interface: any number of input files, or a folder through
# -p/--path, plus the minimum text length through -s/--size
text_extractor = argparse.ArgumentParser()
text_extractor.add_argument(
	'files',
	nargs='*',
	type=argparse.FileType('r'),
)
text_extractor.add_argument(
	'-p', '--path',
	type=str,
	help='parse files within given folder',
)
text_extractor.add_argument(
	'-s', '--size',
	type=int,
	help='find text of given size or more (default is 4)',
)
text_params = text_extractor.parse_args()

# Get all files within path
def get_files(path) :
	"""Return the path of every file found under `path`, searched recursively.

	Each entry joins the walked directory with the file name, and entries are
	listed in os.walk order. The result is an empty list when no file is found.
	"""
	return [
		os.path.join(folder, name)
		for folder, _, names in os.walk(path)
		for name in names
	]

# Choose the input files. The decision is based on the parsed arguments rather
# than on the raw argument count, so that passing only --size still reaches
# the interactive folder prompt instead of silently processing nothing.
if text_params.path :
	bin_files = get_files(text_params.path) # CLI with --path
elif text_params.files :
	bin_files = []
	for executable in text_params.files :
		bin_files.append(executable.name) # Drag & Drop
		# argparse.FileType already opened the file but only its name is
		# needed, so release the handle right away. Standard input (given
		# as "-") is left open because the prompts still read from it.
		if executable is not sys.stdin :
			executable.close()
else :
	in_path = input('\nEnter the full folder path: ')
	bin_files = get_files(in_path) # Direct Run

# Body of a regex character class listing the bytes accepted as text:
# space, ASCII punctuation, digits, letters and tab
ASCII_BYTE = rb' !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~\t'

# One extracted text: the decoded string and the byte offset where it starts
String = collections.namedtuple('String', ('s', 'offset'))

# Minimum text length: 4 unless a non-zero --size was given
char_count = text_params.size or 4

# Get ASCII Strings
def ascii_strings(buffer, char_count) :
	"""Yield a String for every ASCII text run found in `buffer`.

	A run is `char_count` or more consecutive bytes taken from ASCII_BYTE.
	Each String holds the run decoded as ASCII and the offset where it starts.
	"""
	pattern = re.compile(rb'([%s]{%d,})' % (ASCII_BYTE, char_count))

	for found in pattern.finditer(buffer) :
		text = found.group().decode('ascii')
		yield String(text, found.start())

# Get Unicode Strings
def unicode_strings(buffer, char_count) :
	"""Yield a String for every UTF-16 little-endian text run in `buffer`.

	A run is `char_count` or more consecutive two-byte units, each made of one
	ASCII_BYTE byte followed by 0x00. Each String holds the decoded text and
	the byte offset where the run starts.
	"""
	uni_re = re.compile(rb'((?:[%s]\x00){%d,})' % (ASCII_BYTE, char_count))

	for match in uni_re.finditer(buffer) :
		try :
			# The pattern fixes the byte order (low byte first), so decode as
			# little-endian explicitly. The plain 'utf-16' codec uses the
			# platform's native order when no BOM is present, which garbles
			# the text on big-endian hosts.
			text = match.group().decode('utf-16-le')
		except UnicodeDecodeError :
			# Best effort: skip a run that cannot be decoded
			continue

		yield String(text, match.start())

def main() :
	"""Extract ASCII and Unicode text from every input file.

	For each path in bin_files the file is read in full. Its ASCII runs and
	then its Unicode runs of at least char_count characters are formatted as
	"0x<offset>: <text>" lines and written as UTF-8 to "<input file>.txt" in
	the same folder as the input. Paths that are not files are reported and
	skipped.
	"""
	for input_file in bin_files :
		file_name = os.path.basename(input_file)

		print('\n*** %s' % file_name)

		if not os.path.isfile(input_file) :
			print('\n    Error: Cannot find input file %s!' % file_name)
			continue

		with open(input_file, 'rb') as in_file :
			buffer = in_file.read()

		# Collect the lines in a list and join them once, instead of growing
		# a single string with += for every match. ASCII results come first,
		# Unicode results second.
		output_lines = [
			'0x{:08X}: {:s}\n'.format(s.offset, s.s)
			for extract in (ascii_strings, unicode_strings)
			for s in extract(buffer, char_count)
		]

		input_dir = os.path.dirname(os.path.abspath(input_file))
		output_path = os.path.join(input_dir, file_name + '.txt')

		with open(output_path, 'w', encoding='utf-8') as out :
			out.write(''.join(output_lines))

		print('\n    Extracted %d ASCII/Unicode line(s) of length >= %d!' % (len(output_lines), char_count))

# Entry point: run the extraction only when executed as a script
if __name__ == '__main__' :
	main()