Skip to content

Instantly share code, notes, and snippets.

@platomav
Forked from williballenthin/strings.py
Created July 14, 2022 21:10
Show Gist options
  • Save platomav/bfe567b0a810bd0d5294ac6e83cdde19 to your computer and use it in GitHub Desktop.
Save platomav/bfe567b0a810bd0d5294ac6e83cdde19 to your computer and use it in GitHub Desktop.
Extract ASCII and Unicode strings using Python.
#!/usr/bin/env python3
#coding=utf-8
"""
Binary Text Extract
Binary ASCII/Unicode Extractor
Copyright (C) 2021 Plato Mavropoulos
Based on https://gist.github.com/williballenthin/8e3913358a7996eab9b96bd57fc59df2 by Willi Ballenthin
"""
title = 'Binary ASCII/Unicode Extractor v1.0'

# Show the tool banner first
print('\n' + title)

import sys

# Detect Python version
# This guard sticks to %-formatting (no f-strings) so that the check itself
# does not depend on newer syntax.
sys_ver = sys.version_info

if sys_ver < (3, 7):
    sys.stdout.write('\n\nError: Python >= 3.7 required, not %d.%d!\n' % sys_ver[:2])

    # Pick whichever prompt builtin this interpreter provides
    pause = raw_input if sys_ver[0] <= 2 else input # pylint: disable=E0602
    pause('\nPress enter to exit')

    sys.exit(1)
import os
import re
import ctypes
import argparse
import traceback
import collections
# Pause after any unexpected Python exception
# https://stackoverflow.com/a/781074 by Torsten Marek
def show_exception_and_exit(exc_type, exc_value, tb):
    """Report an uncaught exception, pause, and terminate with status 1.

    Intended to be installed as ``sys.excepthook`` so the console window
    stays open long enough for the user to read the traceback.

    Args:
        exc_type: Class of the uncaught exception.
        exc_value: The exception instance.
        tb: Traceback object associated with the exception.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    if exc_type is not KeyboardInterrupt:
        print('\nError: %s crashed, please report the following:\n' % title)
        traceback.print_exception(exc_type, exc_value, tb)
        # Keep the window open until the user acknowledges the crash
        input('\nPress enter to exit')
    else:
        # Ctrl+C is a deliberate abort: no report and no pause, just spacing
        print('\n')

    sys.exit(1)
# Route every uncaught exception through the pause-able handler
sys.excepthook = show_exception_and_exit

# Set console/shell window title
user_os = sys.platform
is_unix_like = user_os.startswith('linux') or user_os == 'darwin' or 'bsd' in user_os

if user_os == 'win32':
    # Windows: set the title through the kernel32 console API
    ctypes.windll.kernel32.SetConsoleTitleW(title)
elif is_unix_like:
    # Linux/macOS/BSD: write a terminal escape sequence that carries the title
    sys.stdout.write('\x1b]2;' + title + '\x07')
# Build the command line interface and parse the process arguments
text_extractor = argparse.ArgumentParser()

# Positional inputs (zero or more); argparse opens each one for reading while parsing
text_extractor.add_argument(
    'files',
    nargs='*',
    type=argparse.FileType('r'),
)

# Optional folder whose files are processed instead of the positional inputs
text_extractor.add_argument(
    '-p', '--path',
    type=str,
    help='parse files within given folder',
)

# Optional minimum length of the text runs to report
text_extractor.add_argument(
    '-s', '--size',
    type=int,
    help='find text of given size or more (default is 4)',
)

text_params = text_extractor.parse_args()
# Get all files within path
def get_files(path):
    """Return the paths of all files located under *path*, recursively.

    Args:
        path: Folder to walk with ``os.walk``.

    Returns:
        A list in which every entry is the walked folder joined with a file
        name found in it. The list is empty when the walk yields no files.
    """
    return [
        os.path.join(folder, file_name)
        for folder, _, file_names in os.walk(path)
        for file_name in file_names
    ]
# Decide which files to process:
#   1. --path given          -> every file under that folder
#   2. positional files given -> exactly those files (e.g. Drag & Drop)
#   3. neither                -> ask the user for a folder
# Case 3 also covers a command line that only carries --size, which would
# otherwise leave the list empty and make the tool do nothing silently.
if text_params.path:
    bin_files = get_files(text_params.path) # CLI with --path
elif text_params.files:
    bin_files = []
    for executable in text_params.files:
        bin_files.append(executable.name) # Drag & Drop

        # Only the name is needed; release the handle that argument parsing
        # opened. A '-' argument maps to the process stdin, which must stay
        # open for later prompts.
        if executable is not sys.stdin:
            executable.close()
else:
    in_path = input('\nEnter the full folder path: ')
    bin_files = get_files(in_path) # Direct Run
# Body of a regex character class: space through '~' plus tab, with the
# regex-special characters already escaped
ASCII_BYTE = rb' !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~\t'

# One extracted text run: the decoded string and its offset within the buffer
String = collections.namedtuple('String', ('s', 'offset'))

# Minimum run length to report; falls back to 4 when --size is absent or zero
char_count = text_params.size or 4
# Get ASCII Strings
def ascii_strings(buffer, char_count):
    """Yield every run of single-byte ASCII text found in *buffer*.

    Args:
        buffer: Bytes-like object to scan.
        char_count: Minimum number of consecutive ASCII_BYTE characters
            that make up a run.

    Yields:
        String: The decoded run together with its starting offset in
        *buffer*.
    """
    pattern = re.compile(rb'([%s]{%d,})' % (ASCII_BYTE, char_count))

    for hit in pattern.finditer(buffer):
        text = hit.group().decode('ascii')
        yield String(text, hit.start())
# Get Unicode Strings
def unicode_strings(buffer, char_count):
    """Yield every run of UTF-16LE encoded ASCII text found in *buffer*.

    A run is a sequence of at least *char_count* two-byte units, each made of
    one ASCII_BYTE character followed by a NUL byte.

    Args:
        buffer: Bytes-like object to scan.
        char_count: Minimum number of consecutive characters in a run.

    Yields:
        String: The decoded run together with its starting byte offset in
        *buffer*.
    """
    uni_re = re.compile(rb'((?:[%s]\x00){%d,})' % (ASCII_BYTE, char_count))

    for match in uni_re.finditer(buffer):
        # The pattern only ever matches little-endian units (low byte first),
        # so decode explicitly as UTF-16LE. The generic 'utf-16' codec picks
        # its byte order from a BOM or, lacking one, from the platform, which
        # would garble the text on a big-endian host.
        # Every match is a whole number of valid code units, so decoding
        # cannot fail and no error handling is needed here.
        yield String(match.group().decode('utf-16-le'), match.start())
def main():
    """Extract ASCII and Unicode text from every file listed in bin_files.

    For each input file the whole content is read, scanned with
    ascii_strings() and then unicode_strings(), and the hits are written as
    '0x<offset>: <text>' lines to '<input file name>.txt' next to the input.
    Inputs that are not existing files are reported and skipped.
    """
    for input_file in bin_files:
        input_base = os.path.basename(input_file)

        print('\n*** %s' % input_base)

        if not os.path.isfile(input_file):
            print('\n Error: Cannot find input file %s!' % input_base)
            continue

        with open(input_file, 'rb') as in_file:
            buffer = in_file.read()

        # Collect the lines in a list instead of growing one big string with
        # "+=", which can degrade badly on files with many hits. ASCII hits
        # come first, then Unicode hits, each in offset order.
        output_lines = [
            '0x{:08X}: {:s}\n'.format(found.offset, found.s)
            for extract in (ascii_strings, unicode_strings)
            for found in extract(buffer, char_count)
        ]

        # The report is stored alongside the input file
        input_dir = os.path.dirname(os.path.abspath(input_file))
        output_path = os.path.join(input_dir, input_base + '.txt')

        with open(output_path, 'w', encoding='utf-8') as out:
            out.writelines(output_lines)

        print('\n Extracted %d ASCII/Unicode line(s) of length >= %d!' % (len(output_lines), char_count))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment