Skip to content

Instantly share code, notes, and snippets.

@nmlgc
Created July 8, 2015 15:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nmlgc/d39befaa43360ae2b4aa to your computer and use it in GitHub Desktop.
Save nmlgc/d39befaa43360ae2b4aa to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# Touhou Community Reliant Automatic Patcher
# Scripts
#
# ----
#
"""Locates hardcoded strings in a PE file according to a stringdefs.js file and
an optional blacklist. Dumps their relative positions in stringlocs format, as
well as lists of both newly found and garbage strings in the following format:
[
"string", relative position, file offset,
...
]"""
import shutil
import os
import argparse
import json
import pefile
import re
import utils
# Command-line interface. argparse stores arguments as strings by default,
# so the explicit `type=str` of each option is left implicit here.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('pe', help='Portable Executable file.')
parser.add_argument('stringdefs', help='Original string definitions.')
parser.add_argument(
    '-s', '--section', metavar='.sect', default='.rdata',
    help='Name of the PE section to parse for strings (default: ".rdata").',
)
parser.add_argument(
    '-b', '--blacklist', metavar='fn', default='',
    help='Optional file containing a JSON array of strings to ignore.',
)
parser.add_argument(
    '-e', '--encoding', default='cp932',
    help='String encoding (default: "cp932").',
)
def json_loadf(fn):
    """Load the JSON file [fn] via utils.json_load().

    Returns an empty dict when [fn] is empty, so optional file arguments
    (e.g. --blacklist) cleanly default to "no entries".
    """
    # Truthiness instead of len(): an empty string is falsy.
    if fn:
        return utils.json_load(fn)
    return {}
# Matches a single control character (U+0000–U+001F). Compiled once at
# module load instead of on every filter_string() call.
_RX_GARBAGE = re.compile(r'[\u0000-\u001f]')


def filter_string(string):
    """Strip control characters from [string].

    Returns '' when the last character of [string] is a control character
    (taken as a sign the whole candidate is garbage rather than text);
    otherwise returns [string] with all control characters removed.
    An empty input yields an empty result.
    """
    # string[-1:] is safe on the empty string (yields '', which never matches).
    if _RX_GARBAGE.match(string[-1:]):
        return ''
    return _RX_GARBAGE.sub('', string)
def get_string_from_data(offset, data):
    """Return the NUL-terminated byte string starting at [offset] in [data].

    Returns all remaining bytes when no NUL terminator follows, and an
    empty bytearray when [offset] lies at or past the end of [data].
    """
    # bytes/bytearray.find() accepts an integer needle (the byte value);
    # this replaces the original byte-at-a-time loop with one C-level scan.
    end = data.find(0, offset)
    if end == -1:
        end = len(data)
    return bytearray(data[offset:end])
def push_new(array, pe, offset, str_raw, encoding):
    """Decode [str_raw] and append a (string, RVA, file offset) triple to [array].

    Silently skips strings that cannot be decoded with [encoding], that
    filter_string() rejects as garbage, or that are already in [array].
    """
    try:
        decoded = str_raw.decode(encoding)
    except UnicodeDecodeError:
        return
    if not filter_string(decoded) or decoded in array:
        return
    array += [
        decoded,
        "Rx{:x}".format(pe.get_rva_from_offset(offset)),
        offset,
    ]
def locate_string(pe, data, data_offset, string, encoding, garbage):
    """Returns the RVA of [string].

    Searches [data] for the NUL-terminated encoding of [string]. On a hit,
    the matched bytes — and any non-NUL bytes directly preceding them,
    which are pushed onto [garbage] via push_new() — are zeroed in place
    so later searches cannot match the same region again. Returns None
    when [string] cannot be encoded with [encoding] or is not found.
    """
    rva = None
    try:
        # Append a NUL terminator so only complete strings match.
        str_enc = string.encode(encoding) + bytearray(1)
        pos = data.find(str_enc)
    except UnicodeEncodeError:
        pos = -1
    if pos != -1:
        rva = "Rx{:x}".format(pe.get_rva_from_offset(data_offset + pos))
        # Zero out the match (including its terminator) in place.
        data[pos:pos+len(str_enc)] = bytearray(len(str_enc))
        # Clear every byte before the string until a null byte is reached.
        # Everything in that range is sure to be non-string garbage.
        g = pos - 1
        while (g != -1) and (data[g] != 0):
            g = g - 1
        # NOTE(review): the garbage bytes start at [g + 1], yet the offset
        # reported here is [data_offset + g] — looks off by one relative to
        # parse(), which passes the string's own start offset. Confirm
        # whether including the preceding NUL byte is intentional.
        push_new(garbage, pe, data_offset + g, data[g + 1:pos], encoding)
        data[g + 1:pos] = bytearray(pos - (g + 1))
    return rva
def locate(data, data_offset, stringdefs, blacklist, encoding):
    """Locate every string from [stringdefs] and [blacklist] in [data].

    Returns a (found, garbage) tuple: [found] maps each located RVA string
    to its stringdefs key; [garbage] collects the non-string byte runs that
    locate_string() cleared along the way. Relies on the module-level [pe]
    object.
    """
    found = {}
    garbage = []
    for key, candidates in stringdefs.items():
        # A definition may be a single string or a list of alternatives.
        candidates = [candidates] if type(candidates) is str else candidates
        for candidate in candidates:
            rva = locate_string(
                pe, data, data_offset, candidate, encoding, garbage
            )
            if rva:
                found[rva] = key
    # Blacklisted strings are located (and cleared) but not recorded.
    for entry in blacklist:
        locate_string(pe, data, data_offset, entry, encoding, garbage)
    return found, garbage
def parse(data, data_offset, encoding):
    """Collect every remaining NUL-terminated string in [data].

    Walks [data] string by string and pushes each candidate (plus its RVA
    and file offset) onto the returned list via push_new(). Relies on the
    module-level [pe] object.
    """
    results = []
    offset = 0
    total = len(data)
    while offset < total:
        raw = get_string_from_data(offset, data)
        push_new(results, pe, offset + data_offset, raw, encoding)
        # Skip past the string and its NUL terminator.
        offset += len(raw) + 1
    return results
if __name__ == '__main__':
    arg = parser.parse_args()
    pe = pefile.PE(name=arg.pe)
    stringdefs = json_loadf(arg.stringdefs)
    blacklist = json_loadf(arg.blacklist)
    # Keep only plain strings; drop previously dumped "Rx…" RVA markers so
    # a prior run's output can be fed back in as a blacklist.
    blacklist = [x for x in blacklist if type(x) is str and x[:2] != 'Rx']
    # Matches the ',\n\t"Rx…",\n\t' separators that json.dumps() emits
    # around RVA entries, so each [string, rva, offset] triple can be
    # rejoined onto one line (see the format in the module docstring).
    rx_pseudodict = re.compile(r',\n\t"Rx([0-9a-f]+)",\n\t')
    for i in pe.sections:
        # Prefix comparison: PE section names are NUL-padded to 8 bytes,
        # so an exact == against e.g. ".rdata" would never match.
        if i.Name.decode('ascii')[:len(arg.section)] == arg.section:
            start = i.PointerToRawData
            end = start + i.SizeOfRawData
            # Work on a mutable copy: locate() zeroes matched bytes so the
            # later parse() only sees the leftovers.
            data = bytearray(pe.__data__[start:end])
            found, garbage = locate(
                data, start, stringdefs, blacklist, arg.encoding
            )
            new = parse(data, start, arg.encoding)
    # NOTE(review): indentation was lost in this paste; the dump/print
    # below is assumed to run once after the section scan (and will raise
    # NameError if no section matched) — confirm against the original.
    new_dump = json.dumps(new, **utils.json_dump_params)
    garbage_dump = json.dumps(garbage, **utils.json_dump_params)
    print(
        'Found:\n' +
        json.dumps(found, **utils.json_dump_params) +
        'New:\n' +
        rx_pseudodict.sub(r', "Rx\1", ', new_dump) +
        'Garbage:\n' +
        rx_pseudodict.sub(r', "Rx\1", ', garbage_dump)
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment