Created
January 7, 2017 03:09
-
-
Save RoadrunnerWMC/f4253ef38c8f51869674a46ee73eaa9f to your computer and use it in GitHub Desktop.
Converts Luigi's Mansion Dark Moon NLOC (translation) files to NLOCT (a custom easy-to-edit format I invented) and back. I should make this easier to use...
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 10/29/16
# NLOC haxxor script
# ~~~~~~~~~~
# NLOC: Next Level LOCalization? Localizable text strings.
#     The name probably does stand for that, since if you get a hash ID
#     wrong, the game defaults to "missing loc string", which certainly
#     looks like "localized" in that context.
#     00-03  "NLOC"
#     04-07  1... always? (TODO: check if it is.)
#     08-0B  Looks like a hash of the language name or ID, in some way.
#            (considering it's the same in both english.data's but
#            different in dutch.data)
#     0C-0F  Number of strings
# String info table (one entry per string):
#     00-03  String offset, relative to the end of this table
#     04-07  Some sort of hash/ID. Entries are sorted by this.
#            Consistent across languages for equivalent strings.
#     NOTE: generateNLOC() and readNLOC() in this file treat the first
#     word of each entry as the hash/ID and the second as the offset
#     (counted in 2-byte units) -- TODO: confirm which order is right.
# Then UTF-16 (UCS-2?) null-terminated text strings.
#     Strings appear to be in whatever order NLG decided to put them in:
#     similar strings are grouped together in locally logical order.
import struct
# NLOCT text files to merge, in order. main() concatenates them, and a
# key that appears again later replaces the earlier entry, so patch
# files belong after the base file.
NLOCTS_IN = ['nlocts/ukenglish.nloct', 'patches.nloct']

# Existing .dict file that main() reads and patches.
DICT_IN = 'ukenglish.dict'

# Output paths for the rebuilt .data file and the patched .dict file.
NLOC_OUT = 'ukenglish_modified.data'
DICT_OUT = 'ukenglish_modified.dict'
def nlgHashFunction(data):
    """
    Compute the 32-bit NLG string hash of `data`.

    Reverse-engineered from the binary. `data` must be a bytes object
    (iterating it has to yield integers).

    The accumulator starts at 0xFFFFFFFF and each byte is folded in as
    ``h = h * 33 + byte`` modulo 2**32, after ASCII uppercase letters
    (A-Z) have been converted to lowercase. Consequences:
    - the hash is case-insensitive;
    - leading space characters (0x20) do not affect it, because
      0xFFFFFFFF * 33 + 0x20 wraps back around to 0xFFFFFFFF; trailing
      spaces do affect it.

    Returns an int in the range 0..0xFFFFFFFF. Empty input yields
    0xFFFFFFFF.
    """
    # Start from the unsigned form of -1 so that even empty input
    # produces a value that fits in an unsigned 32-bit field.
    h = 0xFFFFFFFF
    for c in data:
        if 0x41 <= c <= 0x5A:
            # ASCII 'A'..'Z': set bit 5 to get the lowercase letter.
            c |= 0x20
        h = (h * 33 + c) & 0xFFFFFFFF
    return h
def strToHash(s):
    """
    Hash the text `s` with nlgHashFunction().

    The text is encoded as Latin-1 first, so characters outside that
    range raise UnicodeEncodeError.
    """
    encoded = s.encode('latin-1')
    return nlgHashFunction(encoded)
def keyAndLine(line):
    """
    Split one NLOCT line into ``(key hash, message text)``.

    The key is either a double-quoted name, which is hashed with
    strToHash(), or a bare hexadecimal hash value terminated by a
    space. Everything after the key, with leading whitespace removed,
    is the message. A line that consists of a key alone yields an empty
    message.

    Raises ValueError if a quoted key has no closing quote or if a bare
    key is not valid hexadecimal.
    """
    if line.startswith('"'):
        # Quoted key: hash the text between the quotes.
        strEnd = line.find('"', 1)
        if strEnd == -1:
            raise ValueError('Unterminated quoted key: %r' % line)
        return strToHash(line[1:strEnd]), line[strEnd + 1:].lstrip()

    # Bare hex key. partition() copes with a line that has no space at
    # all (a key whose message is empty).
    keyText, _, rest = line.partition(' ')
    return int(keyText, 16), rest.lstrip()
# Lazily-built mapping of key hash -> key name; filled on the first
# call to hashToStr().
nlocKeyRepo = None


def hashToStr(h):
    """
    Return the NLOCT spelling of the key hash `h`.

    If `h` is the hash of one of the names listed (one per line) in
    nlocKeyRepo.txt, the name is returned in double quotes. Otherwise
    the hash is returned as uppercase hexadecimal without a prefix.

    nlocKeyRepo.txt is read once, on the first call. If it cannot be
    read, a notice is printed and every hash is rendered as hex.
    """
    global nlocKeyRepo
    if nlocKeyRepo is None:
        try:
            with open('nlocKeyRepo.txt', 'r', encoding='utf-8') as f:
                keys = f.read().splitlines()
        except (OSError, UnicodeError):
            print('Could not open nlocKeyRepo.txt. This file is used to '
                  'list strings that can be used in place of key hash '
                  'values where possible.')
            # Carry on with no known names instead of failing below.
            keys = []
        nlocKeyRepo = {strToHash(k): k for k in keys}

    if h in nlocKeyRepo:
        return '"%s"' % nlocKeyRepo[h]
    return format(h, 'X')
def generateNLOC(nloct):
    """
    Convert NLOCT text to the bytes of an NLOC file.

    To apply multiple NLOCTs as patches, simply append the patches to
    the original: a key that appears more than once keeps its last
    message.

    NLOCT syntax, one item per line:
    - a line starting with ``###`` toggles a block comment
      (a la Coffeescript);
    - a line starting with ``#``, and any blank or whitespace-only
      line, is ignored;
    - ``langid:<hex>`` or ``langid:"<name>"`` sets the language ID
      (whitespace around the value is ignored);
    - anything else is ``<key> <message>`` as parsed by keyAndLine().

    Warnings are printed if the string count differs from 1935 or if no
    language ID was set.

    NOTE(review): the header written here is 16 bytes, while readNLOC()
    in this file reads the string table starting at offset 0x14 --
    confirm the real layout before relying on a round trip.
    """
    langId = 0
    blockcomment = False
    strs = {}
    for line in nloct.splitlines():
        # Allow ### to start/end block-comments a la Coffeescript
        if line.startswith('###'):
            blockcomment = not blockcomment
            continue
        if blockcomment or line.startswith('#') or not line.strip():
            continue
        if line.lower().startswith('langid:'):
            # Language ID line; this is handled entirely differently
            langIdStr = line[len('langid:'):].strip()
            if langIdStr.startswith('"'):
                langId = strToHash(langIdStr[1:-1])
            else:
                langId = int(langIdStr, 16)
            continue
        idHash, msg = keyAndLine(line)
        strs[idHash] = msg

    if len(strs) < 1935:
        print('WARNING: The NLOC being generated only contains %d strings, '
              'whereas retail LMDM NLOCs should have at least 1935!' % len(strs))
    elif len(strs) > 1935:
        print('WARNING: Your NLOC has more than 1935 strings! (%d)'
              % len(strs))
    if langId == 0:
        print('WARNING: Language ID was not set.')

    # Code RE suggests that the second header value is compared against
    # a literal value 1, and fails if it's different.
    newNlocHeader = bytearray(
        struct.pack('<4s3I', b'NLOC', 1, langId, len(strs)))
    newNlocContents = bytearray()
    for key in sorted(strs):
        # Offsets are counted in 2-byte UTF-16 code units from the
        # start of the string data.
        newNlocHeader += struct.pack('<II', key, len(newNlocContents) // 2)
        newNlocContents += strs[key].encode('utf-16-le') + b'\0\0'
    return bytes(newNlocHeader + newNlocContents)
def readNLOC(data, *, endian='<'):
    """
    Convert the bytes of an NLOC file to NLOCT text.

    `endian` is the struct byte-order prefix: '<' selects little-endian
    integers and UTF-16-LE text, anything else selects big-endian
    integers and UTF-16-BE text.

    The result has a ``langid:`` line followed by one ``<key> <text>``
    line per string, in table order. Keys are rendered by hashToStr().

    Raises ValueError if a string has no NUL terminator.

    NOTE(review): the string table is read starting at offset 0x14,
    whereas generateNLOC() in this file writes a 16-byte header --
    confirm the real layout before relying on a round trip.
    """
    codec = 'utf-16-le' if endian == '<' else 'utf-16-be'
    tableStart = 0x14

    langId, strCount = struct.unpack_from(endian + 'II', data, 8)
    output = ['langid:' + hashToStr(langId)]

    # Everything after the table is string data. Table offsets are used
    # as character indices into the decoded text.
    fullStr = data[tableStart + 8 * strCount:].decode(codec)
    for i in range(strCount):
        sId, sStart = struct.unpack_from(
            endian + 'II', data, tableStart + 8 * i)
        # Search from sStart itself so that an empty string (a NUL
        # right at its offset) is returned as empty rather than running
        # on into the following string.
        sEnd = fullStr.index('\x00', sStart)
        output.append(hashToStr(sId) + ' ' + fullStr[sStart:sEnd])
    return '\n'.join(output)
def main():
    """
    Build the modified .data and .dict files from the NLOCT inputs.

    Reads every file in NLOCTS_IN, converts the concatenated text with
    generateNLOC(), writes the wrapped result to NLOC_OUT, then copies
    DICT_IN to DICT_OUT with three length fields patched.

    Raises ValueError if DICT_IN is too short to contain the patched
    fields.
    """
    # Read the NLOCTs
    nlocts = []
    for path in NLOCTS_IN:
        with open(path, 'r', encoding='utf-8') as f:
            nlocts.append(f.read())

    # Generate the NLOC
    nloc = generateNLOC('\n'.join(nlocts))

    # Generate the new .data: a 16-byte header (the constant 0x12027020,
    # the NLOC length and two zero words), the NLOC itself, and two
    # trailing zero bytes.
    data = struct.pack('<4I', 0x12027020, len(nloc), 0, 0) + nloc + b'\0\0'
    with open(NLOC_OUT, 'wb') as f:
        f.write(data)

    # Update the .dict horrifically hackily (sorry)
    with open(DICT_IN, 'rb') as f:
        dictData = bytearray(f.read())
    if len(dictData) < 0x88:
        # Slice assignment past the end would silently grow the buffer
        # and produce a corrupt file.
        raise ValueError('%s is too short to patch (%d bytes)'
                         % (DICT_IN, len(dictData)))
    # Presumably size fields for the .data file -- the values written
    # are its length without the 16-byte header, and its full length.
    dictData[0x68:0x6C] = struct.pack('<I', len(data) - 16)
    dictData[0x74:0x78] = struct.pack('<I', len(data))
    dictData[0x84:0x88] = struct.pack('<I', len(data))
    with open(DICT_OUT, 'wb') as f:
        f.write(dictData)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is now superseded by https://github.com/RoadrunnerWMC/NLOC-Tool, which does the same thing but much better.