Skip to content

Instantly share code, notes, and snippets.

@RoadrunnerWMC
Created January 7, 2017 03:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save RoadrunnerWMC/f4253ef38c8f51869674a46ee73eaa9f to your computer and use it in GitHub Desktop.
Save RoadrunnerWMC/f4253ef38c8f51869674a46ee73eaa9f to your computer and use it in GitHub Desktop.
Converts Luigi's Mansion Dark Moon NLOC (translation) files to NLOCT (a custom easy-to-edit format I invented) and back. I should make this easier to use...
# 10/29/16
# NLOC haxxor script
# ~~~~~~~~~~
# NLOC: probably "Next Level LOCalization" -- localizable text strings.
# That guess is likely right, since if you get a hash ID wrong, it
# defaults to "missing loc string", and "loc" certainly looks like
# "localized" in that context.
# 00-03 "NLOC"
# 04-07 Always 1? (Code RE suggests this is compared against a literal 1, and loading fails if it's different.)
# 08-0B Looks like a hash of the language name or ID, in some way.
# (considering it's the same in both english.data's but different in dutch.data)
# 0C-0F Number of strings
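# String info table:
# 00-03 String offset, relative to the end of this table
# 04-07 Some sort of hash/ID. Entries are sorted by this.
# Consistent across languages for equivalent strings.
# (NOTE: the code below packs/unpacks each entry as hash/ID first and
# offset second, with the offset counted in UTF-16 code units -- verify
# which field order is actually correct.)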
# Then UTF-16 (UCS-2?) null-terminated text strings.
# Strings appear to be in whatever order NLG decided to put them in:
# similar strings are grouped together in locally logical order.
import struct
# NLOCT text files that main() reads in this order and joins with newlines
# before conversion; entries from later files replace earlier entries that
# have the same key.
NLOCTS_IN = [
'nlocts/ukenglish.nloct',
'patches.nloct',
]
# Existing .dict file that main() reads and patches.
DICT_IN = 'ukenglish.dict'
# Output paths written by main(): the rebuilt .data file and the patched .dict.
NLOC_OUT = 'ukenglish_modified.data'
DICT_OUT = 'ukenglish_modified.dict'
def nlgHashFunction(data):
    """
    Compute the 32-bit NLG hash of `data`.

    Reverse-engineered from the binary. `data` must be a bytes object.
    The result is always an unsigned 32-bit integer; empty input yields
    the seed value 0xFFFFFFFF.

    This function has some interesting properties:
    - case-insensitive (ASCII 'A'-'Z' are folded to lowercase)
    - insensitive to leading spaces (but not trailing ones): hashing a
      0x20 byte into the seed gives the seed back, because
      0xFFFFFFFF * 33 + 0x20 == 0xFFFFFFFF (mod 2**32)
    """
    # Seed with the unsigned form of -1 so that the value is already in
    # range even when the loop body never runs (empty input).
    h = 0xFFFFFFFF
    for c in data:
        if 0x41 <= c <= 0x5A:
            # Uppercase ASCII letter: fold to lowercase
            c |= 0x20
        h = (h * 33 + c) & 0xFFFFFFFF
    return h
def strToHash(s):
    """
    Hash the text string `s`: encode it as Latin-1 and pass the resulting
    bytes to nlgHashFunction.
    """
    encoded = s.encode('latin-1')
    return nlgHashFunction(encoded)
def keyAndLine(line):
    """
    Split one NLOCT line into a (key hash, message) tuple.

    The key is either a double-quoted name, which is hashed with
    strToHash, or a hexadecimal hash value terminated by a space.
    Whitespace between the key and the message is discarded. A line
    that consists of a key only yields an empty message.

    Raises ValueError if a quoted key has no closing quote or if an
    unquoted key is not valid hexadecimal.
    """
    if line.startswith('"'):
        # Hash a string
        try:
            strEnd = line.index('"', 1)
        except ValueError:
            raise ValueError(
                'Unterminated quoted key in line: %r' % line) from None
        return strToHash(line[1:strEnd]), line[strEnd + 1:].lstrip()
    # partition() copes with lines that have no space at all (a bare key
    # with an empty message), where index(' ') would raise.
    key, _, msg = line.partition(' ')
    return int(key, 16), msg.lstrip()
# Cache mapping hash value -> key name; filled on the first hashToStr() call.
nlocKeyRepo = None


def hashToStr(h):
    """
    Return the NLOCT spelling of the hash `h`.

    If `h` is the hash of a name listed in nlocKeyRepo.txt (one name per
    line, UTF-8), the name is returned in double quotes; otherwise the
    hash is returned as uppercase hexadecimal without a prefix.

    The key file is loaded once and cached in the module-level
    `nlocKeyRepo`. If it cannot be read, a notice is printed and every
    hash falls back to the hexadecimal form.
    """
    global nlocKeyRepo
    if nlocKeyRepo is None:
        # Load it
        try:
            with open('nlocKeyRepo.txt', 'r', encoding='utf-8') as f:
                keys = f.read().splitlines()
        except (OSError, UnicodeError):
            print('Could not open nlocKeyRepo.txt. This file is used to '
                  'list strings that can be used in place of key hash '
                  'values where possible.')
            # Carry on with an empty repo instead of crashing on an
            # unbound `keys` below.
            keys = []
        nlocKeyRepo = {strToHash(k): k for k in keys}
    if h in nlocKeyRepo:
        return '"%s"' % nlocKeyRepo[h]
    return '%X' % h
def generateNLOC(nloct):
    """
    Convert nloct to nloc.

    `nloct` is the NLOCT text. Each line is one of:
    - `###`      toggles a block comment (a la Coffeescript)
    - `#...`     a line comment; empty lines are skipped as well
    - `langid:X` the language ID, either hexadecimal or a double-quoted
                 name that gets hashed (whitespace around X is ignored)
    - anything else is a key/message line parsed by keyAndLine

    Returns the NLOC file contents as bytes: a little-endian header
    ('NLOC', 1, language ID, string count), one (hash, offset) pair per
    string sorted by hash, then the NUL-terminated UTF-16-LE strings.
    Offsets are counted in UTF-16 code units from the start of the
    string data.

    Warnings are printed if the string count is not 1935 or if no
    language ID was given.

    To apply multiple NLOCTs as patches, simply append the patches to
    the original! (A later line replaces an earlier one with the same
    key.)
    """
    langId = 0
    blockcomment = False
    strs = {}
    for line in nloct.splitlines():
        # Allow ### to start/end block-comments a la Coffeescript
        if line.startswith('###'):
            blockcomment = not blockcomment
            continue
        if blockcomment or not line or line.startswith('#'):
            continue
        if line.lower().startswith('langid:'):
            # Language ID line; this is handled entirely differently.
            # Strip the value so that "langid: "name"" and trailing
            # whitespace don't break the quote detection below.
            langIdStr = line[len('langid:'):].strip()
            if langIdStr.startswith('"'):
                langId = strToHash(langIdStr[1:-1])
            else:
                langId = int(langIdStr, 16)
            continue
        idHash, msg = keyAndLine(line)
        strs[idHash] = msg

    if len(strs) < 1935:
        print('WARNING: The NLOC being generated only contains %d strings, '
              'whereas retail LMDM NLOCs should have at least 1935!' % len(strs))
    elif len(strs) > 1935:
        print('WARNING: Your NLOC has more than 1935 strings! (%d)'
              % len(strs))
    if langId == 0:
        print('WARNING: Language ID was not set.')

    # Code RE suggests that the second header value is compared against
    # a literal value 1, and fails if it's different.
    tableParts = [struct.pack('<4s3I', b'NLOC', 1, langId, len(strs))]
    stringParts = []
    offset = 0  # position of the next string, in UTF-16 code units
    for strHash in sorted(strs):
        tableParts.append(struct.pack('<II', strHash, offset))
        encoded = strs[strHash].encode('utf-16-le') + b'\0\0'
        stringParts.append(encoded)
        offset += len(encoded) // 2
    return b''.join(tableParts + stringParts)
def readNLOC(data, *, endian='<'):
    """
    Convert an NLOC file to NLOCT.

    `data` is the NLOC file contents as bytes. `endian` is the struct
    byte-order prefix: '<' selects little-endian, anything else is
    treated as big-endian.

    Returns the NLOCT text: a `langid:` line followed by one
    "key message" line per string, in table order. Keys are rendered
    with hashToStr.

    Raises ValueError if a string has no NUL terminator.
    """
    endianName = 'le' if endian == '<' else 'be'
    langId, strCount = struct.unpack_from(endian + 'II', data, 8)
    output = ['langid:' + hashToStr(langId)]

    # NOTE(review): the table is read starting at 0x14 here, whereas
    # generateNLOC in this file writes a 0x10-byte header directly
    # followed by the table -- confirm which layout is correct.
    tableStart = 0x14
    fullStr = data[tableStart + 8 * strCount:].decode('utf-16-' + endianName)
    for i in range(strCount):
        sId, sStart = struct.unpack_from(
            endian + 'II', data, tableStart + 8 * i)
        # Search from sStart itself: an empty string has its terminator
        # exactly there, and starting one character later would swallow
        # the following string.
        sEnd = fullStr.index('\x00', sStart)
        output.append(hashToStr(sId) + ' ' + fullStr[sStart:sEnd])
    return '\n'.join(output)
def main():
    """
    Build the modified .data and .dict files from the NLOCT inputs.

    Reads every file in NLOCTS_IN, converts the joined text with
    generateNLOC, writes the wrapped result to NLOC_OUT, then copies
    DICT_IN to DICT_OUT with three 32-bit fields overwritten.
    """
    # Load all NLOCT sources, in order
    sources = []
    for path in NLOCTS_IN:
        with open(path, 'r', encoding='utf-8') as f:
            sources.append(f.read())

    # Convert the concatenated text to NLOC bytes
    nlocBytes = generateNLOC('\n'.join(sources))

    # Wrap it: 16-byte header, the NLOC itself, then two NUL bytes
    header = struct.pack('<4I', 0x12027020, len(nlocBytes), 0, 0)
    dataBytes = header + nlocBytes + b'\0\0'
    with open(NLOC_OUT, 'wb') as f:
        f.write(dataBytes)

    # Update the .dict horrifically hackily (sorry)
    with open(DICT_IN, 'rb') as f:
        dictBytes = bytearray(f.read())
    total = len(dataBytes)
    # (offset in the .dict, new little-endian 32-bit value)
    patches = (
        (0x68, total - 16),
        (0x74, total),
        (0x84, total),
    )
    for offset, value in patches:
        dictBytes[offset:offset + 4] = struct.pack('<I', value)
    with open(DICT_OUT, 'wb') as f:
        f.write(dictBytes)


main()
@RoadrunnerWMC
Copy link
Author

This is now superseded by https://github.com/RoadrunnerWMC/NLOC-Tool, which does the same thing but much better.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment