Skip to content

Instantly share code, notes, and snippets.

Created January 7, 2017 03:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save RoadrunnerWMC/f4253ef38c8f51869674a46ee73eaa9f to your computer and use it in GitHub Desktop.
Save RoadrunnerWMC/f4253ef38c8f51869674a46ee73eaa9f to your computer and use it in GitHub Desktop.
Converts Luigi's Mansion Dark Moon NLOC (translation) files to NLOCT (a custom easy-to-edit format I invented) and back. I should make this easier to use...
# 10/29/16
# NLOC haxxor script
# ~~~~~~~~~~
# NLOC: Next Level LOCalization? Localizable text strings.
# "LOC" probably does stand for localization, since if you get a hash
# ID wrong, it defaults to "missing loc string", which certainly looks
# like "localized" in that context.
# 00-03 "NLOC"
# 04-07 1... always? (TODO: check if it is.)
# 08-0B Looks like a hash of the language name or ID, in some way.
# (considering it's the same in both's but different in
# 0C-0F Number of strings
# String info table:
# 00-03 Some sort of hash/ID. Entries are sorted by this.
# Consistent across languages for equivalent strings.
# 04-07 String offset, relative to the end of this table
# (the code below counts it in UTF-16 code units, not bytes).
# Then UTF-16 (UCS-2?) null-terminated text strings.
# Strings appear to be in whatever order NLG decided to put them in:
# similar strings are grouped together in locally logical order.
import struct
# Path of the existing .dict file; main() reads it in binary mode and
# patches three 32-bit size fields in it.
# NOTE(review): main() also references NLOCTS_IN and NLOC_OUT, which are
# not defined anywhere in the visible source -- confirm where they live.
DICT_IN = 'ukenglish.dict'
# Path main() writes the patched copy of the .dict file to.
DICT_OUT = 'ukenglish_modified.dict'
def nlgHashFunction(data):
    """
    Compute the 32-bit NLG hash of `data`.

    Reverse-engineered from the binary. `data` must be a bytes object
    (anything that yields integer byte values when iterated works).

    This function has some interesting properties:
    - case-insensitive (ASCII 'A'-'Z' are folded to lowercase)
    - insensitive to leading spaces (0x20), but not to trailing ones:
      the seed is -1 mod 2**32, and -1 * 33 + 0x20 is -1 again
    - empty input hashes to the seed itself, 0xFFFFFFFF

    Returns the hash as an unsigned 32-bit integer.
    """
    # Seed with the unsigned form of -1 so that even empty input yields
    # a value that fits struct's 'I' format. For non-empty input this is
    # arithmetically identical to seeding with -1.
    h = 0xFFFFFFFF
    for c in data:
        # Unsigned wrap-around range check: true only for 'A'..'Z'.
        if (c - 65) & 0xFFFFFFFF <= 0x19:
            c |= 0x20
        h = (h * 33 + c) & 0xFFFFFFFF
    return h
def strToHash(s):
    """
    Return the NLG hash of the text string `s`.

    The string is encoded as Latin-1 before being passed to
    nlgHashFunction, so a character outside that range raises
    UnicodeEncodeError.
    """
    return nlgHashFunction(s.encode('latin-1'))
def keyAndLine(line):
    """
    Split one NLOCT line into a `(key hash, message)` tuple.

    The key is either a double-quoted name, which is hashed with
    strToHash, or a bare hexadecimal hash value followed by a space.
    Whitespace between the key and the message is dropped. A bare key
    with nothing after it yields an empty message.

    Raises ValueError if a quoted key has no closing quote or a bare key
    is not valid hexadecimal.
    """
    if line.startswith('"'):
        # Hash a string
        strEnd = line.index('"', 1)
        return strToHash(line[1:strEnd]), line[strEnd + 1:].lstrip()

    # partition() copes with a key that has no message after it, where
    # line.index(' ') would raise ValueError.
    key, _, msg = line.partition(' ')
    return int(key, 16), msg.lstrip()
# Cache mapping hash value -> key string; filled on the first call to
# hashToStr().
nlocKeyRepo = None


def hashToStr(h):
    """
    Return the NLOCT spelling of the hash value `h`.

    If nlocKeyRepo.txt lists a string whose hash equals `h`, that string
    is returned wrapped in double quotes; otherwise the hash is returned
    as uppercase hexadecimal without a "0x" prefix.

    The key list is read on the first call and cached in the global
    nlocKeyRepo. If the file cannot be read, a message is printed and
    every hash falls back to its hexadecimal form.
    """
    global nlocKeyRepo
    if nlocKeyRepo is None:
        # Load it
        try:
            with open('nlocKeyRepo.txt', 'r', encoding='utf-8') as f:
                # NOTE(review): the right-hand side of this assignment
                # was lost in this copy of the script; one key per line
                # is assumed -- confirm against the real file format.
                keys = f.read().splitlines()
        except (OSError, UnicodeError):
            print('Could not open nlocKeyRepo.txt. This file is used to '
                  'list strings that can be used in place of key hash '
                  'values where possible.')
            # Without this the loop below would hit an undefined name.
            keys = []
        nlocKeyRepo = {}
        for k in keys:
            nlocKeyRepo[strToHash(k)] = k

    if h in nlocKeyRepo:
        return '"%s"' % nlocKeyRepo[h]
    return '%X' % h
def generateNLOC(nloct):
    """
    Convert nloct to nloc.

    To apply multiple NLOCTs as patches, simply append the patches to
    the original! (A later line replaces an earlier one with the same
    key hash.)

    Recognized line types:
    - "###" toggles a block comment; lines inside it are skipped
    - lines starting with "#" and empty lines are skipped
    - "langid:" followed by a quoted name (hashed) or a hex value sets
      the language ID
    - anything else is a key/message line parsed by keyAndLine

    Prints warnings if the string count is not 1935 or if no language ID
    was set. Returns the NLOC data as a bytes object.
    """
    langId = 0
    blockcomment = False
    strs = {}
    for line in nloct.splitlines():
        # Allow ### to start/end block-comments a la Coffeescript
        if line.startswith('###'):
            blockcomment = not blockcomment
        if blockcomment:
            continue
        # This also swallows the closing "###" of a block comment.
        if line.startswith('#') or not line:
            continue

        if line.lower().startswith('langid:'):
            # Language ID line; this is handled entirely differently
            langIdStr = line[len('langid:'):].strip()
            if langIdStr.startswith('"'):
                langId = strToHash(langIdStr[1:-1])
            else:
                langId = int(langIdStr, 16)
            # Must not fall through: keyAndLine cannot parse this line.
            continue

        idHash, msg = keyAndLine(line)
        strs[idHash] = msg

    if len(strs) < 1935:
        print('WARNING: The NLOC being generated only contains %d strings, '
              'whereas retail LMDM NLOCs should have at least 1935!' % len(strs))
    elif len(strs) > 1935:
        print('WARNING: Your NLOC has more than 1935 strings! (%d)'
              % len(strs))
    if langId == 0:
        print('WARNING: Language ID was not set.')

    newNlocHeader = bytearray()
    newNlocContents = bytearray()
    newNlocHeader.extend(struct.pack('<4s3I', b'NLOC', 1, langId, len(strs)))
    # Code RE suggests that the second value there is compared against
    # a literal value 1, and fails if it's different.

    # Table entries are written in ascending hash order; each offset is
    # the string's position in UTF-16 code units (byte length // 2).
    for strId in sorted(strs):
        newNlocHeader.extend(
            struct.pack('<II', strId, len(newNlocContents) // 2))
        newNlocContents.extend(strs[strId].encode('utf-16-le') + b'\0\0')

    return bytes(newNlocHeader + newNlocContents)
def readNLOC(data, *, endian='<'):
    """
    Convert an NLOC file to NLOCT.

    `data` is the raw file contents as a bytes object. `endian` is the
    struct byte-order prefix: '<' selects little-endian, and any other
    value makes the text decode as big-endian UTF-16.

    Returns the NLOCT text: a "langid:" line followed by one
    "<key> <message>" line per string, in table order.

    Raises ValueError if a string has no NUL terminator.

    NOTE(review): the string info table is read from offset 0x14, but
    generateNLOC in this file writes a 16-byte header followed directly
    by the table -- confirm which layout matches real files.
    """
    endianName = 'le' if endian == '<' else 'be'
    tableStart = 0x14

    langId, strCount = struct.unpack_from(endian + 'II', data, 8)
    output = ['langid:' + hashToStr(langId)]

    # Everything after the table is text; offsets index into the decoded
    # string, i.e. they are counted in UTF-16 code units.
    fullStr = data[tableStart + 8 * strCount:].decode('utf-16-' + endianName)

    for i in range(strCount):
        sId, sStart = struct.unpack_from(
            endian + 'II', data, tableStart + 8 * i)
        # Search from sStart itself: starting one character later would
        # make an empty string swallow its NUL and the following string.
        sEnd = fullStr.index('\x00', sStart)
        output.append(hashToStr(sId) + ' ' + fullStr[sStart:sEnd])

    return '\n'.join(output)
def main():
    """
    Main function to convert NLOC/NLOCT

    Reads every file named in NLOCTS_IN, joins them with newlines,
    converts the result with generateNLOC, writes it to NLOC_OUT behind
    a 16-byte header, and writes a copy of DICT_IN with three size
    fields patched to DICT_OUT.

    NOTE(review): NLOCTS_IN and NLOC_OUT are not defined in the visible
    source -- confirm they exist at module level.
    """
    # Read the NLOCTs
    nlocts = []
    for n in NLOCTS_IN:
        with open(n, 'r', encoding='utf-8') as f:
            # NOTE(review): this line was lost in this copy of the
            # script; reconstructed from the join below.
            nlocts.append(f.read())

    # Generate the NLOC
    nloc = generateNLOC('\n'.join(nlocts))

    # Generate the new .data
    data = struct.pack('<4I', 0x12027020, len(nloc), 0, 0) + nloc + b'\0\0'
    with open(NLOC_OUT, 'wb') as f:
        # NOTE(review): reconstructed; the original write was lost.
        f.write(data)

    # Update the .dict horrifically hackily (sorry)
    with open(DICT_IN, 'rb') as f:
        # NOTE(review): the argument to bytearray( was lost; reading the
        # whole file is assumed.
        dictData = bytearray(f.read())
    # One field gets the size without the 16-byte header prepended
    # above; the other two get the full size.
    dictData[0x68:0x6C] = struct.pack('<I', len(data) - 16)
    dictData[0x74:0x78] = struct.pack('<I', len(data))
    dictData[0x84:0x88] = struct.pack('<I', len(data))
    with open(DICT_OUT, 'wb') as f:
        # NOTE(review): reconstructed; the original write was lost.
        f.write(dictData)
Copy link

This is now superseded by a newer tool (the link was lost in this copy), which does the same thing but much better.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment