Converts Luigi's Mansion Dark Moon NLOC (translation) files to NLOCT (a custom easy-to-edit format I invented) and back. I should make this easier to use...
| # 10/29/16 | |
| # NLOC haxxor script | |
| # ~~~~~~~~~~ | |
# NLOC: Next Level LOCalization? Holds localizable text strings.
# "LOC" probably does stand for "localization", since if you get a
# hash ID wrong, it defaults to "missing loc string", which certainly
# reads as "localized" in that context.
| # 00-03 "NLOC" | |
| # 04-07 1... always? (TODO: check if it is.) | |
| # 08-0B Looks like a hash of the language name or ID, in some way. | |
| # (considering it's the same in both english.data's but different in dutch.data) | |
| # 0C-0F Number of strings | |
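# String info table (one 8-byte entry per string):
# 00-03 Some sort of hash/ID. Entries are sorted by this.
#       Consistent across languages for equivalent strings.
# 04-07 String offset, relative to the end of this table, counted in
#       16-bit characters rather than bytes. (This field order and unit
#       are what the reading/writing code in this script actually uses.)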
| # Then UTF-16 (UCS-2?) null-terminated text strings. | |
| # Strings appear to be in whatever order NLG decided to put them in: | |
| # similar strings are grouped together in locally logical order. | |
| import struct | |
# NLOCT sources, concatenated in this order before conversion. When the
# same key appears more than once, the last occurrence wins, so patch
# files belong after the base file.
NLOCTS_IN = ['nlocts/ukenglish.nloct', 'patches.nloct']

# Existing .dict file whose size fields get patched.
DICT_IN = 'ukenglish.dict'

# Destination paths for the generated .data and the patched .dict.
NLOC_OUT = 'ukenglish_modified.data'
DICT_OUT = 'ukenglish_modified.dict'
def nlgHashFunction(data):
    """
    Compute the 32-bit NLG string hash of `data`.

    Reverse-engineered from the binary. `data` must be a bytes object
    (anything that iterates as integers in 0..255).

    This function has some interesting properties:
    - case-insensitive: ASCII 'A'-'Z' are folded to lowercase first
    - insensitive to leading spaces (but not trailing ones): the seed is
      0xFFFFFFFF, and seed * 33 + 0x20 wraps back to the seed
    - the result is always an unsigned 32-bit value; empty input yields
      the seed itself, 0xFFFFFFFF

    Returns the hash as an int in the range 0..0xFFFFFFFF.
    """
    # Seed with the unsigned form of -1 so that even the empty input
    # produces a value that fits struct's 'I' format.
    h = 0xFFFFFFFF
    for c in data:
        if 65 <= c <= 90:
            # Fold uppercase ASCII to lowercase
            c |= 0x20
        h = (h * 33 + c) & 0xFFFFFFFF
    return h
def strToHash(s):
    """
    Hash a text key: encode it as Latin-1, then run the bytes through
    nlgHashFunction and return the resulting integer.
    """
    encoded = s.encode('latin-1')
    return nlgHashFunction(encoded)
def keyAndLine(line):
    """
    Split one NLOCT line into its key hash and its message text.

    Two key forms are accepted:
    - `"some key" message`: the quoted text is hashed with strToHash
    - `1A2B3C4D message`:   the key is a hexadecimal hash value

    Whitespace between the key and the message is dropped. A line that
    consists of a key only yields an empty message.

    Returns a (hash, message) tuple.
    Raises ValueError if a quoted key has no closing quote or the hex
    key cannot be parsed.
    """
    if line.startswith('"'):
        # Hash a string
        strEnd = line.find('"', 1)
        if strEnd == -1:
            raise ValueError(
                'Unterminated quoted key in NLOCT line: %r' % line)
        return strToHash(line[1:strEnd]), line[strEnd + 1:].lstrip()

    # partition() copes with a missing separator, so a bare key with no
    # message (e.g. after an editor stripped the trailing space) still
    # parses instead of raising.
    key, _, msg = line.partition(' ')
    return int(key, 16), msg.lstrip()
# Lazily-built map of key hash -> key string; filled on the first
# hashToStr() call.
nlocKeyRepo = None


def hashToStr(h):
    """
    Render a key hash for use in an NLOCT file.

    On first use, nlocKeyRepo.txt (one key string per line, UTF-8) is
    loaded from the current directory and each key is hashed. If `h`
    matches one of them, the key is returned wrapped in double quotes;
    otherwise `h` is returned as uppercase hexadecimal without a prefix.

    If the repo file cannot be read, a notice is printed once and every
    hash is rendered as hexadecimal.
    """
    global nlocKeyRepo
    if nlocKeyRepo is None:
        # Load it
        try:
            with open('nlocKeyRepo.txt', 'r', encoding='utf-8') as f:
                keys = f.read().splitlines()
        except (OSError, UnicodeError):
            print('Could not open nlocKeyRepo.txt. This file is used to '
                  'list strings that can be used in place of key hash '
                  'values where possible.')
            # Fall back to an empty repo so the lookup below still works
            keys = []
        # Blank lines are not keys; skip them
        nlocKeyRepo = {strToHash(k): k for k in keys if k}

    if h in nlocKeyRepo:
        return '"%s"' % nlocKeyRepo[h]
    return '%X' % h
def generateNLOC(nloct, *, expectedCount=1935):
    """
    Convert nloct to nloc.

    To apply multiple NLOCTs as patches, simply append the patches to
    the original! A key that appears more than once keeps the text of
    its last occurrence.

    NLOCT syntax, line by line:
    - a line starting with `###` toggles a block comment
      (a la Coffeescript)
    - a line starting with `#`, or a blank / whitespace-only line, is
      ignored
    - `langid:<hex>` or `langid:"<name>"` sets the language ID
      (whitespace around the value is allowed)
    - anything else is a `<key> <text>` entry, parsed by keyAndLine

    `expectedCount` is the number of strings a retail file is expected
    to contain; a warning is printed when the result differs. Pass None
    to disable that check.

    Returns the NLOC file contents as bytes.
    Raises ValueError on a malformed langid or entry line.
    """
    langId = 0
    inBlockComment = False
    strs = {}
    for line in nloct.splitlines():
        # Allow ### to start/end block-comments a la Coffeescript
        if line.startswith('###'):
            inBlockComment = not inBlockComment
            continue
        if inBlockComment or line.startswith('#') or not line.strip():
            continue

        if line.lower().startswith('langid:'):
            # Language ID line; this is handled entirely differently
            value = line[len('langid:'):].strip()
            if value.startswith('"'):
                if len(value) < 2 or not value.endswith('"'):
                    raise ValueError(
                        'Unterminated quoted language ID: %r' % line)
                langId = strToHash(value[1:-1])
            else:
                langId = int(value, 16)
            continue

        idHash, msg = keyAndLine(line)
        strs[idHash] = msg

    if expectedCount is not None:
        if len(strs) < expectedCount:
            print('WARNING: The NLOC being generated only contains %d '
                  'strings, whereas retail LMDM NLOCs should have at '
                  'least %d!' % (len(strs), expectedCount))
        elif len(strs) > expectedCount:
            print('WARNING: Your NLOC has more than %d strings! (%d)'
                  % (expectedCount, len(strs)))
    if langId == 0:
        print('WARNING: Language ID was not set.')

    # Code RE suggests that the second header value is compared against
    # a literal value 1, and fails if it's different.
    header = bytearray(
        struct.pack('<4s3I', b'NLOC', 1, langId, len(strs)))
    contents = bytearray()
    for idHash in sorted(strs):
        # Offsets are stored in 16-bit units, relative to the end of
        # the info table.
        header += struct.pack('<II', idHash, len(contents) // 2)
        contents += strs[idHash].encode('utf-16-le') + b'\0\0'
    return bytes(header + contents)
def readNLOC(data, *, endian='<', tableOffset=0x10):
    """
    Convert an NLOC file to NLOCT.

    `data` is the raw NLOC contents (starting at the "NLOC" magic).
    `endian` is the struct byte-order prefix: '<' for little-endian,
    anything else is treated as big-endian.
    `tableOffset` is where the string info table starts. The default,
    0x10, is the position generateNLOC writes it at (directly after the
    16-byte header), so files produced by this script round-trip.

    Each table entry is (hash, offset); the offset counts 16-bit units
    from the end of the table. Strings are cut out of the raw bytes
    before decoding so that characters outside the BMP (surrogate pairs)
    do not shift later offsets, and empty strings are read as empty.

    Returns the NLOCT text: a `langid:` line followed by one
    `<key> <text>` line per string.
    Raises ValueError if a string runs off the end of `data` without a
    null terminator.
    """
    codec = 'utf-16-le' if endian == '<' else 'utf-16-be'
    langId, strCount = struct.unpack_from(endian + 'II', data, 8)
    textStart = tableOffset + 8 * strCount

    output = ['langid:' + hashToStr(langId)]
    for i in range(strCount):
        sId, sStart = struct.unpack_from(
            endian + 'II', data, tableOffset + 8 * i)
        begin = textStart + 2 * sStart

        # Walk forward one 16-bit unit at a time to the terminator; the
        # alignment matters, a zero byte pair straddling two units is
        # not a terminator.
        end = begin
        while True:
            unit = data[end:end + 2]
            if len(unit) < 2:
                raise ValueError(
                    'String %d (offset %d) is not null-terminated'
                    % (i, sStart))
            if unit == b'\0\0':
                break
            end += 2

        output.append(hashToStr(sId) + ' ' + data[begin:end].decode(codec))
    return '\n'.join(output)
def main():
    """
    Build the modified .data and .dict files from the configured NLOCTs.

    Reads every file in NLOCTS_IN, converts the combined text to an
    NLOC, wraps it in a .data header and writes it to NLOC_OUT, then
    copies DICT_IN to DICT_OUT with its size fields patched.
    """
    # Gather the text of every NLOCT, in order
    sources = []
    for path in NLOCTS_IN:
        with open(path, 'r', encoding='utf-8') as src:
            sources.append(src.read())

    # Joined with newlines, later files act as patches over earlier ones
    nlocBytes = generateNLOC('\n'.join(sources))

    # Wrap the NLOC in the 16-byte .data header plus two trailing zeros
    wrapper = struct.pack('<4I', 0x12027020, len(nlocBytes), 0, 0)
    dataFile = wrapper + nlocBytes + b'\0\0'
    with open(NLOC_OUT, 'wb') as out:
        out.write(dataFile)

    # Update the .dict horrifically hackily (sorry): overwrite the three
    # size fields at fixed offsets with the new lengths.
    with open(DICT_IN, 'rb') as src:
        dictBytes = bytearray(src.read())
    totalLen = len(dataFile)
    dictBytes[0x68:0x6C] = struct.pack('<I', totalLen - 16)
    dictBytes[0x74:0x78] = struct.pack('<I', totalLen)
    dictBytes[0x84:0x88] = struct.pack('<I', totalLen)
    with open(DICT_OUT, 'wb') as out:
        out.write(dictBytes)
# Only run the conversion when executed as a script, so the module can
# be imported for its functions without reading or writing any files.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment