Last active
February 10, 2022 02:54
-
-
Save nosoop/0594c44cc2e644a1dd7e6d9580199724 to your computer and use it in GitHub Desktop.
A script that imports game translation strings into a SourceMod phrase file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import vdf, itertools, os | |
# imports choice game translation strings into a SourceMod-usable format. | |
# Maps the language name used in game localization file names (the value
# substituted for ${language} in the path template) to the short language
# code used as the key in SourceMod translation files.
valid_languages = {
    'brazilian': 'pt',
    'bulgarian': 'bg',
    'czech': 'cze',
    'danish': 'da',
    'dutch': 'nl',
    'english': 'en',
    'finnish': 'fi',
    'french': 'fr',
    'german': 'de',
    'greek': 'el',
    'hungarian': 'hu',
    'italian': 'it',
    'japanese': 'ja',
    'korean': 'ko',
    'norwegian': 'no',
    'polish': 'pl',
    'portuguese': 'pt_p',
    'romanian': 'ro',
    'russian': 'ru',
    'schinese': 'chi',
    'spanish': 'es',
    'swedish': 'sv',
    'tchinese': 'zho',
    'thai': 'th',
    'turkish': 'tr',
    'ukrainian': 'ua',
}

# Short code of the language whose translation decides the order in which
# named (%var%) placeholders are numbered.
native_shortcode = 'en'
# https://docs.python.org/3/library/itertools.html#recipes | |
def unique_everseen(iterable, key=None):
    """Yield the distinct elements of ``iterable`` in first-seen order.

    Every element (or its key) produced so far is remembered in a set, so a
    later repeat is dropped even when it is not adjacent to the first
    occurrence.  When ``key`` is given, uniqueness is judged on
    ``key(element)`` while the original element is what gets yielded.

    Adapted from the itertools recipes:
    https://docs.python.org/3/library/itertools.html#recipes
    """
    observed = set()
    for item in iterable:
        marker = item if key is None else key(item)
        if marker in observed:
            continue
        observed.add(marker)
        yield item
def localizations_from_files(path_template, languages = None):
    """Yield ``(shortcode, parsed_file)`` for every localization file found.

    path_template: a ``string.Template`` pattern containing a ``${language}``
        placeholder, which is substituted with each language name in turn.
    languages: iterable of language names (keys of ``valid_languages``).
        When it is None or empty, every key of ``valid_languages`` is tried.

    Languages whose substituted path is not an existing file are skipped
    silently.  Each file that exists is decoded as UTF-16LE and parsed with
    ``vdf.load``; the yielded shortcode is ``valid_languages[language]``, so
    a language name missing from that mapping raises KeyError once its file
    is found.
    """
    # Imported locally so the function does not depend on the caller having
    # bound ``Template`` as a module global (it would raise NameError when
    # this file is imported rather than run as a script).
    from string import Template

    language_template = Template(path_template)
    for language in (languages or valid_languages.keys()):
        language_filepath = language_template.substitute(language = language)
        if not os.path.isfile(language_filepath):
            continue

        with open(language_filepath, 'r', encoding='UTF-16LE') as language_file:
            yield valid_languages[language], vdf.load(language_file)
if __name__ == '__main__':
    # Script entry point: reads the SourceMod phrase file given with -f, pulls
    # the matching tokens out of the game localization files located through
    # the -l template, converts their placeholders to SourceMod's {n} form and
    # writes the phrase file back in place.
    import argparse
    import collections
    import functools
    import re
    import sys
    # Bound here at module scope; code outside this block may look the name
    # up as a global.
    from string import Template

    parser = argparse.ArgumentParser(
            description = "Outputs a translation file with the given translation strings.",
            usage = "%(prog)s [options]")

    # Both paths are mandatory: without them the script has nothing to read
    # or write, so let argparse report the problem instead of failing later
    # with a TypeError on None.
    parser.add_argument('-l', '--language-template', metavar='FORMAT', required=True,
            help="A file template string containing a ${language} substitution placeholder")

    parser.add_argument('-f', '--translation-file', metavar='FILE', required=True,
            help="A key/value file containing translation strings to be (re)generated")

    parser.add_argument('--strip-color-bytes', action='store_true',
            help="If present, will remove \\x01 through \\x04 codes")

    parser.add_argument('--languages', metavar='LANG', nargs='*',
            help="A list of languages to process")

    args = parser.parse_args()

    with open(args.translation_file, encoding = 'utf8') as translation_file:
        sections = vdf.load(translation_file, mapper = collections.OrderedDict)

    phrases = sections['Phrases']
    tokens = phrases.keys()

    # Patterns are loop-invariant, so compile them once up front.
    color_bytes = re.compile('|'.join(map(re.escape, [ '\x01', '\x02', '\x03', '\x04' ])))
    numbered_placeholder = re.compile(r'%s(\d+)')

    # gather and process desired translation strings into our output translation struct
    for language, localization_file in localizations_from_files(args.language_template, args.languages):
        localization = localization_file['lang']['Tokens']
        for token in (t for t in tokens if t in localization):
            translation = localization[token]
            if args.strip_color_bytes:
                translation = color_bytes.sub('', translation)

            # handle %s1 replacement format
            sourcemod_translation = numbered_placeholder.sub(r'{\1}', translation)

            # The number of arguments is the highest index referenced, not
            # the number of substitutions: a repeated %s1 still needs one
            # argument, and "%s1 ... %s3" needs three.
            highest_index = max(
                    (int(n) for n in numbered_placeholder.findall(translation)),
                    default = 0)

            # maintain existing '#format' entry if possible, user might edit type specifier
            if highest_index and '#format' not in phrases[token]:
                phrases[token]['#format'] = ','.join(
                        '{{{}:s}}'.format(n) for n in range(1, highest_index + 1))
            phrases[token][language] = sourcemod_translation

    # do named token substitution (%var%) as a second pass, maintaining native version order
    for token, translation_phrase in phrases.items():
        # ensure any duplicate names share the same argument number
        # names will be ordered according to the language defined by native_shortcode
        native_translation = translation_phrase.get(native_shortcode)
        if native_translation is None:
            # Without the native string there is no reference ordering for
            # named placeholders; leave this phrase's text as-is rather than
            # aborting the whole run with a KeyError.
            print("warning: phrase '{}' has no '{}' entry; skipping named token substitution"
                    .format(token, native_shortcode), file = sys.stderr)
            named_tokens = []
        else:
            named_tokens = list(unique_everseen(re.findall(r'%\w+%', native_translation)))

        count = len(named_tokens)
        if count:
            # generate format string {n:s},{n+1:s}...
            if '#format' not in translation_phrase:
                translation_phrase['#format'] = ','.join('{{{}:s}}'.format(n + 1) for n in range(count))

            # generate list of 2-tuple replacements where %some_token% is paired with {n}
            repls = [(t, '{{{}}}'.format(n + 1)) for n, t in enumerate(named_tokens)]

            # process the n-tuple replacements in each localization for the current phrase
            # (iterate over a snapshot since entries are reassigned inside the loop)
            for language, translation in list(translation_phrase.items()):
                sourcemod_translation = functools.reduce(lambda a, kv: a.replace(*kv), repls, translation)
                translation_phrase[language] = sourcemod_translation

            # write the tokens as comma-separated values into a "#tokens" entry
            translation_phrase['#tokens'] = ','.join(t.strip('%') for t in named_tokens)

        # sort by language (#format first)
        phrases[token] = collections.OrderedDict(sorted(phrases[token].items()))

    with open(args.translation_file, 'w', encoding = 'utf8') as output_file:
        vdf.dump(sections, output_file, pretty=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Sample usage:
Takes an input file
$FILE
with the following contents:
Run the script like so:
python3 localization_to_sourcemod.py -l '/path/to/game/prefix_${language}.txt' -f $FILE --languages english spanish polish --strip-color-bytes
If on Windows, specify
-l
with double quotes instead of single (e.g. "X:\path\to\game\prefix_${language}.txt"
); it's single-quoted on Linux to avoid shell substitutions. Replace the language template value with the path pattern of your game's language files, where
${language}
will be replaced with language names (e.g., if one file is l4d360ui_tu_danish.txt
, you'll want the language template to be l4d360ui_tu_${language}.txt
). Not specifying any languages on the command line will make the script attempt to load every language listed in
valid_languages
(the key is the language name used in the template, the value is the short language code used in SourceMod's translation files). After the script runs,
$FILE
should be updated with the following:
As you can see, it's capable of handling a variety of translation placeholder formats, though the
#format
will assume they are strings if not already defined. Any changes to
#format
(e.g., to specify that an argument is a number) will persist between script executions.