Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
A script that imports game translation strings into a SourceMod phrase file.
import vdf, itertools, os
# Imports selected game translation strings into a SourceMod-usable format.
# Maps the language name that is substituted for ${language} in the
# localization file template to the short code used as the key of each
# translation entry written to the phrase file.
# Iteration order matters: it is the order in which files are tried when no
# explicit language list is supplied.
# NOTE(review): SourceMod's stock languages.cfg may list Japanese as 'jp'
# rather than 'ja' -- confirm before relying on that entry.
valid_languages = dict(
    brazilian='pt',
    bulgarian='bg',
    czech='cze',
    danish='da',
    dutch='nl',
    english='en',
    finnish='fi',
    french='fr',
    german='de',
    greek='el',
    hungarian='hu',
    italian='it',
    japanese='ja',
    korean='ko',
    norwegian='no',
    polish='pl',
    portuguese='pt_p',
    romanian='ro',
    russian='ru',
    schinese='chi',
    spanish='es',
    swedish='sv',
    tchinese='zho',
    thai='th',
    turkish='tr',
    ukrainian='ua',
)

# Short code of the language whose strings decide the numbering order of
# named (%token%) placeholders.
native_shortcode = 'en'
# https://docs.python.org/3/library/itertools.html#recipes
def unique_everseen(iterable, key=None):
    """Yield the elements of iterable in order, dropping later duplicates.

    Every element (or, when key is given, every key(element) result) that has
    been produced so far is remembered, so a value is yielded only the first
    time it is encountered. The remembered values are kept in a set and must
    therefore be hashable.
    """
    already_seen = set()

    if key is not None:
        # duplicates are decided by the derived key, not the element itself
        for item in iterable:
            marker = key(item)
            if marker in already_seen:
                continue
            already_seen.add(marker)
            yield item
        return

    for item in iterable:
        if item not in already_seen:
            already_seen.add(item)
            yield item
def localizations_from_files(path_template, languages = None):
    """Yield (short_code, parsed_file) for every language file that exists.

    path_template -- a string.Template pattern containing a ${language}
        placeholder, which is filled in with each language name in turn.
    languages -- iterable of language names to try; when it is None or empty,
        every key of valid_languages is tried instead.

    A language whose file does not exist is skipped without error. For each
    file that does exist, the short code looked up in valid_languages is
    yielded together with the result of vdf.load() on the file, which is
    opened as UTF-16LE text. A language name that is not a key of
    valid_languages raises KeyError once its file has been found.
    """
    # Imported here rather than relying on a global: elsewhere in this file
    # Template is only imported inside the __main__ guard, so calling this
    # function from an importing module would otherwise raise NameError.
    from string import Template

    language_template = Template(path_template)
    for language in (languages or valid_languages):
        language_filepath = language_template.substitute(language = language)
        if not os.path.isfile(language_filepath):
            continue
        with open(language_filepath, 'r', encoding='UTF-16LE') as language_file:
            yield valid_languages[language], vdf.load(language_file)
if __name__ == '__main__':
    import argparse
    import collections
    import re
    import sys
    # localizations_from_files() looks Template up as a module global
    from string import Template

    parser = argparse.ArgumentParser(
            description = "Outputs a translation file with the given translation strings.",
            usage = "%(prog)s [options]")

    # both paths are mandatory: without them the script has nothing to read or write
    parser.add_argument('-l', '--language-template', metavar='FORMAT', required=True,
            help="A file template string containing a ${language} substitution placeholder")
    parser.add_argument('-f', '--translation-file', metavar='FILE', required=True,
            help="A key/value file containing translation strings to be (re)generated")
    parser.add_argument('--strip-color-bytes', action='store_true',
            help="If present, will remove \\x01 through \\x04 codes")
    parser.add_argument('--languages', metavar='LANG', nargs='*',
            help="A list of languages to process")

    args = parser.parse_args()

    with open(args.translation_file, encoding = 'utf8') as translation_file:
        sections = vdf.load(translation_file, mapper = collections.OrderedDict)

    if 'Phrases' not in sections:
        parser.error('{} has no "Phrases" section'.format(args.translation_file))
    phrases = sections['Phrases']

    # patterns are compiled once instead of being rebuilt for every string
    color_bytes_re = re.compile(r'[\x01-\x04]')   # bytes removed by --strip-color-bytes
    positional_re = re.compile(r'%s(\d+)')        # %s1, %s2, ...
    named_re = re.compile(r'%\w+%')               # %some_name%

    # gather and process desired translation strings into our output translation struct
    for language, localization_file in localizations_from_files(args.language_template, args.languages):
        localization = localization_file['lang']['Tokens']
        for token in phrases:
            if token not in localization:
                continue
            translation = localization[token]
            if args.strip_color_bytes:
                translation = color_bytes_re.sub('', translation)

            # handle %s1 replacement format
            sourcemod_translation = positional_re.sub(r'{\1}', translation)

            # The format string has to describe every argument up to the
            # highest index referenced, so derive it from that index rather
            # than from the number of matches (a repeated %s1 is one argument).
            highest = max((int(n) for n in positional_re.findall(translation)), default=0)

            # maintain existing '#format' entry if possible, user might edit type specifier
            if highest and '#format' not in phrases[token]:
                phrases[token]['#format'] = ','.join(
                        '{{{}:s}}'.format(n) for n in range(1, highest + 1))

            phrases[token][language] = sourcemod_translation

    # do named token substitution (%var%) as a second pass, maintaining native version order
    # NOTE: named tokens are numbered from 1 independently of the %sN pass
    # above, so a string mixing both placeholder styles gets clashing indices.
    # (snapshot the items because entries of `phrases` are rebound below)
    for token, translation_phrase in list(phrases.items()):
        native_translation = translation_phrase.get(native_shortcode)
        if native_translation is None:
            # Without the native string there is no reference ordering for
            # named tokens; leave them untouched instead of crashing.
            print('warning: no "{}" translation for "{}"; named tokens not converted'.format(
                    native_shortcode, token), file=sys.stderr)
            named_tokens = []
        else:
            # ensure any duplicate names share the same argument number
            # names will be ordered according to the language defined by native_shortcode
            named_tokens = list(unique_everseen(named_re.findall(native_translation)))

        if named_tokens:
            # generate format string {1:s},{2:s}... unless the user already has one
            if '#format' not in translation_phrase:
                translation_phrase['#format'] = ','.join(
                        '{{{}:s}}'.format(n) for n in range(1, len(named_tokens) + 1))

            # pair every %some_token% with its {n} replacement
            repls = [(name, '{{{}}}'.format(n)) for n, name in enumerate(named_tokens, start=1)]

            # apply the replacements to each localization of the current phrase
            for language, translation in list(translation_phrase.items()):
                if language.startswith('#'):
                    # '#format' / '#tokens' are metadata, not translations
                    continue
                for name, placeholder in repls:
                    translation = translation.replace(name, placeholder)
                translation_phrase[language] = translation

            # write the tokens as comma-separated values into a "#tokens" entry
            translation_phrase['#tokens'] = ','.join(name.strip('%') for name in named_tokens)

        # sort by language ('#'-prefixed metadata keys sort first)
        phrases[token] = collections.OrderedDict(sorted(translation_phrase.items()))

    with open(args.translation_file, 'w', encoding = 'utf8') as output_file:
        vdf.dump(sections, output_file, pretty=True)
@nosoop

This comment has been minimized.

Copy link
Owner Author

commented Jun 27, 2018

Sample usage:

Takes an input file $FILE with the following contents:

"Phrases"
{
	"TF_MaxTimeFmt"
	{
	}
	"Attrib_Sapper_Leaches_Health"
	{
	}
	"TF_PYRO_KILL_GRIND_LARGE_DESC"
	{
	}
	"TF_WeddingRing_ClientMessageBody"
	{
	}
}

Run the script like so:
python3 localization_to_sourcemod.py -l '/path/to/game/prefix_${language}.txt' -f $FILE --languages english spanish polish --strip-color-bytes

Replace the language template value with the path pattern of your game's localization files, where ${language} stands in for the language name (e.g., if one file is l4d360ui_tu_danish.txt, you'll want the language template to be l4d360ui_tu_${language}.txt).

Not specifying any languages on the command line will make the script attempt to load every language listed in valid_languages (the key is the language name used in the template; the value is the short language code used in SM's translation files).

After the script runs, $FILE should be updated with the following:

"Phrases"
{
	"TF_MaxTimeFmt"
	{
		"#format" "{1:s}"
		"en" "{1} Minutes"
		"es" "{1} minutos"
		"pl" "{1} min"
	}
	"Attrib_Sapper_Leaches_Health"
	{
		"#format" "{1:s}"
		"en" "+{1} health regenerated per second for each active sapper"
		"es" "+{1} de salud regenerada por segundo por cada zapador activo"
		"pl" "Regeneracja {1} pkt zdrowia na sekundę za każdy aktywny saper"
	}
	"TF_PYRO_KILL_GRIND_LARGE_DESC"
	{
		"en" "Kill 1000 enemies."
		"es" "Mata a 1000 enemigos."
		"pl" "Zabij 1000 przeciwników."
	}
	"TF_WeddingRing_ClientMessageBody"
	{
		"#format" "{1:s},{2:s},{3:s}"
		"#tokens" "receiver_name,gifter_name,ring_name"
		"en" "{1} has accepted {2}'s \"{3}\"! Congratulations!"
		"es" "¡{1} ha aceptado el \"{3}\" de {2}! ¡Enhorabuena!"
		"pl" "Gracz {1} przyjmuje „{3}” od gracza {2}! Gratulacje!"
	}
}

As you can see, it's capable of handling a variety of translation placeholder formats, though the #format will assume they are strings if not already defined.

Any changes to #format (e.g., to specify that an argument is a number) will persist between script executions.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.