Skip to content

Instantly share code, notes, and snippets.

@nosoop
Last active February 10, 2022 02:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save nosoop/0594c44cc2e644a1dd7e6d9580199724 to your computer and use it in GitHub Desktop.
Save nosoop/0594c44cc2e644a1dd7e6d9580199724 to your computer and use it in GitHub Desktop.
A script that imports game translation strings into a SourceMod phrase file.
import vdf, itertools, os
# Imports selected game translation strings into a SourceMod-usable phrase file.
# Maps the language name used in the game's localization file names (the value
# substituted for ${language}) to the short code written to the phrase file.
valid_languages = {
    'brazilian': 'pt',
    'bulgarian': 'bg',
    'czech': 'cze',
    'danish': 'da',
    'dutch': 'nl',
    'english': 'en',
    'finnish': 'fi',
    'french': 'fr',
    'german': 'de',
    'greek': 'el',
    'hungarian': 'hu',
    'italian': 'it',
    'japanese': 'ja',
    'korean': 'ko',
    'norwegian': 'no',
    'polish': 'pl',
    'portuguese': 'pt_p',
    'romanian': 'ro',
    'russian': 'ru',
    'schinese': 'chi',
    'spanish': 'es',
    'swedish': 'sv',
    'tchinese': 'zho',
    'thai': 'th',
    'turkish': 'tr',
    'ukrainian': 'ua',
}

# Short code of the language whose strings decide the order of named tokens.
native_shortcode = 'en'
# https://docs.python.org/3/library/itertools.html#recipes
def unique_everseen(iterable, key=None):
    """Yield each element the first time it appears, keeping input order.

    When ``key`` is given, two elements count as duplicates if ``key``
    returns equal values for them; otherwise the elements themselves are
    compared. Everything seen so far is remembered for the whole iteration.
    """
    observed = set()
    for item in iterable:
        marker = item if key is None else key(item)
        if marker in observed:
            continue
        observed.add(marker)
        yield item
def localizations_from_files(path_template, languages = None):
    """Yield ``(short_code, parsed_vdf)`` for each localization file found.

    path_template -- a ``string.Template`` pattern containing a ``${language}``
        placeholder, expanded once per language to get the file path.
    languages -- iterable of language names (keys of ``valid_languages``);
        when empty or None, every key of ``valid_languages`` is tried.

    Languages whose expanded path is not an existing file are skipped.
    A file that exists for a name missing from ``valid_languages`` raises
    KeyError.
    """
    # Imported locally so the function also works when this module is
    # imported rather than run as a script.
    from string import Template

    language_template = Template(path_template)
    for language in (languages or valid_languages.keys()):
        language_filepath = language_template.substitute(language = language)
        if not os.path.isfile(language_filepath):
            continue
        # The localization files are decoded as UTF-16LE.
        with open(language_filepath, 'r', encoding='UTF-16LE') as language_file:
            yield valid_languages[language], vdf.load(language_file)
if __name__ == '__main__':
    # Command-line entry point: read the phrase file, fill in translations
    # from the game's localization files, and write the phrase file back.
    import argparse
    import collections
    import functools
    import re
    import sys
    # Bound as a module global so module-level helpers can use Template.
    from string import Template

    parser = argparse.ArgumentParser(
        description = "Outputs a translation file with the given translation strings.",
        usage = "%(prog)s [options]")
    # Both paths are mandatory; without them the script cannot do anything.
    parser.add_argument('-l', '--language-template', metavar='FORMAT', required=True,
        help="A file template string containing a ${language} substitution placeholder")
    parser.add_argument('-f', '--translation-file', metavar='FILE', required=True,
        help="A key/value file containing translation strings to be (re)generated")
    parser.add_argument('--strip-color-bytes', action='store_true',
        help="If present, will remove \\x01 through \\x04 color codes")
    parser.add_argument('--languages', metavar='LANG', nargs='*',
        help="A list of languages to process")
    args = parser.parse_args()

    with open(args.translation_file, encoding = 'utf8') as translation_file:
        sections = vdf.load(translation_file, mapper = collections.OrderedDict)
    phrases = sections['Phrases']

    # Patterns are compiled once instead of on every translation string.
    color_bytes = re.compile(r'[\x01-\x04]')
    positional_placeholder = re.compile(r'%s(\d+)')
    named_placeholder = re.compile(r'%\w+%')

    def string_format(arg_count):
        # Builds "{1:s},{2:s},..." with every argument typed as a string.
        return ','.join('{{{}:s}}'.format(n + 1) for n in range(arg_count))

    # gather and process desired translation strings into our output translation struct
    for language, localization_file in localizations_from_files(args.language_template, args.languages):
        localization = localization_file['lang']['Tokens']
        for token in phrases:
            if token not in localization:
                continue
            translation = localization[token]
            if args.strip_color_bytes:
                translation = color_bytes.sub('', translation)

            # handle %s1 replacement format
            indices = [int(index) for index in positional_placeholder.findall(translation)]
            sourcemod_translation = positional_placeholder.sub(r'{\1}', translation)

            # maintain existing '#format' entry if possible, user might edit type specifier
            if indices and '#format' not in phrases[token]:
                # The argument count is the highest index used, not the number
                # of occurrences, so a repeated %s1 does not add arguments.
                phrases[token]['#format'] = string_format(max(indices))
            phrases[token][language] = sourcemod_translation

    # do named token substitution (%var%) as a second pass, maintaining native version order
    for token, translation_phrase in list(phrases.items()):
        # ensure any duplicate names share the same argument number
        # names will be ordered according to the language defined by native_shortcode
        native_translation = translation_phrase.get(native_shortcode)
        if native_translation is None:
            # Without the native string there is no reference order for the
            # named tokens, so leave this phrase's %var% placeholders as-is.
            print("warning: no '{}' translation for '{}'; named tokens not processed"
                .format(native_shortcode, token), file=sys.stderr)
            named_tokens = []
        else:
            named_tokens = list(unique_everseen(named_placeholder.findall(native_translation)))

        if named_tokens:
            # generate format string {n:s},{n+1:s}...
            if '#format' not in translation_phrase:
                translation_phrase['#format'] = string_format(len(named_tokens))

            # generate list of 2-tuple replacements where %some_token% is paired with {n}
            repls = [(t, '{{{}}}'.format(n + 1)) for n, t in enumerate(named_tokens)]

            # process the n-tuple replacements in each localization for the current phrase
            for language, translation in list(translation_phrase.items()):
                if language.startswith('#'):
                    # '#format' / '#tokens' are directives, not translations
                    continue
                translation_phrase[language] = functools.reduce(
                    lambda a, kv: a.replace(*kv), repls, translation)

            # write the tokens as comma-separated values into a "#tokens" entry
            translation_phrase['#tokens'] = ','.join(t.strip('%') for t in named_tokens)

        # sort by language ('#'-prefixed entries such as #format sort first)
        phrases[token] = collections.OrderedDict(sorted(translation_phrase.items()))

    with open(args.translation_file, 'w', encoding = 'utf8') as output_file:
        vdf.dump(sections, output_file, pretty=True)
@nosoop
Copy link
Author

nosoop commented Jun 27, 2018

Sample usage:

Takes an input file $FILE with the following contents:

"Phrases"
{
	"TF_MaxTimeFmt"
	{
	}
	"Attrib_Sapper_Leaches_Health"
	{
	}
	"TF_PYRO_KILL_GRIND_LARGE_DESC"
	{
	}
	"TF_WeddingRing_ClientMessageBody"
	{
	}
}

Run the script like so:
python3 localization_to_sourcemod.py -l '/path/to/game/prefix_${language}.txt' -f $FILE --languages english spanish polish --strip-color-bytes

If on Windows, specify -l with double quotes instead of single (e.g. "X:\path\to\game\prefix_${language}.txt"); it's single quoted on Linux to avoid shell substitutions.

Set the language template to the path pattern of your game's localization files, using ${language} in place of the language name (e.g., if one file is l4d360ui_tu_danish.txt, you'll want the language template to be l4d360ui_tu_${language}.txt).

If no languages are specified on the command line, the script will attempt to load every language listed in valid_languages (the key is the language name used in the template; the value is the short language code used in SM's translation files).

After the script runs, $FILE should be updated with the following:

"Phrases"
{
	"TF_MaxTimeFmt"
	{
		"#format" "{1:s}"
		"en" "{1} Minutes"
		"es" "{1} minutos"
		"pl" "{1} min"
	}
	"Attrib_Sapper_Leaches_Health"
	{
		"#format" "{1:s}"
		"en" "+{1} health regenerated per second for each active sapper"
		"es" "+{1} de salud regenerada por segundo por cada zapador activo"
		"pl" "Regeneracja {1} pkt zdrowia na sekundę za każdy aktywny saper"
	}
	"TF_PYRO_KILL_GRIND_LARGE_DESC"
	{
		"en" "Kill 1000 enemies."
		"es" "Mata a 1000 enemigos."
		"pl" "Zabij 1000 przeciwników."
	}
	"TF_WeddingRing_ClientMessageBody"
	{
		"#format" "{1:s},{2:s},{3:s}"
		"#tokens" "receiver_name,gifter_name,ring_name"
		"en" "{1} has accepted {2}'s \"{3}\"! Congratulations!"
		"es" "¡{1} ha aceptado el \"{3}\" de {2}! ¡Enhorabuena!"
		"pl" "Gracz {1} przyjmuje „{3}” od gracza {2}! Gratulacje!"
	}
}

As you can see, it's capable of handling a variety of translation placeholder formats, though the #format will assume they are strings if not already defined.

Any changes to #format (e.g., to specify that an argument is a number) will persist between script executions.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment