# nosoop/localization_to_sourcemod.py

## localization_to_sourcemod.py
import vdf, itertools, os

# imports choice game translation strings into a SourceMod-usable format.

# Maps each game localization language name (the value substituted for the
# ${language} placeholder in the input path template) to the shortcode used as
# that language's key in the generated translation phrases.
# NOTE(review): SourceMod's stock languages.cfg appears to list Japanese as
# 'jp' rather than 'ja' -- verify before relying on the Japanese output.
valid_languages = {
	'brazilian': 'pt',
	'bulgarian': 'bg',
	'czech': 'cze',
	'danish': 'da',
	'dutch': 'nl',
	'english': 'en',
	'finnish': 'fi',
	'french': 'fr',
	'german': 'de',
	'greek': 'el',
	'hungarian': 'hu',
	'italian': 'it',
	'japanese': 'ja',
	'korean': 'ko',
	'norwegian': 'no',
	'polish': 'pl',
	'portuguese': 'pt_p',
	'romanian': 'ro',
	'russian': 'ru',
	'schinese': 'chi',
	'spanish': 'es',
	'swedish': 'sv',
	'tchinese': 'zho',
	'thai': 'th',
	'turkish': 'tr',
	'ukrainian': 'ua',
}

# shortcode of the language whose text decides the order of named (%var%) tokens
native_shortcode = 'en'

# https://docs.python.org/3/library/itertools.html#recipes
def unique_everseen(iterable, key=None):
	"""Yield each distinct element of iterable once, in first-seen order.

	When key is given, distinctness is judged on key(element) rather than on
	the element itself; the element (not its key) is what gets yielded.
	Every element seen so far is remembered for the life of the generator.
	"""
	observed = set()
	for item in iterable:
		marker = item if key is None else key(item)
		if marker in observed:
			continue
		observed.add(marker)
		yield item

def localizations_from_files(path_template, languages = None):
	"""Yield (shortcode, parsed localization) for every language file found.

	path_template -- string.Template pattern containing a ${language}
		placeholder; it is expanded once per language name
	languages -- optional iterable of language names (keys of
		valid_languages); when None or empty, every known language is tried

	Languages whose expanded path is not an existing file are skipped
	silently.  A name that is not a key of valid_languages raises KeyError
	once its file has been found and opened.
	"""
	# imported locally instead of relying on the __main__ block, so the function
	# also works when this module is imported rather than run as a script
	from string import Template

	language_template = Template(path_template)

	for language in (languages or valid_languages):
		language_filepath = language_template.substitute(language = language)

		if not os.path.isfile(language_filepath):
			continue

		# localization files are decoded as UTF-16LE text before parsing
		with open(language_filepath, 'r', encoding='UTF-16LE') as language_file:
			yield valid_languages[language], vdf.load(language_file)

if __name__ == '__main__':
	# Script entry point: loads the translation file named by -f, fills each of
	# its phrases from the per-language localization files matched by -l, rewrites
	# the game's placeholders into SourceMod's {n} form and writes the file back.

	# NOTE: keep these imports at this level -- they bind module globals, and
	# localizations_from_files may resolve `Template` from there.
	import argparse, re, collections, functools
	from string import Template

	parser = argparse.ArgumentParser(
			description = "Outputs a translation file with the given translation strings.",
			usage = "%(prog)s [options]")

	# both paths are mandatory; let argparse report a missing one instead of
	# failing later with a TypeError inside open() / Template()
	parser.add_argument('-l', '--language-template', metavar='FORMAT', required=True,
			help="A file template string containing a ${language} substitution placeholder")
	parser.add_argument('-f', '--translation-file', metavar='FILE', required=True,
			help="A key/value file containing translation strings to be (re)generated")
	parser.add_argument('--strip-color-bytes', action='store_true',
			help="If present, will remove \\x01 through \\x04 codes")
	parser.add_argument('--languages', metavar='LANG', nargs='*',
			help="A list of languages to process")

	args = parser.parse_args()

	# patterns are compiled once instead of being rebuilt for every token
	color_bytes = re.compile(r'[\x01-\x04]')
	positional_arg = re.compile(r'%s(\d+)')
	named_arg = re.compile(r'%\w+%')

	with open(args.translation_file, encoding = 'utf8') as translation_file:
		sections = vdf.load(translation_file, mapper = collections.OrderedDict)

	phrases = sections['Phrases']

	# gather and process desired translation strings into our output translation struct
	for language, localization_file in localizations_from_files(args.language_template, args.languages):
		localization = localization_file['lang']['Tokens']
		for token, phrase in phrases.items():
			if token not in localization:
				continue
			translation = localization[token]

			if args.strip_color_bytes:
				translation = color_bytes.sub('', translation)

			# handle %s1 replacement format
			sourcemod_translation = positional_arg.sub(r'{\1}', translation)

			# the argument count is the highest index referenced, not the number of
			# substitutions: "%s1 ... %s1" takes one argument, a lone "%s2" takes two
			arg_count = max(map(int, positional_arg.findall(translation)), default=0)

			# maintain existing '#format' entry if possible, user might edit type specifier
			if arg_count and '#format' not in phrase:
				phrase['#format'] = ','.join('{{{}:s}}'.format(n + 1) for n in range(arg_count))
			phrase[language] = sourcemod_translation

	# do named token substitution (%var%) as a second pass, maintaining native version order
	for token, translation_phrase in phrases.items():
		# ensure any duplicate names share the same argument number
		# names will be ordered according to the language defined by native_shortcode
		# (raises KeyError if the phrase has no entry for that language)
		named_tokens = list(unique_everseen(named_arg.findall(translation_phrase[native_shortcode])))

		if named_tokens:
			# generate format string {n:s},{n+1:s}...
			if '#format' not in translation_phrase:
				translation_phrase['#format'] = ','.join(
						'{{{}:s}}'.format(n + 1) for n in range(len(named_tokens)))

			# generate list of 2-tuple replacements where %some_token% is paired with {n}
			repls = [(t, '{{{}}}'.format(n + 1)) for n, t in enumerate(named_tokens)]

			# apply every replacement to each entry of the current phrase; iterate over
			# a snapshot because the entries are reassigned while looping
			for language, translation in list(translation_phrase.items()):
				translation_phrase[language] = functools.reduce(
						lambda a, kv: a.replace(*kv), repls, translation)

			# write the tokens as comma-separated values into a "#tokens" entry
			translation_phrase['#tokens'] = ','.join(t.strip('%') for t in named_tokens)

		# sort by language ('#'-prefixed entries such as #format sort first)
		phrases[token] = collections.OrderedDict(sorted(translation_phrase.items()))

	with open(args.translation_file, 'w', encoding = 'utf8') as output_file:
		vdf.dump(sections, output_file, pretty=True)

	# print(vdf.dumps(sections, pretty=True))
	import vdf, itertools, os

	# imports choice game translation strings into a SourceMod-usable format.

	# Maps each game localization language name (the value substituted for the
	# ${language} placeholder in the input path template) to the shortcode used as
	# that language's key in the generated translation phrases.
	# NOTE(review): SourceMod's stock languages.cfg appears to list Japanese as
	# 'jp' rather than 'ja' -- verify before relying on the Japanese output.
	valid_languages = {
		'brazilian': 'pt',
		'bulgarian': 'bg',
		'czech': 'cze',
		'danish': 'da',
		'dutch': 'nl',
		'english': 'en',
		'finnish': 'fi',
		'french': 'fr',
		'german': 'de',
		'greek': 'el',
		'hungarian': 'hu',
		'italian': 'it',
		'japanese': 'ja',
		'korean': 'ko',
		'norwegian': 'no',
		'polish': 'pl',
		'portuguese': 'pt_p',
		'romanian': 'ro',
		'russian': 'ru',
		'schinese': 'chi',
		'spanish': 'es',
		'swedish': 'sv',
		'tchinese': 'zho',
		'thai': 'th',
		'turkish': 'tr',
		'ukrainian': 'ua',
	}

	# shortcode of the language whose text decides the order of named (%var%) tokens
	native_shortcode = 'en'

	# https://docs.python.org/3/library/itertools.html#recipes
	def unique_everseen(iterable, key=None):
		"""List unique elements, preserving order. Remember all elements ever seen.

		When key is given, uniqueness is judged on key(element) while the
		element itself is what gets yielded.
		"""
		seen = set()
		seen_add = seen.add
		if key is None:
			# filterfalse drops every element already recorded in `seen`
			for element in itertools.filterfalse(seen.__contains__, iterable):
				seen_add(element)
				yield element
		else:
			for element in iterable:
				k = key(element)
				if k not in seen:
					seen_add(k)
					yield element

	def localizations_from_files(path_template, languages = None):
		"""Yield (shortcode, parsed localization) for every language file found.

		path_template -- string.Template pattern containing a ${language}
			placeholder; it is expanded once per language name
		languages -- optional iterable of language names (keys of
			valid_languages); when None or empty, every known language is tried

		Languages whose expanded path is not an existing file are skipped
		silently.  A name that is not a key of valid_languages raises KeyError
		once its file has been found and opened.
		"""
		# imported locally so the function does not depend on an import made
		# elsewhere in the script
		from string import Template

		language_template = Template(path_template)

		for language in (languages or valid_languages):
			language_filepath = language_template.substitute(language = language)

			if not os.path.isfile(language_filepath):
				continue

			# localization files are decoded as UTF-16LE text before parsing
			with open(language_filepath, 'r', encoding='UTF-16LE') as language_file:
				yield valid_languages[language], vdf.load(language_file)

	if __name__ == '__main__':
		# Script entry point: loads the translation file named by -f, fills each of
		# its phrases from the per-language localization files matched by -l, rewrites
		# the game's placeholders into SourceMod's {n} form and writes the file back.

		# NOTE: keep these imports at this level -- they bind module globals, and
		# localizations_from_files may resolve `Template` from there.
		import argparse, re, collections, functools
		from string import Template

		parser = argparse.ArgumentParser(
				description = "Outputs a translation file with the given translation strings.",
				usage = "%(prog)s [options]")

		# both paths are mandatory; let argparse report a missing one instead of
		# failing later with a TypeError inside open() / Template()
		parser.add_argument('-l', '--language-template', metavar='FORMAT', required=True,
				help="A file template string containing a ${language} substitution placeholder")
		parser.add_argument('-f', '--translation-file', metavar='FILE', required=True,
				help="A key/value file containing translation strings to be (re)generated")
		parser.add_argument('--strip-color-bytes', action='store_true',
				help="If present, will remove \\x01 through \\x04 codes")
		parser.add_argument('--languages', metavar='LANG', nargs='*',
				help="A list of languages to process")

		args = parser.parse_args()

		# A character class is used for the color bytes: joining the escaped bytes
		# with an escaped pipe would only match the literal sequence
		# \x01|\x02|\x03|\x04 and therefore never strip anything.
		color_bytes = re.compile(r'[\x01-\x04]')
		positional_arg = re.compile(r'%s(\d+)')
		named_arg = re.compile(r'%\w+%')

		with open(args.translation_file, encoding = 'utf8') as translation_file:
			sections = vdf.load(translation_file, mapper = collections.OrderedDict)

		phrases = sections['Phrases']

		# gather and process desired translation strings into our output translation struct
		for language, localization_file in localizations_from_files(args.language_template, args.languages):
			localization = localization_file['lang']['Tokens']
			for token, phrase in phrases.items():
				if token not in localization:
					continue
				translation = localization[token]

				if args.strip_color_bytes:
					translation = color_bytes.sub('', translation)

				# handle %s1 replacement format
				sourcemod_translation = positional_arg.sub(r'{\1}', translation)

				# the argument count is the highest index referenced, not the number of
				# substitutions: "%s1 ... %s1" takes one argument, a lone "%s2" takes two
				arg_count = max(map(int, positional_arg.findall(translation)), default=0)

				# maintain existing '#format' entry if possible, user might edit type specifier
				if arg_count and '#format' not in phrase:
					phrase['#format'] = ','.join('{{{}:s}}'.format(n + 1) for n in range(arg_count))
				phrase[language] = sourcemod_translation

		# do named token substitution (%var%) as a second pass, maintaining native version order
		for token, translation_phrase in phrases.items():
			# ensure any duplicate names share the same argument number
			# names will be ordered according to the language defined by native_shortcode
			# (raises KeyError if the phrase has no entry for that language)
			named_tokens = list(unique_everseen(named_arg.findall(translation_phrase[native_shortcode])))

			if named_tokens:
				# generate format string {n:s},{n+1:s}...
				if '#format' not in translation_phrase:
					translation_phrase['#format'] = ','.join(
							'{{{}:s}}'.format(n + 1) for n in range(len(named_tokens)))

				# generate list of 2-tuple replacements where %some_token% is paired with {n}
				repls = [(t, '{{{}}}'.format(n + 1)) for n, t in enumerate(named_tokens)]

				# apply every replacement to each entry of the current phrase; iterate over
				# a snapshot because the entries are reassigned while looping
				for language, translation in list(translation_phrase.items()):
					translation_phrase[language] = functools.reduce(
							lambda a, kv: a.replace(*kv), repls, translation)

				# write the tokens as comma-separated values into a "#tokens" entry
				translation_phrase['#tokens'] = ','.join(t.strip('%') for t in named_tokens)

			# sort by language ('#'-prefixed entries such as #format sort first)
			phrases[token] = collections.OrderedDict(sorted(translation_phrase.items()))

		with open(args.translation_file, 'w', encoding = 'utf8') as output_file:
			vdf.dump(sections, output_file, pretty=True)

		# print(vdf.dumps(sections, pretty=True))