Skip to content

Instantly share code, notes, and snippets.

@stevygee
Last active August 28, 2019 12:30
Show Gist options
  • Save stevygee/279fe6887af0d4791cceaf947889fe8c to your computer and use it in GitHub Desktop.
Save stevygee/279fe6887af0d4791cceaf947889fe8c to your computer and use it in GitHub Desktop.
Process Lokalise XLIFF exports for importing back into WPML by inserting translation job IDs from source files
# coding=utf-8
import os
import argparse
from glob import glob
import xml.etree.ElementTree as ET
import zipfile
# Namespace URI used by XLIFF 1.2 documents.
namespace = 'urn:oasis:names:tc:xliff:document:1.2'
# Prefix -> URI map passed to ElementTree's findall() for 'xliff:...' paths.
namespaces = dict(xliff=namespace)
# Running count of errors reported while searching for source files.
errors = 0
def get_current_language(filename):
    """Return the target language declared in an XLIFF file.

    Args:
        filename: Path of the XLIFF document to parse.

    Returns:
        The ``target-language`` attribute of the last ``<file>`` element in
        the document, or ``None`` when the document has no ``<file>``
        element (or that element lacks the attribute).
    """
    file_nodes = ET.parse(filename).getroot().findall('xliff:file', namespaces)
    if not file_nodes:
        return None
    # When several <file> elements exist, the last one wins.
    return file_nodes[-1].get('target-language')
def get_original(path, language):
    """Look up the ``original`` attribute of the source file for a language.

    Every ``*.xliff`` file below ``path`` is parsed and each of its
    ``<file>`` elements is checked against ``language``.

    Args:
        path: Directory that is searched recursively for ``*.xliff`` files.
        language: Value the ``target-language`` attribute has to equal.

    Returns:
        The ``original`` attribute of the single matching ``<file>``
        element.  ``None`` is returned, and the module-level ``errors``
        counter is incremented, when there is no match or more than one.
    """
    global errors

    print('Searching for source file with language {0}'.format(language))

    # Collect all candidate paths first, then parse them one by one.
    candidates = []
    for walk_entry in os.walk(path):
        candidates.extend(glob(os.path.join(walk_entry[0], '*.xliff')))

    hits = []
    for xliff_path in candidates:
        if not os.path.isdir(xliff_path):
            document_root = ET.parse(xliff_path).getroot()
            for node in document_root.findall('xliff:file', namespaces):
                original_value = node.get('original')
                node_language = node.get('target-language')
                if node_language == language:
                    hits.append(original_value)
                    print('Matching file found: {0} ({1}, {2})'.format(xliff_path, node_language, original_value))

    if len(hits) == 1:
        return hits[0]

    # Anything other than exactly one match is reported and counted.
    if hits:
        print('Error: Multiple matches ({0}), please make sure there are no duplicate languages (are you processing more than 1 page)?'.format(len(hits)))
    else:
        print('Error: No matches found')
    errors += 1
    return None
def set_original(source_filename, target_filename, value):
    """Copy an XLIFF file while overwriting its ``original`` attribute.

    Args:
        source_filename: Path of the XLIFF document to read.
        target_filename: Path the modified document is written to.
        value: New value for the ``original`` attribute of every
            ``<file>`` element.
    """
    document = ET.parse(source_filename)

    for file_node in document.getroot().findall('xliff:file', namespaces):
        file_node.set('original', value)

    print('Writing {0}'.format(target_filename))
    document.write(target_filename, encoding="utf-8", xml_declaration=True)
def create_archive(path, prefix):
    """Zip the XLIFF files located directly in ``path`` into ``archive.zip``.

    Args:
        path: Directory holding the XLIFF files.  The archive is created
            inside it as ``archive.zip``; an existing archive is overwritten.
        prefix: When non-empty, only files matching ``<prefix>-*.xliff`` are
            added; otherwise every ``*.xliff`` file is added.

    Each matching file is stored exactly once, under its bare file name.
    The working directory is switched to ``path`` only for the duration of
    the call and is restored afterwards, even on failure.
    """
    zip_filename = 'archive.zip'
    pattern = '*.xliff'
    if prefix != '':
        pattern = prefix + '-*.xliff'

    # Work relative to `path` so archive members get bare file names, but
    # do not leak the directory change to the caller.
    previous_cwd = os.getcwd()
    os.chdir(path)
    try:
        print('---')
        print('Writing {0}'.format(os.path.join(path, zip_filename)))
        with zipfile.ZipFile(zip_filename, mode='w') as zf:
            # Glob once: the pattern has no directory part, so repeating it
            # per walked directory only produced duplicate archive members.
            # Sorted so the archive layout is deterministic.
            for file in sorted(glob(pattern)):
                if os.path.isdir(file):
                    continue
                print('adding {0}'.format(file))
                zf.write(file)
    finally:
        os.chdir(previous_cwd)
def main():
    """Command-line entry point.

    For every ``*.xliff`` file below ``lokalise_path`` the target language
    is read, the matching source file below ``wpml_path`` is located, and a
    copy carrying that source file's ``original`` attribute is written to
    ``output_path``.  Finally the written files are bundled into an archive
    and a summary line with the module-level error count is printed.
    """
    parser = argparse.ArgumentParser(description="Prepares XLIFF export from Lokalise for importing into WPML. Looks for XLIFF files in paths and generates an archive at the output path.")
    parser.add_argument('wpml_path', help='Directory containing XLIFF files exported using WPML')
    parser.add_argument('lokalise_path', help='Directory containing XLIFF files exported using Lokalise')
    parser.add_argument('output_path', nargs='?', default=os.getcwd(), help='XLIFF files and archive will be stored here')
    parser.add_argument('--output_prefix', default='REPLACED')
    args = parser.parse_args()

    # Register the XLIFF namespace as the default one so that written files
    # use plain tag names instead of generated ``ns0:`` prefixes.
    ET.register_namespace('', namespace)

    # The file-name prefix does not depend on the file, so build it once.
    prefix = ''
    if args.output_prefix != '':
        prefix = args.output_prefix + '-'

    # search for files recursively
    result = [y for x in os.walk(args.lokalise_path) for y in glob(os.path.join(x[0], '*.xliff'))]
    for file in result:
        if os.path.isdir(file):
            continue

        print('---')
        print('Processing {0}'.format(file))

        # get language of current file
        current_language = get_current_language(file)
        if current_language is None:
            continue

        # find original file for current language
        attrib_original = get_original(args.wpml_path, current_language)
        if attrib_original is None:
            continue

        # write out new file; the text before the first '-' of the
        # ``original`` attribute is used as the job id in the file name
        job_id = attrib_original.split("-")[0]
        output_filename = '{0}translation-job-{1}.xliff'.format(prefix, job_id)
        set_original(file, os.path.join(args.output_path, output_filename), attrib_original)

    # create ZIP in output path
    create_archive(args.output_path, args.output_prefix)

    if errors == 0:
        print('Done.')
    else:
        print('Done with {0} errors, see above.'.format(errors))
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment