Skip to content

Instantly share code, notes, and snippets.

@vbraun
Last active August 26, 2021 10:10
Show Gist options
  • Save vbraun/254b7f21d027f0ff08a541db5fc76c5a to your computer and use it in GitHub Desktop.
Save vbraun/254b7f21d027f0ff08a541db5fc76c5a to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import sys
from lxml import etree
def filter_trans_units(xliff):
    """
    Drop every trans-unit that is_good_trans_unit() rejects

    The input xliff etree is edited in place; nothing is returned.
    """
    # nsmap[None] is the URI bound to the document's default (unprefixed)
    # namespace; element tags are matched in '{uri}name' form.
    default_ns = xliff.getroot().nsmap[None]
    unit_tag = '{' + default_ns + '}trans-unit'
    # Gather the rejects first: the etree must not be modified while it is
    # still being iterated.
    rejected = [
        unit
        for unit in xliff.iter(unit_tag)
        if not is_good_trans_unit(unit, default_ns)
    ]
    for unit in rejected:
        unit.getparent().remove(unit)
def is_good_trans_unit(trans_unit, namespace):
    """
    Decide whether a given trans_unit subtree of the xliff file is kept

    Returns True as soon as one context element below the trans-unit is
    not irrelevant. A trans-unit without any context element yields False.
    """
    context_tag = '{' + namespace + '}context'
    for context in trans_unit.iter(context_tag):
        if not is_context_irrelevant(context):
            return True
    return False
def is_context_irrelevant(context):
    """
    Return whether to ignore a trans-unit with this context

    A context whose 'context-type' attribute is not 'sourcefile' is always
    irrelevant. A 'sourcefile' context is irrelevant only when its text
    starts with 'node_modules/@progress'.
    """
    if context.attrib.get('context-type') != 'sourcefile':
        return True
    # An empty element (<context context-type="sourcefile"/>) has text None;
    # treat it as an empty path instead of raising AttributeError.
    source_path = context.text or ''
    return source_path.startswith('node_modules/@progress')
if __name__ == '__main__':
    # Use as: format-xliff.py input.xlf output.xlf
    if len(sys.argv) != 3:
        # Exit with the usage line rather than an unpacking ValueError.
        sys.exit('Use as: format-xliff.py input.xlf output.xlf')
    input_filename, output_filename = sys.argv[1:]
    parser = etree.XMLParser(remove_blank_text=True)
    # Read the input as bytes so the XML parser determines the encoding from
    # the document itself instead of the locale's default text encoding.
    with open(input_filename, 'rb') as f:
        xml = etree.parse(f, parser)
    filter_trans_units(xml)
    pretty_xml = etree.tostring(
        xml, pretty_print=True, xml_declaration=True, encoding='UTF-8')
    # tostring() with an explicit encoding returns bytes, hence 'wb'.
    with open(output_filename, 'wb') as f:
        f.write(pretty_xml)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment