Skip to content

Instantly share code, notes, and snippets.

@dmukhg
Created August 29, 2016 19:39
Show Gist options
  • Save dmukhg/0cb31853bbcf0a9bbda95f2b547976ee to your computer and use it in GitHub Desktop.
Save dmukhg/0cb31853bbcf0a9bbda95f2b547976ee to your computer and use it in GitHub Desktop.
latex-control-char-replace.py: uses regexes to escape LaTeX control characters in the text of every tag other than tex-math
import os
import re

from lxml import etree
def _escape_latex_control_chars_in(text):
text = re.sub('#', r'\#', text)
text = re.sub('{', r'\{', text)
text = re.sub('}', r'\}', text)
text = re.sub('~', r'\\textasciitilde', text)
text = re.sub('_', r'\_', text)
text = re.sub('\^', r'\\textasciicircum', text)
text = re.sub('%', r'\%', text)
text = re.sub('\$', r'\$', text)
return text
# Python 2 has a separate ``unicode`` text type; on Python 3 the name does
# not exist and ``str`` is the only text type.
try:
    _TEXT_TYPES = (str, unicode)  # noqa: F821
except NameError:
    _TEXT_TYPES = (str,)


def escape_latex_control_chars(root):
    """Escape LaTeX control characters throughout an XML tree, in place.

    Visits every node yielded by ``root.iter()``.  The ``text`` of each
    node is escaped unless the node's tag is ``tex-math``; the ``tail`` of
    every node, including ``tex-math`` ones, is always escaped.

    :param root: any object with an ``iter()`` method yielding nodes that
        expose ``tag``, ``text`` and ``tail`` (an lxml element or tree).
    :returns: ``None``; nodes are modified in place.
    """
    for elem in root.iter():
        text = elem.text
        tail = elem.tail
        if elem.tag != 'tex-math' and text and isinstance(text, _TEXT_TYPES):
            elem.text = _escape_latex_control_chars_in(text)
        # In the ElementTree model the tail is the text that follows the
        # element's closing tag, so it is not part of the math content.
        if tail and isinstance(tail, _TEXT_TYPES):
            elem.tail = _escape_latex_control_chars_in(tail)
def unicode_replace(in_filename, out_filename):
    """Escape LaTeX control characters in an XML file.

    Parses *in_filename* with lxml, escapes the tree in place with
    ``escape_latex_control_chars`` and serializes the result to
    *out_filename*.

    :param in_filename: path of the XML document to read.
    :param out_filename: path the processed document is written to.
    """
    # Both files are opened in binary mode and closed deterministically:
    # lxml handles decoding on parse and emits bytes when writing to a file
    # object.  (The ``file`` builtin used previously exists only on Python 2.)
    with open(in_filename, 'rb') as in_file:
        in_tree = etree.parse(in_file)
    escape_latex_control_chars(in_tree)
    with open(out_filename, 'wb') as out_file:
        in_tree.write(out_file)
def out_filename_for(in_filename):
    """Return the output path derived from *in_filename*.

    The final extension, if any, is replaced by ``-unicode.xml``; for
    example ``paper.xml`` becomes ``paper-unicode.xml``.  A name without an
    extension keeps its full stem, and dots in directory components are
    left alone.
    """
    stem = os.path.splitext(in_filename)[0]
    return stem + '-unicode.xml'
# Command-line entry point: process the XML file named by the first argument
# and write the result next to it under the name from out_filename_for().
if __name__ == "__main__":
    import sys

    # Exit with a usage message rather than an IndexError traceback when the
    # input path is missing.  Extra arguments are ignored, as before.
    if len(sys.argv) < 2:
        sys.exit("usage: %s INPUT_XML" % sys.argv[0])

    in_filename = sys.argv[1]
    out_filename = out_filename_for(in_filename)
    unicode_replace(in_filename, out_filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment