Skip to content

Instantly share code, notes, and snippets.

Last active February 5, 2019 13:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alexshpilkin/fc5ac7d5aad9332f4471fb2bdf6221a4 to your computer and use it in GitHub Desktop.
Save alexshpilkin/fc5ac7d5aad9332f4471fb2bdf6221a4 to your computer and use it in GitHub Desktop.
Convert between Transtool and GNU gettext files
#!/bin/sh -eu
# Pick the working catalogue.  printf %s concatenates all glob matches with no
# separator, so this only yields a usable file name when exactly one file
# matches *.??.po.
po=$(printf %s *.??.po)
# Optional leading -d switches on the detailed listing at the end.
detail=false; if [ "x${1-}" = x-d ]; then detail=true; shift; fi
# The temp-file variables are set to empty first so the EXIT trap can expand
# them under -u even if it fires before mktemp has run.
prev=; next=; diff=
trap 'rm -f "$prev" "$next" "$diff"' EXIT
prev=$(mktemp); next=$(mktemp); diff=$(mktemp)
# NOTE(review): $base is not assigned anywhere in the visible lines; under -u
# this aborts unless it comes from the environment — confirm.
# Keep only the entries whose msgstr matches `.` (i.e. has some text).
msggrep -T -e . <"$base" >"$prev"
msggrep -T -e . <"$po" >"$next"
# Print word counts of the source (po2ox) and target (po2tx) text of the
# selected entries.  "$next" is passed to msgcomm twice (as a file and as "-"),
# so "more than 1, less than 3" presumably selects entries that are in $next
# but not in $prev — TODO confirm against the msgcomm manual.
printf '# %s source, %s target\n' \
$(msgcomm '->' 1 '-<' 3 "$prev" "$next" - <"$next" | po2ox | ox2txt | wc -w) \
$(msgcomm '->' 1 '-<' 3 "$prev" "$next" - <"$next" | po2tx | tx2txt | wc -w)
# Presumably collects entries whose translation differs between $prev and
# $next: msgcat marks conflicting translations with "#-#-#-#-#" — TODO confirm.
msgcat "$prev" "$next" | msggrep -T -e '#-#-#-#-#' | msgcomm "$next" - >"$diff"
# With -d, also print the selected and differing entries in colour.
# NOTE(review): the closing `fi` of this `if` is not visible in this view.
if $detail; then
msgcomm '->' 1 '-<' 3 --force-po "$prev" "$next" - <"$next" | \
msgcat -i --color=always - "$diff"
#!/usr/bin/env python3
from csv import reader as CSVReader
from datetime import datetime as DateTime
from re import compile as Pattern
from sys import stdin, stdout
# Escapes applied, in order, to CSV text before it is written inside a PO
# string literal.  The backslash rule comes first so that the backslashes
# introduced by the later rules are not doubled again.  Each replacement is an
# re.sub() template, hence the doubled backslashes: r'\\n' emits the two
# characters backslash + "n".
ESCAPE = [
    (r'\\', r'\\\\'),
    (r'"', r'\\"'),
    (r'\n', r'\\n'),
]
ESCAPE = [(Pattern(x), y) for x, y in ESCAPE]
# PO header entry (empty msgid); the single {} field receives the charset of
# the output stream.
_PO_HEADER = r"""# Converted from CSV by csv2po
msgid ""
msgstr ""
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset={}\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Transtool-Escapes: 1\n"
"""
print(_PO_HEADER.format(stdout.encoding), end='')
# Turn every CSV row into one PO entry.
# NOTE(review): the `print(r"""` opener that the trailing `""".format(...)`
# closes is not visible in this view — confirm against the original file.
for entry in CSVReader(stdin):
# Each row must have exactly five columns.
assert len(entry) == 5
# FIXME doesn't touch fuzzy flags
# Columns used below: 0 -> msgctxt, 2 -> msgid, 3 -> msgstr,
# 4 -> optional translator comment.  Column 1 is not read.
src = entry[2]
tar = entry[3]
# Apply the same escapes to source and target text.
for pat, rep in ESCAPE:
src = pat.sub(rep, src)
tar = pat.sub(rep, tar)
{}#, python-brace-format
msgctxt "{}"
msgid "{}"
msgstr "{}"
""".format("# {}\n".format(entry[4]) if entry[4] else "", entry[0], src, tar), end='')
#!/bin/sh -eu
# Optional leading -f turns the two consistency checks below into warnings.
force=false; if [ "x${1-}" = x-f ]; then force=true; shift; fi
# printf %s concatenates all glob matches, so exactly one *.??.po file is
# expected in the current directory.
po=$(printf %s *.??.po)
# Snapshot name: the catalogue name without .po plus a minute-resolution
# ISO timestamp.
pd="${po%.po}.$(date -Imin)"
cp "$po" "$pd.po"
po2tx <"$pd.po" >"$pd.tx"
# The temp-file variables are set to empty first so the EXIT trap can expand
# them under -u even if it fires before mktemp has run.
prev=; next=; diff=
trap 'rm -f "$prev" "$next" "$diff"' EXIT
prev=$(mktemp); next=$(mktemp); diff=$(mktemp)
# NOTE(review): $base is not assigned anywhere in the visible lines — confirm.
# Keep only the entries whose msgstr matches `.` (i.e. has some text).
msggrep -T -e . <"$base" >"$prev"
msggrep -T -e . <"$pd.po" >"$next"
# Each check prints its findings through diff and, under -e, aborts the script
# when diff reports a difference, unless -f was given ($force is then `true`).
# Presumably the first check lists entries present only in $prev and the second
# verifies that $prev is unchanged within $next — TODO confirm.
{ msgcomm '-<' 2 "$prev" "$next" - <"$next" | diff /dev/null -; } || $force
{ msgcomm "$next" "$prev" | diff "$prev" -; } || $force
# Presumably collects entries whose translation differs between $prev and
# $next: msgcat marks conflicting translations with "#-#-#-#-#" — TODO confirm.
msgcat "$prev" "$next" | msggrep -T -e '#-#-#-#-#' | msgcomm "$next" - >"$diff"
# Export the selected and differing entries as CSV, sorted numerically on the
# second "-delimited field.
msgcomm '->' 1 '-<' 3 --force-po "$prev" "$next" - <"$next" | \
msgcat - "$diff" | po2csv | sort -n -t'"' -k2 >"$pd.utf.csv"
# Second copy for cp1251 consumers: {LS} becomes <LS>, every other {...}
# placeholder is removed, and the text is transliterated to cp1251.
# NOTE(review): the redirect target on the last line ("$") looks truncated —
# confirm against the original file.
sed -E -e 's/\{LS\}/<LS>/g' -e 's/\{[^{}]+\}//g' <"$pd.utf.csv" | \
iconv -t cp1251//TRANSLIT >"$"
#!/bin/sh -eu
# Refuse to overwrite an existing catalogue.
# NOTE(review): $next and $prev are not assigned in the visible lines, and the
# closing `fi` of this `if` is not visible either — confirm against the
# original file.
if [ -f "$next.po" ]; then
echo "file already exists: $next.po" >&2
exit 1
# Convert the edited CSV back to PO and concatenate it with $prev.
csv2po <"$next.utf.csv" | msgcat '->' 0 "$prev" - >"$next.po"
# Print, and under -e fail on, any entry that msgcomm -u reports as unique to
# one of the two files.
# NOTE(review): this reads "$next", not "$next.po" — confirm that is intended.
msgcomm -u "$prev" "$next" | diff /dev/null -
#!/usr/bin/env python3
# ox2pot - convert Transtool to GNU gettext
# Converts UTF-8 Transtool OX on standard input to GNU gettext POT on standard
# output.
from datetime import datetime as DateTime
from re import compile as Pattern
from sys import stdin, stdout
# Rewrites applied, in order, to each Transtool string before it is written
# inside a PO string literal:
#   1. PO escaping of backslashes and double quotes (backslashes first, so the
#      backslash added in front of a quote is not doubled again);
#   2. doubling of literal braces, since the output is python-brace-format;
#   3. replacement of U+2028 and of the Transtool tags by {…} placeholders.
# The replacements are re.sub() templates, hence the doubled backslashes.
ESCAPE = [
    (r'\\', r'\\\\'),
    (r'"', r'\\"'),
    (r'{', r'{{'),
    (r'}', r'}}'),
    ('\u2028', r'{LS}'),
    (r'<\*([0-9]+)\*>', r'{as\1}'),
    (r'<cs([0-9]+)>', r'{cs\1}'),
    (r'</cs([0-9]+)>', r'{sc\1}'),
    (r'<U([0-9]+)>', r'{uu\1}'),
    (r'<\?ACE 7\?>', r'{pi}'),
]
ESCAPE = [(Pattern(x), y) for x, y in ESCAPE]
# Read input as UTF-8, dropping a leading BOM (utf-8-sig).  With
# newline='\r\n' only CRLF ends a line and the terminator is kept on each line.
stdin.reconfigure(encoding='utf-8-sig', newline='\r\n')
lines = iter(stdin)
# Emit the POT header entry, flagged fuzzy.
# NOTE(review): the template has two {} fields (creation date and charset) but
# the format() arguments below look truncated — confirm against the original
# file.
print(r'''# Converted from Transtool by ox2pot
#, fuzzy
msgid ""
msgstr ""
"POT-Creation-Date: {}\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset={}\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Transtool-Escapes: 1\n"
'''.format(' ', timespec='minutes'),
), end='')
# Input is consumed as pairs of CRLF-terminated lines: "(<digits>)" followed
# by the text of that string.
# NOTE(review): the `print(r'''` opener matching the closing
# `'''.format(snum, stxt)` at the end of this loop is not visible in this view —
# confirm against the original file.
while True:
line = next(lines, None)
if line is None: break
assert line.startswith('(') and line.endswith(')\r\n')
# Drop the leading "(" and the trailing ")\r\n" to get the string number.
snum = line[1:-3]
assert snum and snum.isdigit()
line = next(lines, None)
# NOTE(review): if the text line is missing, `line` is None here and this
# raises AttributeError rather than AssertionError.
assert line.endswith('\r\n')
stxt = line[:-2]
# The raw text must not contain braces before ESCAPE is applied.
assert '{' not in stxt and '}' not in stxt
for pat, rep in ESCAPE:
stxt = pat.sub(rep, stxt)
# No "<" or ">" may remain once ESCAPE has rewritten the known tags.
assert '<' not in stxt and '>' not in stxt
#, python-brace-format
msgctxt "{}"
msgid "{}"
msgstr ""
'''.format(snum, stxt), end='')
#!/usr/bin/env python3
from csv import writer as CSVWriter, QUOTE_NONNUMERIC
from email import parser, policy
from re import compile as Pattern
from sys import argv, stdin, stdout
# Unescape table for PO string literals, consumed as `pat.sub(rep, text)`.
# All three escapes (\" \n \\) are decoded in ONE pass: decoding them one after
# another would misread an escaped backslash followed by "n" (\\n) as a
# newline escape.
UNESCAPE = [
    (r'\\(["n\\])', lambda m: '\n' if m.group(1) == 'n' else m.group(1)),
]
UNESCAPE = [(Pattern(x), y) for x, y in UNESCAPE]
# Choose the PO field by program name: 'msgid' when invoked under a name
# ending in "2ox" (case-insensitive), otherwise 'msgstr'.
# NOTE(review): OUTPUT is not referenced in the visible remainder of this
# script — confirm whether it is needed here.
OUTPUT = 'msgid' if argv[0].lower().endswith('2ox') else 'msgstr'
# Write output as UTF-8 with a BOM (utf-8-sig) and CRLF line endings.
stdout.reconfigure(encoding='utf-8-sig', newline='\r\n')
def readentry():
    """Read the next PO entry from ``stdin``.

    Leading blank lines are skipped.  ``#`` comment lines are collected under
    their two-character marker (``'# '``, ``'#,'``, ...); repeated markers are
    joined with a single space.  Every following ``keyword "string"`` line
    starts a field, and continuation lines beginning with ``"`` are appended
    to it after being unescaped with ``UNESCAPE``.  The entry ends at a blank
    line or at end of input.

    Returns:
        A dict mapping comment markers and keywords to their text, or ``None``
        when the input holds no further entry (including the case where only
        blank lines remain).

    Raises:
        AssertionError: if the input ends in the middle of an entry, or a
            string line is not wrapped in double quotes.
    """
    entry = {}
    line = next(stdin, None)
    # Skip blank separator lines.  Running out of input here is a normal end
    # of file, not a malformed entry.
    while line is not None and line.isspace():
        line = next(stdin, None)
    if line is None:
        return None

    # Comment block: one dict slot per two-character marker.
    while True:
        assert line is not None
        line = line.strip()
        if not line.startswith('#'):
            break
        mark, text = line[:2], line[2:]
        if mark not in entry:
            entry[mark] = text
        else:
            entry[mark] += " " + text
        line = next(stdin, None)

    # Fields: `line` is already stripped whenever this loop (re)starts.
    while True:
        assert line is not None
        key, line = line.split(maxsplit=1)
        entry[key] = ""
        while True:
            # A PO string line is wrapped in double quotes.
            assert (len(line) >= 2 and
                    line.startswith('"') and
                    line.endswith('"'))
            line = line[1:-1]
            for pat, rep in UNESCAPE:
                line = pat.sub(rep, line)
            entry[key] += line
            line = next(stdin, None)
            if line is None or line.isspace():
                break
            line = line.strip()
            if not line.startswith('"'):
                break  # next keyword line
        if line is None or line.isspace():
            break
    return entry
def readentries():
    """Yield entries from ``readentry()`` until it returns ``None``."""
    # Two-argument iter() keeps calling readentry and stops at the None sentinel.
    yield from iter(readentry, None)
# NOTE(review): the header entry below is taken from a second readentries()
# generator rather than from `entries`; both read the same stdin, so `entries`
# continues with the entry that follows the header.
entries = iter(readentries())
entry = next(readentries())
# The first entry must have an empty msgid; its msgstr holds the header text.
assert not entry['msgid']
# The header text is parsed with the email parser to read its fields.
headers = parser.Parser(policy=policy.default).parsestr(entry['msgstr'])
# Require MIME 1.0, a charset equal to stdin's encoding, and 8bit transfer
# encoding.
assert (headers['MIME-Version'] == '1.0' and
headers['Content-Type'].params['charset'].lower() == stdin.encoding.lower() and
headers['Content-Transfer-Encoding'].lower() == '8bit')
# QUOTE_NONNUMERIC quotes every non-numeric field.
csv = CSVWriter(stdout, quoting=QUOTE_NONNUMERIC)
# NOTE(review): the statement that wraps the two row fragments below (a fuzzy
# marker and the '# ' comment text) is not visible in this view — confirm
# against the original file.
for entry in entries:
'?' if 'fuzzy' in entry.get('#,','') else '',
entry.get('# ', '')
#!/usr/bin/env python3
# po2ox, po2tx - convert GNU gettext to Transtool
# Converts GNU gettext PO on standard input to UTF-8 Transtool OX (if invoked
# as po2ox) or TX (otherwise) on standard output.
from email import parser, policy
from re import compile as Pattern
from sys import argv, stdin, stdout
# Rewrites applied, in order, to turn PO placeholders back into Transtool
# markup: {pi}, {uuN}, {scN}, {csN}, {asN} and {LS} become their tag (or
# U+2028) form, then doubled braces collapse to single ones.  Brace collapsing
# comes last so that it cannot create new placeholder-looking text for the
# earlier rules.
PO2TT = [
    (r'{pi}', r'<?ACE 7?>'),
    (r'{uu([0-9]+)}', r'<U\1>'),
    (r'{sc([0-9]+)}', r'</cs\1>'),
    (r'{cs([0-9]+)}', r'<cs\1>'),
    (r'{as([0-9]+)}', r'<*\1*>'),
    (r'{LS}', '\u2028'),
    (r'}}', r'}'),
    (r'{{', r'{'),
]
PO2TT = [(Pattern(x), y) for x, y in PO2TT]
# Unescape table for PO string literals, consumed as `pat.sub(rep, text)`.
# All three escapes (\" \n \\) are decoded in ONE pass: decoding them one after
# another would misread an escaped backslash followed by "n" (\\n) as a
# newline escape.
UNESCAPE = [
    (r'\\(["n\\])', lambda m: '\n' if m.group(1) == 'n' else m.group(1)),
]
UNESCAPE = [(Pattern(x), y) for x, y in UNESCAPE]
# Choose the PO field to export by program name: 'msgid' when invoked under a
# name ending in "2ox" (case-insensitive), otherwise 'msgstr'.
OUTPUT = 'msgid' if argv[0].lower().endswith('2ox') else 'msgstr'
# Write output as UTF-8 with a BOM (utf-8-sig) and CRLF line endings.
stdout.reconfigure(encoding='utf-8-sig', newline='\r\n')
def readentry():
    """Read the next PO entry from ``stdin``, ignoring its comments.

    Leading blank lines and ``#`` comment lines are skipped.  Every following
    ``keyword "string"`` line starts a field, and continuation lines beginning
    with ``"`` are appended to it after being unescaped with ``UNESCAPE``.
    The entry ends at a blank line or at end of input.

    Returns:
        A dict mapping keywords to their text, or ``None`` when the input
        holds no further entry (including the case where only blank lines
        remain).

    Raises:
        AssertionError: if the input ends in the middle of an entry, or a
            string line is not wrapped in double quotes.
    """
    entry = {}
    line = next(stdin, None)
    # Skip blank separator lines.  Running out of input here is a normal end
    # of file, not a malformed entry.
    while line is not None and line.isspace():
        line = next(stdin, None)
    if line is None:
        return None

    # Comment lines carry nothing this converter needs; drop them.
    while True:
        assert line is not None
        if not line.strip().startswith('#'):
            break
        line = next(stdin, None)

    # Fields.
    while True:
        assert line is not None
        key, line = line.strip().split(maxsplit=1)
        entry[key] = ""
        while True:
            # A PO string line is wrapped in double quotes.
            assert (len(line) >= 2 and
                    line.startswith('"') and
                    line.endswith('"'))
            line = line[1:-1]
            for pat, rep in UNESCAPE:
                line = pat.sub(rep, line)
            entry[key] += line
            line = next(stdin, None)
            if line is None or line.isspace():
                break
            line = line.strip()
            if not line.startswith('"'):
                break  # next keyword line
        if line is None or line.isspace():
            break
    return entry
def readentries():
    """Yield entries from ``readentry()`` until it returns ``None``."""
    # Two-argument iter() keeps calling readentry and stops at the None sentinel.
    yield from iter(readentry, None)
# NOTE(review): the header entry below is taken from a second readentries()
# generator rather than from `entries`; both read the same stdin, so `entries`
# continues with the entry that follows the header.
entries = iter(readentries())
entry = next(readentries())
# The first entry must have an empty msgid; its msgstr holds the header text.
assert not entry['msgid']
# The header text is parsed with the email parser to read its fields.
headers = parser.Parser(policy=policy.default).parsestr(entry['msgstr'])
# Require MIME 1.0, a charset equal to stdin's encoding, 8bit transfer
# encoding, and the X-Transtool-Escapes marker set to 1.
assert (headers['MIME-Version'] == '1.0' and
headers['Content-Type'].params['charset'].lower() == stdin.encoding.lower() and
headers['Content-Transfer-Encoding'].lower() == '8bit' and
headers['X-Transtool-Escapes'] == '1')
# NOTE(review): no statement that writes `text` is visible in this loop —
# confirm against the original file.
for entry in entries:
# The message context must be all digits.
assert entry['msgctxt'].isdigit()
text = entry[OUTPUT]
# PO text must not contain raw angle brackets before PO2TT is applied.
assert '<' not in text and '>' not in text
for pat, rep in PO2TT:
text = pat.sub(rep, text)
# Any brace left over means a placeholder PO2TT did not recognise.
assert '{' not in text and '}' not in text
#!/usr/bin/env python3
# ox2txt, tx2txt - extract text from Transtool
# Prints to standard output the text from a UTF-8 Transtool OX or TX file on
# standard input. The output can then be piped to wc(1) or similar.
from sys import stdin
from re import compile as Pattern
# Matches one "<...>" tag: "<", any run of characters other than ">", then ">".
PH = Pattern(r'<[^>]*>')
# Read input as UTF-8, dropping a leading BOM (utf-8-sig); with newline='\r\n'
# only CRLF ends a line.
stdin.reconfigure(encoding='utf-8-sig', newline='\r\n')
# NOTE(review): `lines` is not used in the visible loop, which reads from
# `stdin` directly.
lines = iter(stdin)
# Lines are consumed in pairs; only the second line of each pair is processed.
while True:
line = next(stdin, None)
if line is None: break
line = next(stdin, None)
# Replace every tag with a single space.
# NOTE(review): the None check below runs only after `line` has already been
# used, and no statement that prints the result is visible — confirm against
# the original file.
line = PH.sub(' ', line)
assert line is not None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment