Skip to content

Instantly share code, notes, and snippets.

@tipabu

tipabu/gibber.py

Last active May 26, 2016
Embed
What would you like to do?
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
A Python gibberizer for (swift) locale strings.
This will extract all of the strings currently marked for translation,
swap some of the ASCII letters (like 'A') for non-ASCII look-alikes (like 'Á'),
extend them with some padding characters to test label-widths, and
create *.po and *.mo files that may be used by Python's gettext.
After running this, you may want to restart swift with:
SWIFT_LOCALEDIR=swift/locale/ LANG=en_ZZ swift-init all restart
"""
import argparse
import errno
import math
import os
import random
import re
import sys
from babel.messages.extract import extract_from_dir, DEFAULT_KEYWORDS
import polib
from six import unichr
def gibber(str, chance=0.50):
    """Return a copy of ``str`` with some characters swapped for look-alikes.

    Every character that has an entry in ``gibber.map`` is replaced, with
    probability ``chance``, by a randomly chosen element of that entry.
    Characters without an entry are copied through untouched.

    :param str: the text to gibberize
    :param chance: probability of replacing each mappable character
    :returns: the (partially) gibberized text
    """
    lookalikes = gibber.map
    pieces = []
    for ch in str:
        # Only roll the dice for characters that can actually be replaced.
        if ch in lookalikes and random.random() < chance:
            ch = random.choice(lookalikes[ch])
        pieces.append(ch)
    return ''.join(pieces)
# Replacement table consulted by gibber(): each key is an ASCII letter and
# each value is a sequence of replacement characters to choose from.
# Ranges are half-open and given as hexadecimal code points.
gibber.map = {
    'A': list(map(unichr, range(0xC0, 0xC7))),
    'C': u'\xC7',
    'D': u'\xD0',
    'E': list(map(unichr, range(0xC8, 0xCC))),
    'I': list(map(unichr, range(0xCC, 0xD0))),
    'N': u'\xD1',
    # 0xD7 is skipped by the range; 0xD8 is appended explicitly.
    'O': list(map(unichr, range(0xD2, 0xD7))) + [u'\xD8'],
    'U': list(map(unichr, range(0xD9, 0xDD))),
    'Y': [u'\xDD', u'\u0178'],
    'a': list(map(unichr, range(0xE0, 0xE7))),
    'c': u'\xE7',
    'd': u'\xF0',
    'e': list(map(unichr, range(0xE8, 0xEC))),
    'i': list(map(unichr, range(0xEC, 0xF0))),
    'n': u'\xF1',
    # 0xF7 is skipped by the range; 0xF8 is appended explicitly.
    'o': list(map(unichr, range(0xF2, 0xF7))) + [u'\xF8'],
    'u': list(map(unichr, range(0xF9, 0xFD))),
    'y': [u'\xFD', u'\xFF'],
}
def main(argv):
    """Extract translatable messages and write a gibberized catalog.

    Messages are extracted from ``--directory`` with Babel, each one is
    "translated" with :func:`gibber` (format specifiers are left intact),
    optionally padded with bullet characters, and the result is saved as
    a ``*.po`` file plus a sibling ``*.mo`` file.

    :param argv: command-line arguments, without the program name
    :raises SystemExit: raised by argparse on invalid arguments or ``-h``
    :raises OSError: if the output directory cannot be created
    """
    class _formatter(argparse.RawTextHelpFormatter):
        # Cribbed from argparse.ArgumentDefaultsHelpFormatter
        def _get_help_string(self, action):
            # Tolerate actions declared without any help text.
            help_text = action.help or ''
            wants_default = (
                '%(default)' not in help_text and
                action.default is not argparse.SUPPRESS and
                (action.option_strings or
                 action.nargs in (argparse.OPTIONAL,
                                  argparse.ZERO_OR_MORE)))
            if wants_default:
                help_text += '\n(default: %(default)s)'
            return help_text

    def percentage(val):
        """argparse ``type=`` callable accepting a float in [0, 1]."""
        val = float(val)
        # The chained comparison also rejects NaN. ArgumentTypeError is
        # used (rather than ValueError) so argparse shows this message
        # instead of a generic "invalid percentage value".
        if not 0 <= val <= 1:
            raise argparse.ArgumentTypeError('value is not in range [0, 1]')
        return val

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=_formatter)
    parser.add_argument(
        '-d', '--directory', default='swift',
        help='directory from which to extract messages')
    parser.add_argument(
        '-o', '--output',
        default='swift/locale/en_ZZ/LC_MESSAGES/swift.po',
        help='*.po file to create')
    parser.add_argument(
        '-g', '--gibberize', type=percentage, default=0.5, metavar="%",
        help='fraction of gibberizable characters to "translate"')
    parser.add_argument(
        '-p', '--padding', type=percentage, default=0.3, metavar="%",
        help='fraction of original string length to pad')
    args = parser.parse_args(argv)

    # Matches %-style and {}-style format specifiers. Raw string so the
    # backslash escapes reach the regex engine without invalid-escape
    # warnings. The single capturing group makes split() return the
    # specifiers at odd indices and the text between them at even indices.
    formatting_regex = re.compile(
        r'(%(?:\([^\)]*\))?[0-9.*]*[sdxfr]|{[^}]*})')

    def translate(text):
        """Gibberize ``text`` outside its format specifiers, then pad it."""
        translated = ''.join(
            piece if index % 2 else gibber(piece, args.gibberize)
            for index, piece in enumerate(formatting_regex.split(text)))
        if args.padding:
            # Always pad by at least 5 characters so short labels grow too.
            to_pad = max(5, int(math.ceil(args.padding * len(text))))
            translated += u'\u2022' * to_pad
        return translated

    # Use same additional keywords as project-config's
    # jenkins/scripts/common_translation_update.sh
    keywords = dict(DEFAULT_KEYWORDS)
    keywords.update({
        '_C': ((1, 'c'), 2),
        '_P': (1, 2),
    })

    po_out = polib.POFile()
    # Declare the charset and plural rule so gettext can decode the
    # non-ASCII translations and pick between the two plural forms.
    po_out.metadata = {
        'MIME-Version': '1.0',
        'Content-Type': 'text/plain; charset=UTF-8',
        'Content-Transfer-Encoding': '8bit',
        'Plural-Forms': 'nplurals=2; plural=(n != 1);',
    }
    for _filename, _line, message, _comments, context in \
            extract_from_dir(args.directory, keywords=keywords):
        if isinstance(message, (tuple, list)):
            # Plural keywords (e.g. ngettext, _P) yield (singular, plural).
            singular, plural = message[:2]
            entry = polib.POEntry(
                msgid=singular,
                msgid_plural=plural,
                msgstr_plural={0: translate(singular),
                               1: translate(plural)},
                msgctxt=context)
        else:
            entry = polib.POEntry(
                msgid=message,
                msgstr=translate(message),
                msgctxt=context)
        po_out.append(entry)

    # dirname is '' when --output is a bare file name; makedirs('') fails.
    out_dir = os.path.dirname(args.output)
    if out_dir:
        try:
            os.makedirs(out_dir)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
    po_out.save(args.output)
    po_out.save_as_mofile(os.path.splitext(args.output)[0] + '.mo')
# Script entry point: hand main() the command-line arguments without the
# program name.
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    main(cli_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment