Skip to content

Instantly share code, notes, and snippets.



Last active May 26, 2016
What would you like to do?
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""A Python gibberizer for (swift) locale strings.

This will extract all of the strings currently marked for translation,
swap some of the ASCII letters (like 'A') for non-ASCII look-alikes (like 'Á'),
extend them with some padding characters to test label-widths, and
create *.po and *.mo files that may be used by Python's gettext.

After running this, you may want to restart swift with:

    SWIFT_LOCALEDIR=swift/locale/ LANG=en_ZZ swift-init all restart
"""
import argparse
import errno
import math
import os
import random
import re
import sys
from babel.messages.extract import extract_from_dir, DEFAULT_KEYWORDS
import polib
from six import unichr
# Look-alike table used by gibber(): each ASCII letter maps to the accented
# characters that may replace it.  A value is either a list of candidates or
# a one-character string; random.choice() accepts both.
GIBBERISH = {
    'A': [unichr(c) for c in range(192, 199)],
    'C': u'\xC7',
    'D': u'\xD0',
    'E': [unichr(c) for c in range(200, 204)],
    'I': [unichr(c) for c in range(204, 208)],
    'N': u'\xD1',
    'O': [unichr(c) for c in range(210, 215)] + [u'\xD8'],
    'U': [unichr(c) for c in range(217, 221)],
    'Y': [u'\xDD', u'\u0178'],
    'a': [unichr(c) for c in range(224, 231)],
    'c': u'\xE7',
    'd': u'\xF0',
    'e': [unichr(c) for c in range(232, 236)],
    'i': [unichr(c) for c in range(236, 240)],
    'n': u'\xF1',
    'o': [unichr(c) for c in range(242, 247)] + [u'\xF8'],
    'u': [unichr(c) for c in range(249, 253)],
    'y': [u'\xFD', u'\xFF'],
}


def gibber(str, chance=0.50):
    """Translate (some) characters in a string to Gibberish.

    :param str: the text to translate (the name shadows the builtin, but it
                is kept because callers may pass it by keyword)
    :param chance: probability, in [0, 1], that a character found in the
                   look-alike table is replaced; 0 never replaces, 1 always
    :returns: a string of the same length in which each replaceable
              character was independently either kept or swapped for a
              randomly chosen look-alike
    """
    return ''.join(
        random.choice(GIBBERISH[c])
        if c in GIBBERISH and random.random() < chance
        else c
        for c in str)
def main(argv):
    """Extract translatable strings and write gibberized *.po/*.mo catalogs.

    Messages are extracted from the ``--directory`` tree, every part of a
    message that is not a format placeholder is run through ``gibber()``,
    optional padding bullets are appended, and the result is saved to the
    ``--output`` *.po file plus a sibling *.mo file.

    :param argv: command-line arguments *without* the program name; the list
                 is passed straight to ``ArgumentParser.parse_args``
    :raises OSError: if the output directory cannot be created
    """
    class _formatter(argparse.RawTextHelpFormatter):
        # Cribbed from argparse.ArgumentDefaultsHelpFormatter
        def _get_help_string(self, action):
            help_text = action.help
            if help_text is None or '%(default)' in help_text:
                return help_text
            if action.default is argparse.SUPPRESS:
                return help_text
            if action.option_strings or action.nargs in (
                    argparse.OPTIONAL, argparse.ZERO_OR_MORE):
                # Own line (rather than the stdlib's trailing space) because
                # the raw-text formatter keeps newlines.
                help_text += '\n(default: %(default)s)'
            return help_text

    def percentage(val):
        # argparse ``type=`` callable; argparse turns the ValueError into a
        # usage error.
        val = float(val)
        if val < 0 or val > 1:
            raise ValueError('value is not in range [0, 1]')
        return val

    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=_formatter)
    parser.add_argument(
        '-d', '--directory', default='swift',
        help='directory from which to extract messages')
    # NOTE(review): this default is reconstructed from the usage hint in the
    # module header (SWIFT_LOCALEDIR=swift/locale/ LANG=en_ZZ) and gettext's
    # <localedir>/<lang>/LC_MESSAGES/<domain> layout -- confirm.
    parser.add_argument(
        '-o', '--output',
        default='swift/locale/en_ZZ/LC_MESSAGES/swift.po',
        help='*.po file to create')
    parser.add_argument(
        '-g', '--gibberize', type=percentage, default=0.5, metavar="%",
        help='fraction of gibberizable characters to "translate"')
    parser.add_argument(
        '-p', '--padding', type=percentage, default=0.3, metavar="%",
        help='fraction of original string length to pad')
    args = parser.parse_args(argv)

    # Matches %-style and {}-style placeholders.  The single capturing group
    # makes split() alternate literal text (even indexes) with placeholders
    # (odd indexes).  Raw string: same pattern, no invalid-escape warnings.
    formatting_regex = re.compile(
        r'(%(?:\([^\)]*\))?[0-9.*]*[sdxfr]|{[^}]*})')

    # Use same additional keywords as project-config's translation scripts
    # under jenkins/scripts/
    keywords = dict(DEFAULT_KEYWORDS)
    keywords.update({
        '_C': ((1, 'c'), 2),
        '_P': (1, 2),
    })

    def gibberize(text):
        # Placeholders must survive untouched or formatting would break at
        # runtime; only the literal text between them is gibberized.
        translated = ''.join(
            piece if index % 2 else gibber(piece, args.gibberize)
            for index, piece in enumerate(formatting_regex.split(text)))
        if args.padding:
            to_pad = max(5, int(math.ceil(args.padding * len(text))))
            translated += u'\u2022' * to_pad
        return translated

    po_out = polib.POFile()
    # Without a charset header gettext cannot decode the non-ASCII
    # translations stored in the catalog.
    po_out.metadata = {
        'MIME-Version': '1.0',
        'Content-Type': 'text/plain; charset=UTF-8',
        'Content-Transfer-Encoding': '8bit',
        'Plural-Forms': 'nplurals=2; plural=(n != 1);',
    }

    seen = set()
    for _filename, _line, message, _comments, _context in \
            extract_from_dir(args.directory, keywords=keywords):
        # The same message is reported once per occurrence; a catalog may
        # define each msgid only once.
        if message in seen:
            continue
        seen.add(message)
        if isinstance(message, tuple):
            # Pluralizable messages (e.g. from the _P keyword) arrive as a
            # (singular, plural) tuple.
            singular, plural = message[0], message[1]
            entry = polib.POEntry(
                msgid=singular, msgid_plural=plural,
                msgstr_plural={0: gibberize(singular),
                               1: gibberize(plural)})
        else:
            entry = polib.POEntry(msgid=message, msgstr=gibberize(message))
        po_out.append(entry)

    out_dir = os.path.dirname(args.output)
    if out_dir:  # a bare filename has no directory to create
        try:
            os.makedirs(out_dir)
        except OSError as e:
            # An already-existing directory is fine; anything else is fatal.
            if e.errno != errno.EEXIST:
                raise
    po_out.save(args.output)
    po_out.save_as_mofile(os.path.splitext(args.output)[0] + '.mo')
if __name__ == '__main__':
    # main() hands its argument straight to parse_args(), which expects the
    # arguments without the program name.
    main(sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment