# tipabu/gibber.py

## gibber.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
A Python gibberizer for (swift) locale strings.

This will extract all of the strings currently marked for translation,
swap some of the ASCII letters (like 'A') for non-ASCII look-alikes (like 'Á'),
extend them with some padding characters to test label-widths, and
create *.po and *.mo files that may be used by Python's gettext.

After running this, you may want to restart swift with:
    SWIFT_LOCALEDIR=swift/locale/ LANG=en_ZZ swift-init all restart
"""
import argparse
import errno
import math
import os
import random
import re
import sys

from babel.messages.extract import extract_from_dir, DEFAULT_KEYWORDS
import polib
from six import unichr


def gibber(str, chance=0.50):
    """Return *str* with some of its letters swapped for look-alikes.

    Each character that has an entry in ``gibber.map`` is replaced, with
    probability *chance*, by a randomly chosen alternative from that
    entry.  All other characters are copied through unchanged.

    :param str: the text to gibberize
    :param chance: probability of replacing each replaceable character
    :returns: the (partially) gibberized text
    """
    table = gibber.map
    pieces = []
    for char in str:
        # The RNG is consulted only for characters that have look-alikes.
        if char in table and random.random() < chance:
            pieces.append(random.choice(table[char]))
        else:
            pieces.append(char)
    return ''.join(pieces)


# ASCII letter -> candidate replacements.  A value is either a list of
# characters or a single one-character string; random.choice() accepts both.
gibber.map = {
    'A': [unichr(code) for code in range(0xC0, 0xC7)],
    'C': u'\xC7',
    'D': u'\xD0',
    'E': [unichr(code) for code in range(0xC8, 0xCC)],
    'I': [unichr(code) for code in range(0xCC, 0xD0)],
    'N': u'\xD1',
    'O': [unichr(code) for code in range(0xD2, 0xD7)] + [u'\xD8'],
    'U': [unichr(code) for code in range(0xD9, 0xDD)],
    'Y': [u'\xDD', u'\u0178'],
    'a': [unichr(code) for code in range(0xE0, 0xE7)],
    'c': u'\xE7',
    'd': u'\xF0',
    'e': [unichr(code) for code in range(0xE8, 0xEC)],
    'i': [unichr(code) for code in range(0xEC, 0xF0)],
    'n': u'\xF1',
    'o': [unichr(code) for code in range(0xF2, 0xF7)] + [u'\xF8'],
    'u': [unichr(code) for code in range(0xF9, 0xFD)],
    'y': [u'\xFD', u'\xFF'],
}


def main(argv):
    """Generate gibberized ``*.po`` and ``*.mo`` message catalogs.

    Extracts every translatable message found under ``--directory``, runs
    the text between format placeholders through ``gibber()``, optionally
    appends bullet padding, and saves the catalog to ``--output`` plus a
    ``.mo`` file with the same base name.

    :param argv: command-line arguments, without the program name
    :raises SystemExit: from argparse, on invalid arguments or ``--help``
    :raises OSError: if the output directory cannot be created
    """
    class _formatter(argparse.RawTextHelpFormatter):
        # Cribbed from argparse.ArgumentDefaultsHelpFormatter
        def _get_help_string(self, action):
            help = action.help
            if all([
                    '%(default)' not in action.help,
                    action.default is not argparse.SUPPRESS,
                    action.option_strings or
                    action.nargs in (argparse.OPTIONAL,
                                     argparse.ZERO_OR_MORE)]):
                help += '\n(default: %(default)s)'
            return help

    def percentage(val):
        """argparse ``type=`` callable accepting a float in [0, 1]."""
        val = float(val)
        # A chained comparison also rejects NaN, which would slip through
        # separate ``< 0`` / ``> 1`` checks.
        if not 0 <= val <= 1:
            # argparse reports an ArgumentTypeError's own message; for a
            # plain ValueError it substitutes a generic one.
            raise argparse.ArgumentTypeError('value is not in range [0, 1]')
        return val

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=_formatter)
    parser.add_argument(
        '-d', '--directory', default='swift',
        help='directory from which to extract messages')
    parser.add_argument(
        '-o', '--output',
        default='swift/locale/en_ZZ/LC_MESSAGES/swift.po',
        help='*.po file to create')
    parser.add_argument(
        '-g', '--gibberize', type=percentage, default=0.5, metavar="%",
        help='fraction of gibberizable characters to "translate"')
    parser.add_argument(
        '-p', '--padding', type=percentage, default=0.3, metavar="%",
        help='fraction of original string length to pad')
    args = parser.parse_args(argv)

    # Matches %-style and {}-style placeholders, which must survive
    # untouched.  Raw string: the pattern contains backslash escapes that
    # are not valid Python string escapes.
    formatting_regex = re.compile(
        r'(%(?:\([^\)]*\))?[0-9.*]*[sdxfr]|{[^}]*})')

    def translate(text):
        """Gibberize *text* outside its placeholders, then pad it."""
        # split() with a capturing group returns literal text at even
        # indices and the captured placeholders at odd indices.
        translated = ''.join(
            s if i % 2 else gibber(s, args.gibberize)
            for i, s in enumerate(formatting_regex.split(text)))
        if args.padding:
            to_pad = max(5, int(math.ceil(args.padding * len(text))))
            translated += u'\u2022' * to_pad
        return translated

    # Use same additional keywords as project-config's
    # jenkins/scripts/common_translation_update.sh
    keywords = dict(DEFAULT_KEYWORDS)
    keywords.update({
        '_C': ((1, 'c'), 2),
        '_P': (1, 2),
    })

    po_out = polib.POFile()
    for _filename, _line, message, _comments, context in \
            extract_from_dir(args.directory, keywords=keywords):
        if isinstance(message, tuple):
            # Keywords that take several message arguments (e.g. ``_P``)
            # yield a (singular, plural) tuple rather than a string.
            singular, plural = message[:2]
            entry = polib.POEntry(
                msgid=singular,
                msgid_plural=plural,
                msgstr_plural={0: translate(singular),
                               1: translate(plural)},
                msgctxt=context)
        else:
            entry = polib.POEntry(
                msgid=message,
                msgstr=translate(message),
                msgctxt=context)
        po_out.append(entry)

    # dirname() is '' for a bare file name, and makedirs('') would fail.
    out_dir = os.path.dirname(args.output)
    if out_dir:
        try:
            os.makedirs(out_dir)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
    po_out.save(args.output)
    po_out.save_as_mofile(os.path.splitext(args.output)[0] + '.mo')

# Script entry point: hand the command-line arguments (without the program
# name) to main().  Nothing runs when the module is merely imported.
if __name__ == '__main__':
    main(sys.argv[1:])
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	"""
	A Python gibberizer for (swift) locale strings.

	This will extract all of the strings currently marked for translation,
	swap some of the ASCII letters (like 'A') for non-ASCII look-alikes (like 'Á'),
	extend them with some padding characters to test label-widths, and
	create *.po and *.mo files that may be used by Python's gettext.

	After running this, you may want to restart swift with:
	    SWIFT_LOCALEDIR=swift/locale/ LANG=en_ZZ swift-init all restart
	"""
	import argparse
	import errno
	import math
	import os
	import random
	import re
	import sys

	from babel.messages.extract import extract_from_dir, DEFAULT_KEYWORDS
	import polib
	from six import unichr


	def gibber(str, chance=0.50):
	    """Translate (some) characters in a string to Gibberish.

	    Each character with an entry in ``gibber.map`` is replaced, with
	    probability *chance*, by a random choice from that entry; every
	    other character is kept as-is.

	    :param str: the text to gibberize
	    :param chance: probability of replacing each replaceable character
	    :returns: the (partially) gibberized text
	    """
	    return ''.join(
	        random.choice(gibber.map[c])
	        if c in gibber.map and random.random() < chance
	        else c
	        for c in str)


	# ASCII letter -> candidate look-alikes.  A value is either a list of
	# characters or a single one-character string; random.choice() accepts
	# both.
	gibber.map = {
	    'A': [unichr(c) for c in range(192, 199)],
	    'C': u'\xC7',
	    'D': u'\xD0',
	    'E': [unichr(c) for c in range(200, 204)],
	    'I': [unichr(c) for c in range(204, 208)],
	    'N': u'\xD1',
	    'O': [unichr(c) for c in range(210, 215)] + [u'\xD8'],
	    'U': [unichr(c) for c in range(217, 221)],
	    'Y': [u'\xDD', u'\u0178'],
	    'a': [unichr(c) for c in range(224, 231)],
	    'c': u'\xE7',
	    'd': u'\xF0',
	    'e': [unichr(c) for c in range(232, 236)],
	    'i': [unichr(c) for c in range(236, 240)],
	    'n': u'\xF1',
	    'o': [unichr(c) for c in range(242, 247)] + [u'\xF8'],
	    'u': [unichr(c) for c in range(249, 253)],
	    'y': [u'\xFD', u'\xFF'],
	}


	def main(argv):
	    """Generate gibberized ``*.po`` and ``*.mo`` message catalogs.

	    Extracts every translatable message found under ``--directory``,
	    runs the text between format placeholders through ``gibber()``,
	    optionally appends bullet padding, and saves the catalog to
	    ``--output`` plus a ``.mo`` file with the same base name.

	    :param argv: command-line arguments, without the program name
	    :raises SystemExit: from argparse, on invalid arguments or ``--help``
	    :raises OSError: if the output directory cannot be created
	    """
	    class _formatter(argparse.RawTextHelpFormatter):
	        # Cribbed from argparse.ArgumentDefaultsHelpFormatter
	        def _get_help_string(self, action):
	            help = action.help
	            if all([
	                    '%(default)' not in action.help,
	                    action.default is not argparse.SUPPRESS,
	                    action.option_strings or
	                    action.nargs in (argparse.OPTIONAL,
	                                     argparse.ZERO_OR_MORE)]):
	                help += '\n(default: %(default)s)'
	            return help

	    def percentage(val):
	        """argparse ``type=`` callable accepting a float in [0, 1]."""
	        val = float(val)
	        # A chained comparison also rejects NaN, which would slip
	        # through separate ``< 0`` / ``> 1`` checks.
	        if not 0 <= val <= 1:
	            # argparse reports an ArgumentTypeError's own message; for a
	            # plain ValueError it substitutes a generic one.
	            raise argparse.ArgumentTypeError(
	                'value is not in range [0, 1]')
	        return val

	    parser = argparse.ArgumentParser(
	        description=__doc__,
	        formatter_class=_formatter)
	    parser.add_argument(
	        '-d', '--directory', default='swift',
	        help='directory from which to extract messages')
	    parser.add_argument(
	        '-o', '--output',
	        default='swift/locale/en_ZZ/LC_MESSAGES/swift.po',
	        help='*.po file to create')
	    parser.add_argument(
	        '-g', '--gibberize', type=percentage, default=0.5, metavar="%",
	        help='fraction of gibberizable characters to "translate"')
	    parser.add_argument(
	        '-p', '--padding', type=percentage, default=0.3, metavar="%",
	        help='fraction of original string length to pad')
	    args = parser.parse_args(argv)

	    # Matches %-style and {}-style placeholders, which must survive
	    # untouched.  The ``*`` quantifiers are required so that names and
	    # width/precision of any length are matched.  Raw string: the
	    # pattern contains backslash escapes that are not valid Python
	    # string escapes.
	    formatting_regex = re.compile(
	        r'(%(?:\([^\)]*\))?[0-9.*]*[sdxfr]|{[^}]*})')

	    def translate(text):
	        """Gibberize *text* outside its placeholders, then pad it."""
	        # split() with a capturing group returns literal text at even
	        # indices and the captured placeholders at odd indices.
	        translated = ''.join(
	            s if i % 2 else gibber(s, args.gibberize)
	            for i, s in enumerate(formatting_regex.split(text)))
	        if args.padding:
	            to_pad = max(5, int(math.ceil(args.padding * len(text))))
	            translated += u'\u2022' * to_pad
	        return translated

	    # Use same additional keywords as project-config's
	    # jenkins/scripts/common_translation_update.sh
	    keywords = dict(DEFAULT_KEYWORDS)
	    keywords.update({
	        '_C': ((1, 'c'), 2),
	        '_P': (1, 2),
	    })

	    po_out = polib.POFile()
	    for _filename, _line, message, _comments, context in \
	            extract_from_dir(args.directory, keywords=keywords):
	        if isinstance(message, tuple):
	            # Keywords that take several message arguments (e.g. ``_P``)
	            # yield a (singular, plural) tuple rather than a string.
	            singular, plural = message[:2]
	            entry = polib.POEntry(
	                msgid=singular,
	                msgid_plural=plural,
	                msgstr_plural={0: translate(singular),
	                               1: translate(plural)},
	                msgctxt=context)
	        else:
	            entry = polib.POEntry(
	                msgid=message,
	                msgstr=translate(message),
	                msgctxt=context)
	        po_out.append(entry)

	    # dirname() is '' for a bare file name, and makedirs('') would fail.
	    out_dir = os.path.dirname(args.output)
	    if out_dir:
	        try:
	            os.makedirs(out_dir)
	        except OSError as e:
	            if e.errno != errno.EEXIST:
	                raise
	    po_out.save(args.output)
	    po_out.save_as_mofile(os.path.splitext(args.output)[0] + '.mo')

	# Script entry point: hand the command-line arguments (without the
	# program name) to main().  Nothing runs when the module is imported.
	if __name__ == '__main__':
	    main(sys.argv[1:])