-
-
Save anonymous/5738996100755768665f to your computer and use it in GitHub Desktop.
rusofobica generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import unicodedata as ud | |
# Combining marks that get appended to a Latin base letter.
caron, cedilla, umlaut, circumflex = (
    '\N{COMBINING CARON}',
    '\N{COMBINING CEDILLA}',
    '\N{COMBINING DIAERESIS}',  # Really?
    '\N{COMBINING CIRCUMFLEX ACCENT}',
)
# Cyrillic consonant -> Latin transliteration (not yet normalized).
# Entries either map to a single Latin letter or to a base letter followed
# by one or more of the combining marks defined above.
consonants = {
    'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd',
    'ж': 'z' + caron,
    'з': 'z', 'й': 'y', 'к': 'k', 'л': 'l', 'м': 'm', 'н': 'n',
    'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'ф': 'f', 'х': 'h', 'ц': 'c',
    'ч': 'c' + caron,
    'ш': 's' + caron,
    # The only consonant whose own transliteration already ends in a cedilla.
    'щ': 's' + caron + cedilla,
}
# Cyrillic vowel -> Latin transliteration (not yet normalized).
vowels = {
    'а': 'a', 'у': 'u', 'о': 'o',
    'ы': 'i' + circumflex,
    'и': 'i', 'э': 'e',
    # These four are written as a base vowel plus a diaeresis.
    'я': 'a' + umlaut,
    'ю': 'u' + umlaut,
    'ё': 'o' + umlaut,
    'е': 'e' + umlaut,
    # Not really vowels, but they live here and transliterate to nothing.
    'ъ': '', 'ь': '',
}
# (soft, hard) letter pairs: the first item is the letter that follows a
# consonant in the Cyrillic text, the second is the letter whose
# transliteration is written after the consonant on the Latin side.
soft_hard_pairs = (
    ('я', 'а'), ('ю', 'у'), ('ё', 'о'), ('е', 'э'),
    # Some kind of hack: 'ь' is paired with itself so that it is processed
    # by the same loop as the vowel pairs above.
    ('ь', 'ь'),
)
def norm(c):
    """Return *c* normalized to Unicode form NFKC.

    Base letters followed by combining marks are composed into a single
    precomposed character wherever Unicode defines one; sequences without
    a precomposed form are left as base letter + mark(s).
    """
    # Maybe NFC? (NFKC additionally folds compatibility characters.)
    return ud.normalize('NFKC', c)
def make_pair(soft, hard):
    """Return a function that builds the rule for a consonant followed by *soft*.

    The returned function takes a Cyrillic consonant ``cyr`` and its Latin
    transliteration ``rus`` and returns the two-item list
    ``[cyrillic, rusofobic]``:

    * ``cyrillic`` is ``cyr`` followed by *soft*;
    * ``rusofobic`` is ``rus`` with a cedilla added, followed by the
      transliteration of *hard*.

    'щ' is the exception: it gets no extra cedilla and is followed by the
    transliteration of *soft* instead.
    """
    def inner(cyr, rus):
        cyrillic = '{}{}'.format(cyr, soft)
        if cyr != 'щ':
            consonant, vowel = rus + cedilla, vowels[hard]
        else:
            # In this file's consonant table 'щ' already ends in a cedilla,
            # so the softness is kept on the vowel instead.
            consonant, vowel = rus, vowels[soft]
        # Normalize the vowel as well as the consonant so that pair rules
        # use the same (composed) form as the stand-alone vowel rules.
        rusofobic = '{}{}'.format(norm(consonant), norm(vowel))
        return [cyrillic, rusofobic]
    return inner
def make_rules():
    """Create mapping between cyrillic and rusofobic.

    Returns a dict whose keys are lower-case Cyrillic letters or
    two-letter sequences and whose values are their transliterations.
    It contains, in insertion order:

    * every consonant and every vowel on its own (normalized);
    * every consonant followed by 'ъ', mapped to the bare consonant;
    * every consonant followed by each soft letter of ``soft_hard_pairs``,
      as built by ``make_pair``.
    """
    rules = {}
    # Normalize consonants...
    for cyr, rus in consonants.items():
        rules[cyr] = norm(rus)
    # ...and vowels
    for cyr, rus in vowels.items():
        rules[cyr] = norm(rus)
    # And Ъ: the hard sign after a consonant is simply dropped.
    for cyr, rus in consonants.items():
        rules['{}ъ'.format(cyr)] = norm(rus)
    # Create pairs of consonant + vowel with cedilla
    for soft, hard in soft_hard_pairs:
        build = make_pair(soft, hard)
        for cyr, rus in consonants.items():
            cyrillic, rusofobic = build(cyr, rus)
            rules[cyrillic] = rusofobic
    return rules
def _split_first_letter(text):
    """Split *text* after its first character and that character's combining marks."""
    end = 1
    while end < len(text) and ud.combining(text[end]):
        end += 1
    return text[:end], text[end:]


def make_table(rules):
    """Create transliteration mapping for all combinations of
    capitalized and non capitalized characters.

    *rules* maps lower-case Cyrillic keys (one or two letters) to their
    transliteration.  The returned dict contains, for every rule, the
    original entry and its all-upper-case form; two-letter keys also get
    the "Xx" and "xX" forms.

    The Latin side of a two-letter key may be longer than two code points
    (a base letter can be followed by combining marks), so it is split
    after the first letter and its marks instead of by index.
    """
    table = {}
    for cyr, rus in rules.items():
        table[cyr] = rus
        table[cyr.upper()] = rus.upper()
        if len(cyr) == 2:
            head, tail = _split_first_letter(rus)
            # "Xx": first letter upper-case, the rest lower-case.
            table[cyr.capitalize()] = rus.capitalize()
            # "xX": first letter untouched, the rest upper-case.
            table['{}{}'.format(cyr[0], cyr[1].upper())] = \
                '{}{}'.format(head, tail.upper())
    return table
if __name__ == '__main__':
    # Print the full table as a JavaScript object literal, one
    # "'key': 'value'" entry per line, ordered by key.
    table = make_table(make_rules())
    entries = []
    for key in sorted(table):
        entries.append("'{}': '{}'".format(key, table[key]))
    body = ',\n'.join(entries)
    print('{{\n{}\n}}'.format(body))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment