anonymous / translit_error.py
Created

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist
View translit_error.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
# -*- coding: utf-8 -*-
 
import string
 
def translit1(string):
""" This function works just fine """
capital_letters = {
u'А': u'A',
u'Б': u'B',
u'В': u'V',
u'Г': u'G',
u'Д': u'D',
u'Е': u'E',
u'Ё': u'E',
u'Ж': u'Zh',
u'З': u'Z',
u'И': u'I',
u'Й': u'Y',
u'К': u'K',
u'Л': u'L',
u'М': u'M',
u'Н': u'N',
u'О': u'O',
u'П': u'P',
u'Р': u'R',
u'С': u'S',
u'Т': u'T',
u'У': u'U',
u'Ф': u'F',
u'Х': u'H',
u'Ц': u'Ts',
u'Ч': u'Ch',
u'Ш': u'Sh',
u'Щ': u'Sch',
u'Ъ': u'',
u'Ы': u'Y',
u'Ь': u'',
u'Э': u'E',
u'Ю': u'Yu',
u'Я': u'Ya'
}
 
lower_case_letters = {
u'а': u'a',
u'б': u'b',
u'в': u'v',
u'г': u'g',
u'д': u'd',
u'е': u'e',
u'ё': u'e',
u'ж': u'zh',
u'з': u'z',
u'и': u'i',
u'й': u'y',
u'к': u'k',
u'л': u'l',
u'м': u'm',
u'н': u'n',
u'о': u'o',
u'п': u'p',
u'р': u'r',
u'с': u's',
u'т': u't',
u'у': u'u',
u'ф': u'f',
u'х': u'h',
u'ц': u'ts',
u'ч': u'ch',
u'ш': u'sh',
u'щ': u'sch',
u'ъ': u'',
u'ы': u'y',
u'ь': u'',
u'э': u'e',
u'ю': u'yu',
u'я': u'ya'
}
 
translit_string = ""
 
for index, char in enumerate(string):
if char in lower_case_letters.keys():
char = lower_case_letters[char]
elif char in capital_letters.keys():
char = capital_letters[char]
if len(string) > index+1:
if string[index+1] not in lower_case_letters.keys():
char = char.upper()
else:
char = char.upper()
translit_string += char
 
return translit_string
 
 
def translit2(text):
""" This method should be more easy to grasp,
but throws exception:
UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-51: ordinal not in range(128)
"""
 
symbols = string.maketrans(u"абвгдеёзийклмнопрстуфхъыьэАБВГДЕЁЗИЙКЛМНОПРСТУФХЪЫЬЭ",
u"abvgdeezijklmnoprstufh'y'eABVGDEEZIJKLMNOPRSTUFH'Y'E")
sequence = {
u'ж':'zh',
u'ц':'ts',
u'ч':'ch',
u'ш':'sh',
u'щ':'sch',
u'ю':'ju',
u'я':'ja',
u'Ж':'Zh',
u'Ц':'Ts',
u'Ч':'Ch'
}
 
for char in sequence.keys():
text = text.replace(char, sequence[char])
 
return text.translate(symbols)
 
if __name__ == "__main__":
print translit1(u"Привет") # prints Privet as expected
print translit2(u"Привет") # throws exception: UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-51: ordinal not in range(128)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.