Skip to content

anonymous /translit_error.py
Created

Embed URL

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
# -*- coding: utf-8 -*-
import string
def translit1(string):
""" This function works just fine """
capital_letters = {
u'А': u'A',
u'Б': u'B',
u'В': u'V',
u'Г': u'G',
u'Д': u'D',
u'Е': u'E',
u'Ё': u'E',
u'Ж': u'Zh',
u'З': u'Z',
u'И': u'I',
u'Й': u'Y',
u'К': u'K',
u'Л': u'L',
u'М': u'M',
u'Н': u'N',
u'О': u'O',
u'П': u'P',
u'Р': u'R',
u'С': u'S',
u'Т': u'T',
u'У': u'U',
u'Ф': u'F',
u'Х': u'H',
u'Ц': u'Ts',
u'Ч': u'Ch',
u'Ш': u'Sh',
u'Щ': u'Sch',
u'Ъ': u'',
u'Ы': u'Y',
u'Ь': u'',
u'Э': u'E',
u'Ю': u'Yu',
u'Я': u'Ya'
}
lower_case_letters = {
u'а': u'a',
u'б': u'b',
u'в': u'v',
u'г': u'g',
u'д': u'd',
u'е': u'e',
u'ё': u'e',
u'ж': u'zh',
u'з': u'z',
u'и': u'i',
u'й': u'y',
u'к': u'k',
u'л': u'l',
u'м': u'm',
u'н': u'n',
u'о': u'o',
u'п': u'p',
u'р': u'r',
u'с': u's',
u'т': u't',
u'у': u'u',
u'ф': u'f',
u'х': u'h',
u'ц': u'ts',
u'ч': u'ch',
u'ш': u'sh',
u'щ': u'sch',
u'ъ': u'',
u'ы': u'y',
u'ь': u'',
u'э': u'e',
u'ю': u'yu',
u'я': u'ya'
}
translit_string = ""
for index, char in enumerate(string):
if char in lower_case_letters.keys():
char = lower_case_letters[char]
elif char in capital_letters.keys():
char = capital_letters[char]
if len(string) > index+1:
if string[index+1] not in lower_case_letters.keys():
char = char.upper()
else:
char = char.upper()
translit_string += char
return translit_string
def translit2(text):
""" This method should be more easy to grasp,
but throws exception:
UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-51: ordinal not in range(128)
"""
symbols = string.maketrans(u"абвгдеёзийклмнопрстуфхъыьэАБВГДЕЁЗИЙКЛМНОПРСТУФХЪЫЬЭ",
u"abvgdeezijklmnoprstufh'y'eABVGDEEZIJKLMNOPRSTUFH'Y'E")
sequence = {
u'ж':'zh',
u'ц':'ts',
u'ч':'ch',
u'ш':'sh',
u'щ':'sch',
u'ю':'ju',
u'я':'ja',
u'Ж':'Zh',
u'Ц':'Ts',
u'Ч':'Ch'
}
for char in sequence.keys():
text = text.replace(char, sequence[char])
return text.translate(symbols)
if __name__ == "__main__":
print translit1(u"Привет") # prints Privet as expected
print translit2(u"Привет") # throws exception: UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-51: ordinal not in range(128)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.