Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@dorukcan
Last active March 12, 2019 07:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dorukcan/de40ce86942dcbb30064db917b591501 to your computer and use it in GitHub Desktop.
Save dorukcan/de40ce86942dcbb30064db917b591501 to your computer and use it in GitHub Desktop.
# Ordered (broken, fixed) pairs applied one after another by make_turkish().
# Order matters because each replacement sees the output of the previous one:
#   * a multi-character sequence must come before any rule for one of its
#     own characters (e.g. 'Ä±' before the bare '±');
#   * the ampersand entities come last so that "&amp;quot;" is unescaped
#     exactly once (to "&quot;") instead of twice.
# Rows that mapped a string to itself were no-ops and are not listed.  Rows
# that mapped a blank key to the empty string are intentionally not carried
# over: they contradicted the explicit NBSP -> space rule below.
_TR_REPLACEMENTS = (
    # --- whitespace ------------------------------------------------------
    ('\\xa0', ' '),  # the four literal characters: backslash, x, a, 0
    ('\xa0', ' '),   # real no-break space
    ('\t', ' '),

    # --- İ (capital dotted I) -------------------------------------------
    ('ݾ', 'İ'),
    ('Ý', 'İ'),
    ('Ä°', 'İ'),
    ('‹', 'İ'),

    # --- ı (small dotless i) --------------------------------------------
    ('ý', 'ı'),
    ('Ä±', 'ı'),     # UTF-8 bytes C4 B1 read as Latin-1; keep before '±'
    ('±', 'ı'),
    ('Û', 'ı'),
    ('›', 'ı'),

    # --- Ş / ş -----------------------------------------------------------
    ('Þ', 'Ş'),
    ('Åž', 'Ş'),
    ('ÅŸ', 'ş'),
    ('åÿ', 'Ş'),
    ('þ', 'ş'),
    ('Å?', 'ş'),
    ('Å\x9f', 'ş'),

    # --- Ğ / ğ -----------------------------------------------------------
    ('Ð', 'Ğ'),
    ('Äž', 'Ğ'),
    ('ð', 'ğ'),
    ('Ä?', 'ğ'),
    ('ÄŸ', 'ğ'),
    ('Ä\x9f', 'ğ'),

    # --- Ç / ç -----------------------------------------------------------
    ('Ã?', 'Ç'),
    ('Ã\u2021', 'Ç'),  # UTF-8 bytes C3 87 read as cp1252 (0x87 is U+2021)
    ('Ã§', 'ç'),       # UTF-8 bytes C3 A7 read as Latin-1

    # --- Ö / ö -----------------------------------------------------------
    ('Ã\u2013', 'Ö'),  # UTF-8 bytes C3 96 read as cp1252 (0x96 is U+2013)
    ('Ã¶', 'ö'),       # UTF-8 bytes C3 B6 read as Latin-1

    # --- Ü / ü -----------------------------------------------------------
    ('Ãœ', 'Ü'),
    ('Ã¼', 'ü'),       # UTF-8 bytes C3 BC read as Latin-1
    ('ã¼', 'ü'),

    # --- HTML entities ---------------------------------------------------
    ('&lt;', '<'),
    ('&LT;', '<'),
    ('&#x0003C;', '<'),
    ('&#60;', '<'),
    ('&gt;', '>'),
    ('&GT;', '>'),
    ('&#x0003E;', '>'),
    ('&#62;', '>'),
    ('&semi;', ';'),
    ('&#x0003B;', ';'),
    ('&#59;', ';'),
    ('&quot;', '"'),
    ('&QUOT;', '"'),
    ('&#x00022;', '"'),
    ('&#34;', '"'),
    ('&num;', '#'),
    ('&#x00023;', '#'),
    ('&#35;', '#'),
    ('&euro;', '€'),
    # Ampersand is decoded last; see the ordering note at the top.
    ('&amp;', '&'),
    ('&AMP;', '&'),
    ('&#x00026;', '&'),
    ('&#38;', '&'),

    # --- curly quotes whose UTF-8 bytes were read as Latin-1 -------------
    ('â\x80\x99', "'"),
    ('â\x80\x9d', '"'),
)


def make_turkish(text, encode=False):
    """
    Repair mis-decoded Turkish characters and a few HTML entities in a string.

    The text is optionally round-tripped through Latin-1 -> UTF-8 first and is
    then passed through the ordered replacement table ``_TR_REPLACEMENTS``.

    :param text: Text to repair.
    :param encode: When exactly ``True``, re-encode ``text`` as Latin-1 and
        decode the bytes as UTF-8 (undecodable bytes become backslash
        escapes) before the table is applied.
    :return: The repaired text, or ``None`` when ``text`` is falsy
        (``None`` or an empty string).
    :raises UnicodeEncodeError: If ``encode`` is ``True`` and ``text``
        contains a character that does not exist in Latin-1.
    """
    if not text:
        return None

    # Strict identity check kept on purpose: only the boolean True enables
    # the round-trip, exactly as before.
    if encode is True:
        text = text.encode('latin1').decode('utf-8', 'backslashreplace')

    for broken, fixed in _TR_REPLACEMENTS:
        text = text.replace(broken, fixed)

    return text
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment