Last active
November 29, 2022 01:52
-
-
Save congkhoa/bcd46eea4972d01aad15f2c603b560c7 to your computer and use it in GitHub Desktop.
TCVN3 to Unicode (python 3)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
import re
import unicodedata
TCVN3TAB = "µ¸¶·¹¨»¾¼½Æ©ÇÊÈÉË®ÌÐÎÏѪÒÕÓÔÖ×ÝØÜÞßãáâä«åèæçé¬êíëìîïóñòô-õøö÷ùúýûüþ¡¢§£¤¥¦" # NOQA | |
TCVN3TAB = [ch for ch in TCVN3TAB] | |
UNICODETAB = "àáảãạăằắẳẵặâầấẩẫậđèéẻẽẹêềếểễệìíỉĩịòóỏõọôồốổỗộơờớởỡợùúủũụưừứửữựỳýỷỹỵĂÂĐÊÔƠƯ" # NOQA | |
UNICODETAB = [ch for ch in UNICODETAB] | |
r = re.compile("|".join(TCVN3TAB)) | |
replaces_dict = dict(zip(TCVN3TAB, UNICODETAB)) | |
def TCVN3_to_unicode(tcvn3str):
    """Convert text written with a TCVN3 font to Unicode Vietnamese.

    Every character of *tcvn3str* that is listed in ``TCVN3TAB`` is replaced
    by the character at the same position in ``UNICODETAB``.  All other
    characters are returned unchanged.  The substitution is a single pass, so
    a replacement character is never converted a second time.

    Example: ``TCVN3_to_unicode('hép møt tÕt')`` returns ``'hộp mứt tết'``.
    """
    return r.sub(lambda match: replaces_dict[match.group(0)], tcvn3str)
# Reverse lookup (Unicode code point -> TCVN3 character) for str.translate.
# The forward regex/dict cannot be reused here: they are keyed by TCVN3
# characters and would convert in the wrong direction.
_UNICODE_TO_TCVN3 = str.maketrans(dict(zip(UNICODETAB, TCVN3TAB)))


def unicode_to_TCVN3(unicodestr):
    """Convert Unicode Vietnamese text to its TCVN3-font representation.

    Every character of *unicodestr* that is listed in ``UNICODETAB`` is
    replaced by the character at the same position in ``TCVN3TAB``.
    Characters that are not in the table are left as they are (after NFC
    normalisation).  This is the inverse of ``TCVN3_to_unicode`` for the
    characters the tables cover.
    """
    # Each table key is a single code point, so a letter supplied in
    # decomposed form (base letter + combining marks) would never match.
    # Compose the input first.
    composed = unicodedata.normalize("NFC", unicodestr)
    return composed.translate(_UNICODE_TO_TCVN3)
# Example:
#   in:  TCVN3_to_unicode('hép møt tÕt')
#   out: 'hộp mứt tết'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment