Created
January 3, 2014 14:05
-
-
Save kscc25/8238330 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import re
import sys
import unicodedata
# Maps a regex character class of accented lower-case Vietnamese letters to
# the plain ASCII letter that replaces them. The upper-case variants are
# derived from these entries inside convert().
patterns = {
    '[àáảãạăắằẵặẳâầấậẫẩ]': 'a',
    '[đ]': 'd',
    '[èéẻẽẹêềếểễệ]': 'e',
    '[ìíỉĩị]': 'i',
    '[òóỏõọôồốổỗộơờớởỡợ]': 'o',
    '[ùúủũụưừứửữự]': 'u',
    '[ỳýỷỹỵ]': 'y',
}


def convert(text: str) -> str:
    """Convert 'Tieng Viet co dau' into 'Tieng Viet khong dau'.

    Every accented Vietnamese letter listed in ``patterns`` (and its
    upper-case form) is replaced by the matching unaccented ASCII letter.
    All other characters are kept.

    The input is first normalised to Unicode NFC so that letters written as
    a base letter followed by combining marks (NFD, e.g. file names coming
    from macOS) are recognised as well. As a consequence the returned string
    is always in NFC form.

    Args:
        text: input string to be converted.

    Returns:
        The converted string.
    """
    # The character classes hold precomposed code points only; composing the
    # input first lets 'e' + U+0302 + U+0301 match the same class as 'ế'.
    output = unicodedata.normalize('NFC', text)
    for regex, replace in patterns.items():
        output = re.sub(regex, replace, output)
        # Deal with upper case: upper-casing the class text leaves the
        # brackets alone and turns every listed letter into its capital.
        output = re.sub(regex.upper(), replace.upper(), output)
    return output
if __name__ == '__main__':
    # Command-line entry point: convert the first argument and print it.
    if len(sys.argv) < 2:
        # Exit with a usage hint (status 1, message on stderr) instead of
        # an IndexError traceback when no text was supplied.
        sys.exit('usage: {} TEXT'.format(sys.argv[0]))
    print(convert(sys.argv[1]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
mình có làm 1 phiên bản khác hoàn thiện hơn:
https://gist.github.com/phineas-pta/05cad38a29fea000ab6d9e13a6f7e623