Created
September 27, 2016 01:59
-
-
Save hanpama/2784d2a05e294c0ba87a564bc3678c32 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def ensureCompleteForm(s, *, verbose=True):
    """Compose conjoining Hangul jamo in *s* into precomposed syllables.

    Scans the string left to right and merges, in place:

    * a modern leading consonant (U+1100..U+1112) followed by a modern
      vowel (U+1161..U+1175) into the matching LV syllable, and
    * an LV syllable (no final consonant) followed by a modern trailing
      consonant (U+11A8..U+11C2) into the matching LVT syllable.

    Archaic jamo and the filler code points are left untouched: the
    arithmetic composition only maps onto U+AC00..U+D7A3 for the modern
    19 x 21 x 28 jamo set, so composing anything else would yield an
    unrelated (or non-Hangul) character.

    Args:
        s: The input string; may mix Hangul with any other text.
        verbose: When true (the default, matching the historical
            behaviour), print a tab-separated trace of every scan step
            and every merge to stdout.

    Returns:
        A new string with every composable jamo sequence replaced by its
        precomposed syllable. Other characters are returned unchanged.
    """
    # Constants of the Hangul syllable composition arithmetic.
    s_base, s_last = 0xAC00, 0xD7A3
    l_base, l_count = 0x1100, 19
    v_base, v_count = 0x1161, 21
    # t_base is one below the first trailing consonant: index 0 means
    # "no final consonant".
    t_base, t_count = 0x11A7, 28

    i = 0
    while i + 1 < len(s):
        b1 = ord(s[i])
        b2 = ord(s[i + 1])
        if verbose:
            print('\t'.join(s))
            print('\t' * i + '@')

        is_choseong = l_base <= b1 < l_base + l_count
        is_choseong_followed_by_jungseong = (
            is_choseong and v_base <= b2 < v_base + v_count
        )
        if is_choseong_followed_by_jungseong:
            result = (
                ((b1 - l_base) * v_count + (b2 - v_base)) * t_count + s_base
            )
            s = s[:i] + chr(result) + s[i + 2:]
            if verbose:
                print("{} followed_by_jungseong {} merged into {}".format(
                    chr(b1), chr(b2), chr(result)
                ))
            # Stay on the same index: the new LV syllable may still take
            # a trailing consonant on the next pass.
            continue

        is_eumjeol_without_bachim = (
            s_base <= b1 <= s_last and (b1 - s_base) % t_count == 0
        )
        is_eumjeol_followed_by_jongseong = (
            is_eumjeol_without_bachim and t_base < b2 < t_base + t_count
        )
        if is_eumjeol_followed_by_jongseong:
            result = b1 + (b2 - t_base)
            s = s[:i] + chr(result) + s[i + 2:]
            if verbose:
                print("{} followed_by_jongseong {} merged into {}".format(
                    chr(b1), chr(b2), chr(result)
                ))
            continue

        i += 1
    return s
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment