Created
March 27, 2017 00:48
-
-
Save parkjinwoo/91a62a9388a62d9a3ef9c2163a2c72bb to your computer and use it in GitHub Desktop.
초성 검색 : Unicode Study
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# Jamo tables, ordered exactly as the precomposed Hangul syllable block
# enumerates them, so a list index doubles as the syllable's component index.

# Initial consonants (chosung).
chosungs = list(u'ㄱㄲㄴㄷㄸㄹㅁㅂㅃㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎ')

# Medial vowels (jungsung).
jungsungs = list(u'ㅏㅐㅑㅒㅓㅔㅕㅖㅗㅘㅙㅚㅛㅜㅝㅞㅟㅠㅡㅢㅣ')

# Final consonants (jongsung); slot 0 is the empty string for "no final".
jongsungs = [u''] + list(u'ㄱㄲㄳㄴㄵㄶㄷㄹㄺㄻㄼㄽㄾㄿㅀㅁㅂㅄㅅㅆㅇㅈㅊㅋㅌㅍㅎ')

CHO_LEN = len(chosungs)
JUNG_LEN = len(jungsungs)
JONG_LEN = len(jongsungs)

# Code points of the first and last precomposed Hangul syllables.
HAN_FST = ord(u'가')
HAN_LST = ord(u'힣')

# Code points of the first and last standalone consonant letters.
JAUM_FST = ord(u'ㄱ')
JAUM_LST = ord(u'ㅎ')
def cho_jung_jong(han_char):
    """Split one character into its (chosung, jungsung, jongsung) parts.

    han_char must be a single character (it is passed to ord()).

    Returns a 3-tuple of strings:
      * precomposed syllable (HAN_FST..HAN_LST): its initial consonant,
        medial vowel and final consonant ('' when there is no final);
      * a standalone letter that is itself an initial consonant (a member
        of chosungs): (han_char, '', '');
      * anything else: ('', '', '').
    """
    code_point = ord(han_char)
    if HAN_FST <= code_point <= HAN_LST:
        # Syllables are laid out as
        #   HAN_FST + (cho * JUNG_LEN + jung) * JONG_LEN + jong
        # divmod keeps every index an int on both Python 2 and Python 3
        # (plain "/" yields a float on Python 3 and cannot index a list).
        rest, jong_idx = divmod(code_point - HAN_FST, JONG_LEN)
        cho_idx, jung_idx = divmod(rest, JUNG_LEN)
        return (chosungs[cho_idx], jungsungs[jung_idx], jongsungs[jong_idx])
    if han_char in chosungs:
        # A bare consonant is its own initial. Do NOT index chosungs by the
        # offset from the first consonant letter: that code point range also
        # holds final-only clusters, so the offset is not a chosungs index
        # (it returned the wrong letter or raised IndexError).
        return (han_char, '', '')
    return ('', '', '')
def parse_chosung(han_str):
    """Return the initial consonants of han_str's characters, concatenated.

    Each character is passed through cho_jung_jong and only the first
    element of its result is kept, so characters for which that element is
    '' contribute nothing to the output.
    """
    initials = []
    for ch in han_str:
        initials.append(cho_jung_jong(ch)[0])
    return ''.join(initials)
def compare_chosung(i, target):
    """Tell whether search text i matches target.

    Spaces are removed from both arguments first. The result is True when
    the stripped search text is a substring of the stripped target itself,
    or a substring of the target's initial-consonant string as produced by
    parse_chosung; otherwise False.
    """
    query = i.replace(' ', '')
    candidate = target.replace(' ', '')
    # Literal match first; the initial-consonant string is only built when
    # the literal match fails.
    if query in candidate:
        return True
    return query in parse_chosung(candidate)
def main():
    """Interactive initial-consonant search over the names in contacts.txt.

    Reads 'contacts.txt' (UTF-8, one name per line) from the current
    directory, prints the names, prompts for a search string and prints the
    names accepted by compare_chosung, separated by ', '.
    """
    # Function-scope import so this block does not depend on the file header.
    import io

    # io.open decodes to text on both Python 2 and 3; the with-block closes
    # the file, which the previous bare open() never did.
    with io.open('contacts.txt', encoding='utf-8') as contacts_file:
        stripped = [line.strip() for line in contacts_file]
    # Drop line terminators and blank lines so the joined result line below
    # is not broken up by embedded newlines.
    contacts = [name for name in stripped if name]
    print('\n'.join(contacts))

    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    my_input = read_line('search: ')
    if isinstance(my_input, bytes):
        # Python 2 hands back a byte string; decode it like the file contents.
        my_input = my_input.decode('utf-8')

    matches = [nm for nm in contacts if compare_chosung(my_input, nm)]
    print(', '.join(matches))
# Start the interactive search only when this file is executed as a script.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
김애란 | |
김창완 | |
김훈 | |
밥 딜런 | |
빈센트 반 고흐 | |
장필순 | |
클로드 모네 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment