Skip to content

Instantly share code, notes, and snippets.

@hexists
Last active June 8, 2016 11:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hexists/8aba80f4425d1a9230ae to your computer and use it in GitHub Desktop.
Save hexists/8aba80f4425d1a9230ae to your computer and use it in GitHub Desktop.
#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-
import re
import sys
# Decimal numeric character references such as "&#49804;".  The digit class is
# spelled out so that only ASCII digits match on every Python version.
NUMERIC_CODE_PATTERN = re.compile(r'&#([0-9]+);')

try:  # Python 2
    _unichr = unichr
except NameError:  # Python 3: chr() already covers the full Unicode range
    _unichr = chr


def _decode_numeric_reference(match):
    """Return the character for one ``&#NNN;`` match, or the match unchanged.

    References that do not name an encodable code point (out of range,
    surrogate halves, absurdly long digit runs) are left as they were instead
    of raising, so one bad reference cannot abort a whole conversion.
    """
    original = match.group(0)
    try:
        code_point = int(match.group(1))
        # Lone surrogates cannot be represented in well-formed UTF-8.
        if 0xD800 <= code_point <= 0xDFFF:
            return original
        return _unichr(code_point)
    except (ValueError, OverflowError):
        return original


def euc_to_utf(string):
    """Convert CP949/EUC-KR encoded text to UTF-8, expanding ``&#NNN;`` references.

    Characters that do not exist in the source encoding are commonly written
    as decimal numeric character references; each one is replaced by the
    character it names.

    Args:
        string: CP949-encoded bytes (a Python 2 ``str``).  Already-decoded
            text is accepted as well and skips the decoding step.

    Returns:
        The UTF-8 encoded byte string, or ``None`` when ``string`` is ``None``
        or contains only whitespace.

    Raises:
        UnicodeDecodeError: if the input bytes are not valid CP949.
    """
    if string is None or not string.strip():
        return None

    if isinstance(string, (bytes, bytearray)):
        text = string.decode('cp949')
    else:
        text = string

    # Substitute on the decoded text in a single pass: every reference is
    # expanded exactly once and replacement output is never rescanned, so
    # "&#38;#65;" yields the literal "&#65;" rather than "A".
    text = NUMERIC_CODE_PATTERN.sub(_decode_numeric_reference, text)
    return text.encode('utf-8')
if __name__ == '__main__':
    # Filter mode: read CP949 lines from stdin, write UTF-8 lines to stdout.
    # Use the underlying byte streams when they exist (Python 3) so that no
    # locale-dependent decoding touches the data; on Python 2 the standard
    # streams already carry bytes.
    byte_in = getattr(sys.stdin, 'buffer', sys.stdin)
    byte_out = getattr(sys.stdout, 'buffer', sys.stdout)
    for raw_line in byte_in:
        converted = euc_to_utf(raw_line.rstrip())
        # euc_to_utf returns None for blank lines; emit an empty line so the
        # output keeps the input's line structure instead of the text "None".
        byte_out.write((converted or b'') + b'\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment