Skip to content

Instantly share code, notes, and snippets.

@xyb
Created September 10, 2013 03:28
Show Gist options
  • Save xyb/6504619 to your computer and use it in GitHub Desktop.
Save xyb/6504619 to your computer and use it in GitHub Desktop.
Print-friendly repr() string for CJK characters.
# coding: utf8
import re
# Matches one ``\uXXXX`` escape as written by repr(); the escape itself is
# group 1.  A backslash that belongs to the data appears in repr() output as
# the pair ``\\``, so the match has to start at the beginning of a backslash
# run (the look-behind) and skip whole pairs first: only an odd backslash in
# front of ``u`` opens a real escape.
REPR_UNICODE_CHAR = re.compile(r'(?<!\\)(?:\\\\)*(\\u[0-9a-f]{4})')

try:
    _unichr = unichr  # Python 2  # noqa: F821
except NameError:
    _unichr = chr  # Python 3 has no unichr(); chr() does the same job


def readable_repr(obj):
    r"""Return ``repr(obj)`` with its ``\uXXXX`` escapes turned into characters.

    The result is meant for printing: CJK (and other BMP) characters that
    repr() wrote as ``\uXXXX`` become the characters themselves, while text
    that merely looks like an escape because the data contains a literal
    backslash (``\\u535a`` in the repr) is left alone.  Eight-digit
    ``\UXXXXXXXX`` escapes are not touched.

    On Python 3, repr() already keeps printable non-ASCII characters, so only
    the escapes it still emits (non-printable characters) are affected.

    Illustrative Python 2 session:

    >>> u = u'cjk 中日韩 \\u535a'
    >>> u
    u'cjk \u4e2d\u65e5\u97e9 \\u535a'
    >>> repr(u)
    "u'cjk \\u4e2d\\u65e5\\u97e9 \\\\u535a'"
    >>> readable_repr(u)
    u"u'cjk \u4e2d\u65e5\u97e9 \\\\u535a'"
    >>> print repr(u)
    u'cjk \u4e2d\u65e5\u97e9 \\u535a'
    >>> print readable_repr(u)
    u'cjk 中日韩 \\u535a'

    :param obj: any object; only its ``repr()`` text is inspected.
    :returns: the repr text with every genuine ``\uXXXX`` escape decoded.
    """
    def replace_unicode_char(match):
        escape = match.group(1)
        # Whatever precedes the escape inside the match is a run of escaped
        # backslashes; it is data and must be emitted unchanged.
        kept_backslashes = match.group(0)[:-len(escape)]
        return kept_backslashes + _unichr(int(escape[2:], 16))

    return REPR_UNICODE_CHAR.sub(replace_unicode_char, repr(obj))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment