Last active
May 13, 2017 07:11
-
-
Save scateu/faa03c167066bf40d388 to your computer and use it in GitHub Desktop.
一个简单的UTF8/GBK/Unicode转换示例
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
一 | |
Unicode: u'\u4e00' | |
UTF-8: '\xe4\xb8\x80' | |
GBK: '\xd2\xbb' | |
******************** | |
龥 | |
Unicode: u'\u9fa5' | |
UTF-8: '\xe9\xbe\xa5' | |
GBK: '\xfd\x9b' | |
******************** | |
烫 | |
Unicode: u'\u70eb' | |
UTF-8: '\xe7\x83\xab' | |
GBK: '\xcc\xcc' | |
******************** | |
锟 | |
Unicode: u'\u951f' | |
UTF-8: '\xe9\x94\x9f' | |
GBK: '\xef\xbf' | |
******************** | |
斤 | |
Unicode: u'\u65a4' | |
UTF-8: '\xe6\x96\xa4' | |
GBK: '\xbd\xef' | |
******************** | |
拷 | |
Unicode: u'\u62f7' | |
UTF-8: '\xe6\x8b\xb7' | |
GBK: '\xbf\xbd' | |
******************** | |
锟斤拷 | |
Unicode: u'\u951f\u65a4\u62f7' | |
UTF-8: '\xe9\x94\x9f\xe6\x96\xa4\xe6\x8b\xb7' | |
GBK: '\xef\xbf\xbd\xef\xbf\xbd' | |
******************** | |
鐜 | |
Unicode: u'\u941c' | |
UTF-8: '\xe9\x90\x9c' | |
GBK: '\xe7\x8e' | |
******************** | |
嬪 | |
Unicode: u'\u5b2a' | |
UTF-8: '\xe5\xac\xaa' | |
GBK: '\x8b\xe5' | |
******************** | |
悍 | |
Unicode: u'\u608d' | |
UTF-8: '\xe6\x82\x8d' | |
GBK: '\xba\xb7' | |
******************** | |
鐜嬪悍 | |
Unicode: u'\u941c\u5b2a\u608d' | |
UTF-8: '\xe9\x90\x9c\xe5\xac\xaa\xe6\x82\x8d' | |
GBK: '\xe7\x8e\x8b\xe5\xba\xb7' | |
******************** | |
王 | |
Unicode: u'\u738b' | |
UTF-8: '\xe7\x8e\x8b' | |
GBK: '\xcd\xf5' | |
******************** | |
康 | |
Unicode: u'\u5eb7' | |
UTF-8: '\xe5\xba\xb7' | |
GBK: '\xbf\xb5' | |
******************** | |
王康 | |
Unicode: u'\u738b\u5eb7' | |
UTF-8: '\xe7\x8e\x8b\xe5\xba\xb7' | |
GBK: '\xcd\xf5\xbf\xb5' | |
******************** | |
王康 | |
UTF-8 misdecode as GBK: | |
鐜嬪悍 | |
u'\u941c\u5b2a\u608d' | |
GBK misdecode as UTF-8: | |
>_< | |
******************** |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
def analysis(unicode_char):
    """Print a piece of text next to its Unicode, UTF-8 and GBK forms.

    Five lines are written to standard output: the text itself, the
    ``repr`` of the unicode string, the ``repr`` of its UTF-8 encoding,
    the ``repr`` of its GBK encoding (each behind a label), and a
    separator line of 20 asterisks.

    :param unicode_char: unicode text to inspect; the callers in this
        file pass one to three characters.
    :returns: None.
    :raises UnicodeEncodeError: propagated from ``encode`` when the text
        cannot be represented in one of the target encodings.
    """
    print(unicode_char)
    # The labels end in a space and are joined to the value with one more
    # space, which is exactly what the comma-separated Python 2 print
    # statement emitted.  Single-argument print(...) is valid syntax on
    # both Python 2 and Python 3.
    print(" ".join(("Unicode: ", repr(unicode_char))))
    for label, codec in (("UTF-8: ", 'utf8'), ("GBK: ", 'gbk')):
        print(" ".join((label, repr(unicode_char.encode(codec)))))
    print('*' * 20)
def chaos(unicode_string):
    """Show the mojibake produced by decoding text with the wrong codec.

    Prints ``unicode_string`` itself and then, for each direction
    (UTF-8 bytes read as GBK, GBK bytes read as UTF-8), a heading
    followed by the mis-decoded text and its ``repr``.  When the text
    cannot be encoded, or the bytes cannot be decoded, with the codecs
    involved, ``>_<`` is printed for that direction instead.  A line of
    20 asterisks ends the report.

    :param unicode_string: unicode text to garble.
    :returns: None; everything goes to standard output.
    """
    print(unicode_string)
    directions = (
        ("UTF-8 misdecode as GBK:", 'utf8', 'gbk'),
        ("GBK misdecode as UTF-8:", 'gbk', 'utf8'),
    )
    for heading, written_as, read_as in directions:
        print(heading)
        try:
            garbled = unicode_string.encode(written_as).decode(read_as)
            # Both directions show the text first and its repr second.
            print(garbled)
            print(repr(garbled))
        except UnicodeError:
            # UnicodeError is the common base of UnicodeEncodeError and
            # UnicodeDecodeError, so a failed encode, a failed decode and
            # an unprintable result are all reported the same way, while
            # unrelated exceptions (e.g. KeyboardInterrupt) propagate.
            print(">_<")
    print('*' * 20)
if __name__ == "__main__":
    # Texts to dump, in report order: single characters first, then the
    # multi-character strings built from them.  u'鐜嬪悍' is what the
    # UTF-8 bytes of u'王康' look like when they are read as GBK.
    samples = (
        u'\u4e00',
        u'\u9fa5',
        u'烫',
        u'锟',
        u'斤',
        u'拷',
        u'锟斤拷',
        u'鐜',
        u'嬪',
        u'悍',
        u'鐜嬪悍',
        u'王',
        u'康',
        u'王康',
    )
    for sample in samples:
        analysis(sample)

    # Finish with the wrong-codec round trips for one sample string.
    chaos(u'王康')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment