Skip to content

Instantly share code, notes, and snippets.

@stantonk
Created February 6, 2013 20:20
Show Gist options
  • Save stantonk/4725452 to your computer and use it in GitHub Desktop.
Save stantonk/4725452 to your computer and use it in GitHub Desktop.
Shows how unicode & str types, utf-8 and ascii encodings interoperate with each other in Python 2.x
import sys
def _helper(s, method, encoding):
try:
print 'print s.%s(\'%s\'): %s' % (method, encoding, getattr(s, method)(encoding))
except UnicodeError as e:
print 'print s.%s(\'%s\'): %s' % (method, encoding, e)
def test_encodings(s):
    """Print ``s``, then the outcome of encoding and decoding it.

    Each of 'encode' and 'decode' is tried with the 'utf-8', 'ascii' and
    'unicode-escape' codecs (all encodes first, then all decodes), and a
    blank separator is printed at the end.
    """
    print('print s: %s' % s)
    for method in ('encode', 'decode'):
        for codec in ('utf-8', 'ascii', 'unicode-escape'):
            _helper(s, method, codec)
    print('\n')
# Driver: show the interpreter's default codec, then run the same
# encode/decode experiments on four differently written sample strings.
print('your python encoding is set to: %s\n' % sys.getdefaultencoding())

# Byte string holding the three bytes E3 84 8A.
packed_bytes_str = '\xe3\x84\x8a'
# Unicode string holding the three code points U+00E3, U+0084, U+008A.
packed_bytes_unicode = u'\xe3\x84\x8a'
# Byte-string literal written with a \u escape.
utf8_str = '\u310a'
# Unicode string holding the single code point U+310A.
utf8_unicode = u'\u310a'

# Each entry: (banner template, escaped source text to show, real sample).
# The escaped text is what gets printed so the reader sees the literal as
# it was typed rather than its interpreted value.
_samples = (
    ('s = \'%s\'', '\\xe3\\x84\\x8a', packed_bytes_str),
    ('s = u\'%s\'', u'\\xe3\\x84\\x8a', packed_bytes_unicode),
    ('s = \'%s\'', '\\u310a', utf8_str),
    ('s = u\'%s\'', u'\\u310a', utf8_unicode),
)
for _banner, _shown, _sample in _samples:
    print(_banner % _shown)
    test_encodings(_sample)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment