Skip to content

Instantly share code, notes, and snippets.

@maizy
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save maizy/74398b8b20a9e1fe3b0d to your computer and use it in GitHub Desktop.
Save maizy/74398b8b20a9e1fe3b0d to your computer and use it in GitHub Desktop.
# encoding: utf-8
def to_bin(x):
    """Return the items of *x* as space-separated binary numbers.

    *x* is any iterable of byte values or single characters: a byte
    string, a ``bytearray`` or a text string. Each item is rendered
    zero-padded to at least eight binary digits; items are joined with a
    single space. An empty input gives an empty string.
    """
    # Iterating a byte string yields 1-character strings on Python 2 but
    # plain ints on Python 3; ord() only accepts the former, so ints are
    # passed through unchanged.
    return ' '.join(
        '{:08b}'.format(i if isinstance(i, int) else ord(i)) for i in x
    )
# The same character (U+1F4A9) written two ways: as a literal embedded in the
# source file and as a \U escape. Both must yield the same unicode string.
embeded_unicode = u'💩'
char_in_unicode = u'\U0001f4a9'
assert embeded_unicode == char_in_unicode

# len() counts storage units: a narrow (UTF-16) Python 2 build stores this
# character as a surrogate pair and reports 2, while wide builds and
# Python 3.3+ report 1.
print('len(char_in_unicode)')
print(len(char_in_unicode))
print('len(embeded_unicode)')
print(len(embeded_unicode))

# Encoding the whole string gives the same UTF-8 bytes for both spellings.
print('\nto_bin(char_in_unicode.encode(utf-8))')
print(to_bin(char_in_unicode.encode('utf-8')))
print('\nto_bin(embeded_unicode.encode(utf-8))')
print(to_bin(embeded_unicode.encode('utf-8')))

# Indexing [0] and [1] only makes sense when the string is stored as two
# code units; on any other build char_in_unicode[1] raises IndexError, so
# that part of the demo is skipped there.
if len(char_in_unicode) > 1:
    print('\nto_bin(char_in_unicode[0].encode(utf-8))')
    print(to_bin(char_in_unicode[0].encode('utf-8')))
    print('\nto_bin(char_in_unicode[1].encode(utf-8))')
    print(to_bin(char_in_unicode[1].encode('utf-8')))
    # Encoding each half separately must not reproduce the encoding of the
    # full character.
    encoded_halves = (
        char_in_unicode[0].encode('utf-8') + char_in_unicode[1].encode('utf-8')
    )
    assert encoded_halves != char_in_unicode.encode('utf-8')
else:
    print('\nchar_in_unicode is a single code point on this build; '
          'skipping the per-code-unit demo')
## lxml
from lxml import etree
# Build a one-element document whose text contains the non-BMP character.
x = etree.XML('<root/>')
x.text = u''.join([u'char: [', char_in_unicode, u']'])

print('xml.text')
print(x.text)

# Serialize the same element once per target encoding, printing a label
# before each result.
for label, encoding in (
    ('etree.tostring(x, utf-8)', 'utf-8'),
    ('etree.tostring(x, ascii)', 'ascii'),
):
    print(label)
    print(etree.tostring(x, encoding=encoding))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment