Created
May 11, 2016 11:51
-
-
Save tehasdf/61d10a287c49d3500d96c9f3b7c60d39 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
"""
Example of how to deal with user objects that can implement either __unicode__
or __str__, when we don't know which - e.g. exceptions, into which the user
might've passed a bytes message or a unicode message.

One function shows how to convert both cases to bytes (str), the other function
shows how to convert both cases to unicode - depending on what we need.

BTW note that it is much better to always know if we're dealing with bytes or
unicode, but that's not really possible with user-supplied objects :(
"""
# the `coding: utf-8` line is only needed because this file contains non-ascii | |
# characters, here: | |
UNICODE_DATA = u'łódź' | |
BYTES_DATA = b'łódź' | |
# We wouldn't need the `coding:` line if we used escape sequences instead
# (as long as no other non-ASCII characters remain anywhere in the file):
UNICODE_DATA = u'\u0142\xf3d\u017a'
# Explicit b'' prefix: identical to a plain '' literal on Python 2, but it
# states unambiguously that these are the UTF-8 *bytes* of UNICODE_DATA.
BYTES_DATA = b'\xc5\x82\xc3\xb3d\xc5\xba'
# (which yields the same two values as typing the characters literally)
# recap & explanation of bytes/unicode:
# u'x' is unicode, 'x' is bytes/str (in python 2), b'x' is also bytes/str
# uni.encode(encoding) -> bytes, bytes.decode(encoding) -> unicode
#
# unicode(x) calls x.__unicode__, and if that doesn't exist, it calls
# x.__str__ and does a .decode() with the default encoding (ascii)
# str(x) calls x.__str__, doesn't call __unicode__, but if __str__ returns
# a unicode object, then it tries to .encode() with the default encoding
#
# note that: def x(self): return "ł" <-- this returns BYTES, not unicode
# however: def x(self): return u"ł" <-- this returns unicode
#
# if you need a more in-depth explanation of bytes/unicode, see bit.ly/unipain
class A(object):
    """Sample user object that only provides a bytes representation."""

    def __str__(self):
        # Deliberately returns non-ASCII bytes and defines no __unicode__.
        return BYTES_DATA
class B(object):
    """Sample user object that only provides a unicode representation."""

    def __unicode__(self):
        # Deliberately returns non-ASCII text and defines no custom __str__.
        return UNICODE_DATA
# One sample per flavour: `a` implements only __str__, `b` implements only
# __unicode__, and the two exceptions carry a bytes / a unicode message.
a, b = A(), B()
v1, v2 = ValueError(BYTES_DATA), ValueError(UNICODE_DATA)
def f(x):
    """Return the bytes (str) form of an object of unknown text flavour.

    The object may implement __str__ or __unicode__ and we don't know
    which. unicode(x) is attempted first; when it succeeds, the text is
    encoded as UTF-8. When it fails with UnicodeDecodeError (the object
    only offers non-ASCII bytes), the result of str(x) is returned as is.
    """
    try:
        as_text = unicode(x)
    except UnicodeDecodeError:
        # Only a bytes representation is available: hand it back untouched.
        as_bytes = str(x)
    else:
        as_bytes = as_text.encode('utf-8')
    return as_bytes
def f2(x):
    """Return the unicode form of an object of unknown text flavour.

    The object may implement __str__ or __unicode__ and we don't know
    which. unicode(x) is attempted first and returned when it succeeds.
    When it fails with UnicodeDecodeError (the object only offers
    non-ASCII bytes), str(x) is decoded as UTF-8 instead.
    """
    try:
        return unicode(x)
    except UnicodeDecodeError:
        # Only a bytes representation is available: assume it is UTF-8.
        return str(x).decode('utf-8')
# Self-check: whichever of __str__ / __unicode__ a sample provides, f() must
# produce the UTF-8 bytes and f2() must produce the unicode text.
assert all(f(obj) == BYTES_DATA for obj in (a, b, v1, v2))
assert all(f2(obj) == UNICODE_DATA for obj in (a, b, v1, v2))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment