-
-
Save pedrokoblitz/7104884 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def to_unicode_or_bust(obj, encoding='utf-8'):
    """Return *obj* as text, decoding it first if it is a byte string.

    Adapted from http://farmdev.com/talks/unicode/

    Args:
        obj: Any object. Only byte strings are converted.
        encoding: Codec used to decode a byte string (default ``'utf-8'``).

    Returns:
        The decoded text when *obj* is a byte string; otherwise *obj*
        itself, unchanged (text strings and non-string objects pass
        straight through).

    Raises:
        UnicodeDecodeError: If the byte string is not valid in *encoding*.
    """
    # ``bytes`` is an alias of ``str`` on Python 2 and the real byte type on
    # Python 3, so this single check selects "a string that still needs
    # decoding" on both versions without the Python 2-only names
    # ``basestring`` and ``unicode``.
    if isinstance(obj, bytes):
        obj = obj.decode(encoding)
    return obj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Cheetah.Filters | |
class UnicodeHarder(Cheetah.Filters.Filter):
    """Cheetah output filter that coerces every value to text."""

    def filter(self, val,
               encoding='utf8',
               str=str,
               **kw):
        """Try our best to turn *val* into a text string.

        Args:
            val: The value to convert.
            encoding: Codec tried first when *val* has a ``decode`` method.
                A falsy value falls back to ``'utf8'``.
            str: Unused here; presumably kept so the signature matches the
                base filter's -- confirm against Cheetah.
            **kw: Extra keyword arguments; accepted and ignored.

        Returns:
            ``u''`` for ``None``; *val* itself when it is already text; the
            decoded text when *val* can be decoded; otherwise the text
            conversion of *val*.
        """
        # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3.
        # The builtin ``str`` cannot be used for this because the ``str``
        # parameter shadows it inside this method.
        text_type = type(u'')

        # Only ``None`` is blanked out. A plain truthiness test would also
        # swallow legitimate values such as ``0`` or ``False``.
        if val is None:
            return u''
        if isinstance(val, text_type):
            return val
        try:
            return val.decode(encoding or 'utf8', 'strict')
        except UnicodeDecodeError:
            # latin-1 maps every possible byte to a code point, so this
            # strict decode cannot fail and no further fallback is needed.
            return val.decode('latin-1', 'strict')
        except AttributeError:
            # Not a byte string (no ``decode`` method): use its text form.
            return text_type(val)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import functools | |
class UnicodeMixin(object):
    """Mixin that derives ``__str__`` from a class's ``__unicode__`` method.

    Which ``__str__`` is defined is decided once, when the class body is
    executed, from the module-level ``PY3`` flag.
    """

    if not PY3:
        def __str__(self):
            # Python 2 branch: hand back the text encoded as UTF-8.
            text = self.__unicode__()
            return text.encode('utf8')
    else:
        def __str__(self):
            # Python 3 branch: the text is returned as-is.
            return self.__unicode__()
def console_safe(func):
    """Decorator for making unicode functions console-friendly.

    Args:
        func: The function to wrap.

    Returns:
        *func* itself when ``PY3`` is true. Otherwise a wrapper that calls
        *func* and, if the result is a ``text_type`` instance, returns it
        encoded with the ``'unicode-escape'`` codec; any other result is
        returned unchanged.
    """
    if PY3:
        return func

    @functools.wraps(func)
    def inner(*args, **kwargs):
        res = func(*args, **kwargs)
        if isinstance(res, text_type):  # XXX: use assert?
            res = res.encode('unicode-escape')
        return res
    return inner
class SomeNltkClass(SomeNltkBase, UnicodeMixin):
    """Example class combining ``UnicodeMixin`` with ``console_safe``."""
    # ...

    def __unicode__(self):
        """Return the text form of the object (placeholder in this example)."""
        return ...

    @console_safe
    def __repr__(self):
        """Return the ``__unicode__`` text, passed through ``console_safe``."""
        text = self.__unicode__()
        return text
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment