Skip to content

Instantly share code, notes, and snippets.

@pedrokoblitz
Forked from inky/aaaaaaargh
Last active December 26, 2015 05:59
Show Gist options
  • Save pedrokoblitz/7104884 to your computer and use it in GitHub Desktop.
Save pedrokoblitz/7104884 to your computer and use it in GitHub Desktop.
def to_unicode_or_bust(obj, encoding='utf-8'):
    """Return ``obj`` as text, decoding it first if it is a byte string.

    Based on http://farmdev.com/talks/unicode/

    Args:
        obj: Any object. Byte strings are decoded; everything else
            (text, numbers, ``None``, ...) is returned unchanged.
        encoding: Codec used to decode byte strings.

    Returns:
        The decoded text when ``obj`` is a byte string, otherwise ``obj``
        itself.

    Raises:
        UnicodeDecodeError: If ``obj`` is a byte string that is not valid
            in ``encoding``.
    """
    # ``bytes`` is an alias of ``str`` on Python 2.6+ and the real byte-string
    # type on Python 3, so this single check replaces the Python 2-only
    # ``basestring``/``unicode`` pair and runs on both interpreters.
    if isinstance(obj, bytes):
        obj = obj.decode(encoding)
    return obj
import Cheetah.Filters
class UnicodeHarder(Cheetah.Filters.Filter):
    """Cheetah filter that coerces filtered values to unicode text."""

    def filter(self, val,
               encoding='utf8',
               str=str,
               **kw):
        """Try our best to unicode our strings.

        Args:
            val: The value to convert.
            encoding: Codec tried first when ``val`` is a byte string.
                A falsy value falls back to UTF-8.
            str: Unused here; kept so the signature stays unchanged.
            **kw: Extra keyword arguments; accepted and ignored.

        Returns:
            Unicode text. Every falsy ``val`` yields an empty string.
        """
        text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3

        # NOTE: this treats *every* falsy value (None, '', 0, False, empty
        # containers) as "nothing to render".
        if not val:
            return u''
        if isinstance(val, text_type):
            return val

        try:
            return val.decode(encoding or 'utf-8', 'strict')
        except AttributeError:
            # No ``decode`` method: not a byte string, so stringify it.
            return text_type(val)
        except (UnicodeDecodeError, LookupError):
            # Invalid bytes for the requested codec (or an unknown codec
            # name). latin-1 maps every possible byte to a code point, so
            # this fallback cannot raise UnicodeDecodeError and no further
            # fallback is needed.
            return val.decode('latin-1', 'strict')
import functools
class UnicodeMixin(object):
    """Derive ``__str__`` from ``__unicode__`` on both Python 2 and 3.

    Classes using this mixin implement ``__unicode__`` only. Which
    ``__str__`` variant is defined is decided once, when this class body
    runs, from the ``PY3`` flag.
    """

    if not PY3:
        def __str__(self):
            # Python 2: ``str`` is a byte string, so return UTF-8 bytes.
            text = self.__unicode__()
            return text.encode('utf8')
    else:
        def __str__(self):
            # Python 3: ``str`` is already text.
            return self.__unicode__()
def console_safe(func):
    """Decorator for making unicode functions console-friendly.

    When ``PY3`` is true the function is returned unchanged. Otherwise it
    is wrapped so that a ``text_type`` result is encoded with the
    ``unicode-escape`` codec before being returned; results of any other
    type pass through untouched.

    Args:
        func: The callable to wrap.

    Returns:
        ``func`` itself when ``PY3`` is true, else the wrapping function.
    """
    if PY3:
        return func

    @functools.wraps(func)
    def inner(*args, **kwargs):
        res = func(*args, **kwargs)
        if isinstance(res, text_type):  # XXX: use assert?
            # unicode-escape output is pure ASCII.
            res = res.encode('unicode-escape')
        return res
    return inner
class SomeNltkClass(SomeNltkBase, UnicodeMixin):
    # Usage sketch: a class combines UnicodeMixin (which supplies __str__ on
    # top of __unicode__) with the console_safe decorator on __repr__.
    # The rest of the class body is elided in this example.
    # ...
    def __unicode__(self):
        # Placeholder: a real class returns its text representation here.
        return ...

    @console_safe
    def __repr__(self):
        # Reuse the text representation; console_safe post-processes a text
        # result when PY3 is false.
        return self.__unicode__()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment