Skip to content

Instantly share code, notes, and snippets.

@mrpollo
Created May 9, 2012 22:44
Show Gist options
  • Save mrpollo/2649463 to your computer and use it in GitHub Desktop.
Save mrpollo/2649463 to your computer and use it in GitHub Desktop.
Python Unicode helpers
def smart_unicode(s, encoding='utf-8', errors='strict'):
    """Return a text (unicode) representation of ``s``.

    Runs on both Python 2 and Python 3. "Text" means ``unicode`` on
    Python 2 and ``str`` on Python 3; "byte string" means ``str`` on
    Python 2 and ``bytes`` on Python 3.

    Conversion rules, in order:

    * text (including subclasses) is returned as plain text;
    * byte strings are decoded with ``encoding`` / ``errors``;
    * ``None`` and numbers are rendered directly, without decoding;
    * objects whose type defines ``__unicode__`` are converted with it;
    * anything else is rendered with ``str()`` and, if that yields a
      byte string, decoded with ``encoding`` / ``errors``.

    Args:
        s: The object to convert.
        encoding: Codec name used to decode byte strings.
        errors: Codec error handler (``'strict'``, ``'replace'``, ...).

    Returns:
        The text representation of ``s``.

    Raises:
        UnicodeDecodeError: If a byte string cannot be decoded with
            ``encoding`` under the ``'strict'`` error handler.
    """
    import numbers

    # Resolve the text/bytes types without naming ``unicode`` so the same
    # code runs on Python 2 and Python 3.
    text_type = type(u'')
    bytes_type = type(b'')

    if isinstance(s, text_type):
        return text_type(s)
    if isinstance(s, bytes_type):
        return s.decode(encoding, errors)
    if s is None or isinstance(s, numbers.Number):
        return text_type(s)

    # Look the hook up on the type so that passing a class object that
    # merely *defines* __unicode__ does not trigger an unbound call.
    unicode_method = getattr(type(s), '__unicode__', None)
    if unicode_method is not None:
        return text_type(unicode_method(s))

    rendered = str(s)
    if isinstance(rendered, bytes_type):
        # Python 2: str() produced bytes, which still need decoding.
        rendered = rendered.decode(encoding, errors)
    return rendered
def smart_str(s, encoding='utf-8', errors='strict', from_encoding='utf-8'):
    """Return a byte-string representation of ``s`` encoded in ``encoding``.

    Runs on both Python 2 and Python 3. The result is always a byte
    string: ``str`` on Python 2 and ``bytes`` on Python 3.

    Conversion rules, in order:

    * byte strings are returned unchanged when ``encoding`` equals
      ``from_encoding``; otherwise they are decoded from
      ``from_encoding`` and re-encoded to ``encoding``;
    * text is encoded with ``encoding`` / ``errors``;
    * ``None`` and numbers are rendered with ``str()`` as plain ASCII
      (they are not transcoded to ``encoding``);
    * objects whose type defines ``__unicode__`` are converted with it
      and the result is encoded;
    * anything else is rendered with ``str()`` and converted again by
      the rules above.

    Args:
        s: The object to convert.
        encoding: Codec name of the returned byte string.
        errors: Codec error handler (``'strict'``, ``'replace'``, ...).
        from_encoding: Codec name that incoming byte strings are assumed
            to be in.

    Returns:
        The encoded byte string.

    Raises:
        UnicodeDecodeError: If a byte string is not valid
            ``from_encoding`` data under the ``'strict'`` handler.
        UnicodeEncodeError: If text cannot be represented in
            ``encoding`` under the ``'strict'`` handler.
    """
    import numbers

    # Resolve the text/bytes types without naming ``unicode`` so the same
    # code runs on Python 2 and Python 3.
    text_type = type(u'')
    bytes_type = type(b'')

    if isinstance(s, bytes_type):
        if encoding == from_encoding:
            return s
        return s.decode(from_encoding, errors).encode(encoding, errors)
    if isinstance(s, text_type):
        return s.encode(encoding, errors)
    if s is None or isinstance(s, numbers.Number):
        rendered = str(s)
        if isinstance(rendered, text_type):
            # Python 3: str() yields text; number/None reprs are ASCII.
            rendered = rendered.encode('ascii')
        return rendered

    # __unicode__ must be consulted before str(): every object has
    # __str__, so testing for __str__ first would make this unreachable.
    unicode_method = getattr(type(s), '__unicode__', None)
    if unicode_method is not None:
        return text_type(unicode_method(s)).encode(encoding, errors)

    # str(s) is either bytes (Python 2) or text (Python 3); both are
    # handled by the first two branches, so this recursion ends at once.
    return smart_str(str(s), encoding, errors, from_encoding)
def to_unicode(value):
    """Best-effort conversion of ``value`` to text (unicode).

    Runs on both Python 2 and Python 3. "Text" means ``unicode`` on
    Python 2 and ``str`` on Python 3; "byte string" means ``str`` on
    Python 2 and ``bytes`` on Python 3.

    Conversion rules:

    * text is returned unchanged;
    * a byte string starting with a UTF-16 byte-order mark is decoded as
      UTF-16 in the indicated byte order, and the mark is removed;
    * any other byte string is decoded as UTF-8 (a leading UTF-8
      byte-order mark, if present, is removed);
    * a byte string that fails the decoding above is decoded as
      Latin-1, which accepts every byte value;
    * other objects are converted through ``__unicode__`` when their
      type defines it, otherwise through the text constructor; if that
      raises a Unicode error, ``str(value)`` is converted instead.

    Args:
        value: The object to convert.

    Returns:
        The text representation of ``value``.

    Raises:
        TypeError: If the object cannot be rendered as a string at all
            (for example its ``__str__`` returns a non-string). This is
            raised rather than silently returning ``None``.
    """
    # Resolve the text/bytes types without naming ``unicode`` so the same
    # code runs on Python 2 and Python 3.
    text_type = type(u'')
    bytes_type = type(b'')

    if isinstance(value, text_type):
        return value

    if isinstance(value, bytes_type):
        try:
            if value.startswith((b'\xff\xfe', b'\xfe\xff')):
                # The generic 'utf-16' codec reads the BOM to pick the byte
                # order and consumes it, so no U+FEFF leaks into the result.
                return value.decode('utf-16')
            # 'utf-8-sig' decodes plain UTF-8 and drops a leading BOM.
            return value.decode('utf-8-sig')
        except UnicodeDecodeError:
            return value.decode('latin-1')

    try:
        # Look the hook up on the type so that passing a class object that
        # merely *defines* __unicode__ does not trigger an unbound call.
        unicode_method = getattr(type(value), '__unicode__', None)
        if unicode_method is not None:
            return text_type(unicode_method(value))
        return text_type(value)
    except UnicodeError:
        # Python 2: the object rendered itself as non-ASCII bytes; run those
        # bytes through the byte-string branch above.
        return to_unicode(str(value))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment