Created
May 9, 2012 22:44
-
-
Save mrpollo/2649463 to your computer and use it in GitHub Desktop.
Python Unicode helpers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def smart_unicode(s, encoding='utf-8', errors='strict'):
    """Return a text (unicode) rendering of ``s``.

    Conversion rules, in order:

    * text, ``None`` and plain numbers are passed to the text constructor
      as-is (``None`` becomes ``u'None'``, ``5`` becomes ``u'5'``);
    * byte strings are decoded with ``encoding`` / ``errors``;
    * objects exposing ``__unicode__`` are converted by calling it;
    * anything else is rendered with ``str()`` and, when that yields a
      byte string (Python 2), decoded with ``encoding`` / ``errors``.

    :param s: value to convert.
    :param encoding: codec used to decode byte strings.
    :param errors: codec error policy (``'strict'``, ``'replace'``, ...).
    :returns: ``unicode`` on Python 2, ``str`` on Python 3.
    :raises UnicodeDecodeError: if bytes cannot be decoded under ``errors``.
    """
    # Resolve the text/bytes pair locally so the helper runs unchanged on
    # Python 2 (unicode/str) and Python 3 (str/bytes).
    try:
        text_type, byte_type, number_types = unicode, str, (int, long, float)
    except NameError:  # Python 3 has no ``unicode`` / ``long`` builtins
        text_type, byte_type, number_types = str, bytes, (int, float)

    # ``s is None`` replaces the lookup of ``types.NoneType``, which needed
    # an import of ``types`` that this module never performs.
    if s is None or isinstance(s, (text_type,) + number_types):
        return text_type(s)
    if isinstance(s, byte_type):
        return text_type(s, encoding, errors)
    if hasattr(s, '__unicode__'):
        # The three-argument text constructor only accepts strings/buffers,
        # so arbitrary objects must go through their own hook instead.
        return s.__unicode__()

    rendered = str(s)
    if isinstance(rendered, byte_type):
        return text_type(rendered, encoding, errors)
    return rendered
def smart_str(s, encoding='utf-8', errors='strict', from_encoding='utf-8'):
    """Return a byte-string rendering of ``s``.

    Conversion rules, in order:

    * byte strings are returned untouched when ``encoding`` equals
      ``from_encoding``; otherwise they are decoded from ``from_encoding``
      and re-encoded to ``encoding``;
    * text is encoded with ``encoding`` / ``errors``;
    * ``None`` and plain numbers are rendered with ``str()``; their ASCII
      representation is returned without applying ``encoding``;
    * objects exposing ``__unicode__`` are converted through it and the
      result is encoded;
    * anything else is rendered with ``str()`` and processed again.

    :param s: value to convert.
    :param encoding: target codec of the returned bytes.
    :param errors: codec error policy used for both decode and encode.
    :param from_encoding: codec that incoming byte strings are assumed to use.
    :returns: ``str`` on Python 2, ``bytes`` on Python 3.
    :raises UnicodeError: if transcoding fails under ``errors``.
    """
    # Resolve the text/bytes pair locally so the helper runs unchanged on
    # Python 2 (unicode/str) and Python 3 (str/bytes).
    try:
        text_type, byte_type, number_types = unicode, str, (int, long, float)
    except NameError:  # Python 3 has no ``unicode`` / ``long`` builtins
        text_type, byte_type, number_types = str, bytes, (int, float)

    if isinstance(s, byte_type):
        if encoding != from_encoding:
            return s.decode(from_encoding, errors).encode(encoding, errors)
        return s
    if isinstance(s, text_type):
        return s.encode(encoding, errors)

    # ``s is None`` replaces the lookup of ``types.NoneType``, which needed
    # an import of ``types`` that this module never performs.
    if s is None or isinstance(s, number_types):
        rendered = str(s)
        if isinstance(rendered, byte_type):
            return rendered
        return rendered.encode('ascii')  # number/None reprs are pure ASCII

    # ``__unicode__`` has to be tested before falling back to ``str()``:
    # every new-style object has ``__str__``, so testing that first would
    # make this branch unreachable.
    if hasattr(s, '__unicode__'):
        return smart_str(s.__unicode__(), encoding, errors, from_encoding)
    return smart_str(str(s), encoding, errors, from_encoding)
def to_unicode(value):
    """Best-effort conversion of ``value`` to text.

    * Text is returned unchanged.
    * Byte strings starting with a UTF-16 byte-order mark are decoded as
      UTF-16; all others are decoded as UTF-8. In both cases a leading
      byte-order mark is consumed rather than kept in the result. If that
      decode fails, the bytes are decoded as Latin-1, which accepts any
      byte sequence.
    * Other objects go through the text constructor; if that raises a
      ``UnicodeError`` the ``str()`` rendering is converted instead.

    :param value: value to convert.
    :returns: ``unicode`` on Python 2, ``str`` on Python 3.
    :raises TypeError: if the object cannot be rendered as text and has no
        ``__unicode__`` hook to fall back on.
    """
    # Resolve the text/bytes pair locally so the helper runs unchanged on
    # Python 2 (unicode/str) and Python 3 (str/bytes).
    try:
        text_type, byte_type = unicode, str
    except NameError:  # Python 3 has no ``unicode`` builtin
        text_type, byte_type = str, bytes

    if isinstance(value, text_type):
        return value

    if isinstance(value, byte_type):
        # The BOM-aware codecs strip the mark; the explicit -le/-be codecs
        # would leave U+FEFF as the first character of the result.
        if value.startswith((b'\xff\xfe', b'\xfe\xff')):
            codec = 'utf-16'
        else:
            codec = 'utf-8-sig'
        try:
            return value.decode(codec)
        except UnicodeDecodeError:
            return value.decode('latin-1')

    try:
        return text_type(value)
    except UnicodeError:
        return to_unicode(str(value))
    except TypeError:
        if hasattr(value, '__unicode__'):
            return value.__unicode__()
        # Re-raise rather than fall off the end and silently return None.
        raise
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment