# agriffis/stdout.py

## stdout.py
"""
Patch sys.stdout and sys.stderr to encode unicode, defaulting to UTF-8

The short story is that sys.stdout.encoding is set based on the environment
variable LC_CTYPE (or LANG/LC_ALL) but only when stdout.isatty().
Otherwise those variables are ignored and stdout has no encoding, and it
CANNOT be set.

The long story is here (especially in the comments):
http://drj11.wordpress.com/2007/05/14/python-how-is-sysstdoutencoding-chosen/
"""

from codecs import StreamWriter, lookup
import os


def _get_codec(name):
    """
    Returns a CodecInfo using codecs.lookup(). If `name` is None, attempts
    to use locale environment variables, eventually falling back to UTF-8
    rather than ASCII.
    """
    codec = None

    if name:
        codec = lookup(name)
    else:
        # This is the proper priority order for these environment variables.
        name = os.environ.get('LC_ALL') or os.environ.get('LC_CTYPE') or os.environ.get('LANG') or ''
        if '.' in name:
            name = name.rsplit('.', 1)[-1]  # en_US.utf8
            if name:
                try:
                    codec = lookup(name)
                except Exception:
                    pass

    # If name wasn't passed in, and there was no usable environment
    # variable, fall back to UTF-8.
    if not codec:
        codec = lookup('utf8')

    return codec


class StdStreamWriter(StreamWriter):
    """
    A codecs.StreamWriter that tolerates both text and byte strings.

    Unicode objects are encoded with the selected codec.  Anything else is
    converted with str() and returned unchanged, on the assumption that a
    byte string is already encoded correctly (it is never decoded and
    re-encoded).
    """
    def __init__(self, *args, **kwargs):
        # The optional `encoding` keyword is consumed here; every other
        # argument is forwarded to StreamWriter untouched.
        chosen = _get_codec(kwargs.pop('encoding', None))
        self.codec = chosen
        self.encoding = chosen.name
        self._encode = chosen.encode
        StreamWriter.__init__(self, *args, **kwargs)

    def encode(self, input, *args, **kwargs):
        # Only unicode objects go through the codec.
        if isinstance(input, unicode):
            return self._encode(input, *args, **kwargs)
        raw = str(input)  # should be already
        return raw, len(raw)


def monkey():
    """
    Replace sys.stdout and sys.stderr with StdStreamWriter wrappers
    around the current streams, so unicode objects written to them are
    handled properly.
    """
    import sys
    for stream_name in ('stdout', 'stderr'):
        wrapped = StdStreamWriter(getattr(sys, stream_name))
        setattr(sys, stream_name, wrapped)
	"""
	Patch Python's sys.stdout and sys.stderr to encode to UTF-8

	The short story is that sys.stdout.encoding is set based on the environment
	variable LC_CTYPE (or LANG/LC_ALL) but only when stdout.isatty().
	Otherwise those variables are ignored and stdout has no encoding, and it
	CANNOT be set.

	The long story is here (especially in the comments):
	http://drj11.wordpress.com/2007/05/14/python-how-is-sysstdoutencoding-chosen/
	"""

	from codecs import StreamWriter, lookup
	import os


	def _get_codec(name):
	"""
	Returns a CodecInfo using codecs.lookup(). If `name` is None, attempts
	to use locale environment variables, eventually falling back to UTF-8
	rather than ASCII.
	"""
	codec = None

	if name:
	codec = lookup(name)
	else:
	# This is the proper priority order for these environment variables.
	name = os.environ.get('LC_ALL') or os.environ.get('LC_CTYPE') or os.environ.get('LANG') or ''
	if '.' in name:
	name = name.rsplit('.', 1)[-1] # en_US.utf8
	if name:
	try:
	codec = lookup(name)
	except Exception:
	pass

	# If name wasn't passed in, and there was no usable environment
	# variable, fall back to UTF-8.
	if not codec:
	codec = lookup('utf8')

	return codec


	class StdStreamWriter(StreamWriter):
		"""
		StreamWriter that accepts either unicode or encoded byte strings; for
		the latter case this class assumes the input is already encoded
		properly, rather than trying to decode and re-encode.

		An optional `encoding` keyword argument selects the codec; it is
		resolved by _get_codec().  All other arguments are forwarded to
		codecs.StreamWriter.
		"""
		def __init__(self, *args, **kwargs):
			# `encoding` is consumed here so that it is not forwarded to
			# StreamWriter.__init__.
			self.codec = _get_codec(kwargs.pop('encoding', None))
			self.encoding = self.codec.name
			self._encode = self.codec.encode
			StreamWriter.__init__(self, *args, **kwargs)

		def encode(self, input, *args, **kwargs):
			"""
			Encode unicode `input` with the selected codec; return any other
			input converted with str(), paired with its length.
			"""
			if not isinstance(input, unicode):
				input = str(input)  # should be already
				return input, len(input)
			return self._encode(input, *args, **kwargs)


	def monkey():
		"""
		Monkey patch sys.stdout and sys.stderr to handle unicode objects
		properly.

		Each stream is replaced by a StdStreamWriter wrapping the stream that
		was installed at the time of the call.
		"""
		import sys
		sys.stdout = StdStreamWriter(sys.stdout)
		sys.stderr = StdStreamWriter(sys.stderr)