Skip to content

Instantly share code, notes, and snippets.

@codeforkjeff
Created November 4, 2014 23:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codeforkjeff/d9c15f224c7163131c38 to your computer and use it in GitHub Desktop.
Save codeforkjeff/d9c15f224c7163131c38 to your computer and use it in GitHub Desktop.
import codecs
import subprocess
import sys
# Non-ASCII sample text: the trailing character is U+2588 (FULL BLOCK).
teststr = u'This is a block character: \u2588'

#### try #1: reproduce the problem

# Demonstration of http://bugs.python.org/issue6135:
# the stdin file object created by Popen defaults to the 'ascii' encoding,
# so writing a unicode string containing a non-ASCII character to it fails.
# cat just echoes its stdin stream back, which makes it a good test target.
proc = subprocess.Popen("cat", stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
try:
    proc.stdin.write(teststr)
except UnicodeEncodeError:
    # Writing unicode to the pipe makes Python *encode* it implicitly,
    # so the failure is an encode error (the message used to say "decode").
    print("Got encode error, as expected.")
finally:
    # Stop the child whether or not the write failed, then wait() on it so
    # the terminated process is reaped instead of lingering as a zombie.
    proc.terminate()
    proc.wait()
#### try #2: the fix

proc = subprocess.Popen("cat", stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)

# Replace the child's i/o file objects with codec-wrapped versions that
# encode/decode utf-8 explicitly instead of relying on the default encoding.
proc.stdin = codecs.getwriter('utf-8')(proc.stdin)
proc.stdout = codecs.getreader('utf-8')(proc.stdout)

# Now we can write to stdin successfully. Closing stdin signals EOF to cat
# so that it finishes echoing and exits.
proc.stdin.write(teststr)
proc.stdin.flush()
proc.stdin.close()

# We can read utf-8 from stdout too. teststr has no trailing newline, so
# readline() returns once cat closes its end of the pipe.
output = proc.stdout.readline()

# All output has been consumed: close our end of the pipe and reap the child
# so it does not linger as a zombie.
proc.stdout.close()
proc.wait()

if output == teststr:
    print("Input and output data matched! hooray!")

# On the original author's machine the default encoding was reported as
# ANSI_X3.4-1968 (a formal name for US-ASCII). Per the author, running
# "unset LC_ALL" in the shell before starting this script makes Python
# pick UTF-8 instead.
if sys.stdout.encoding == 'UTF-8':
    print(output)
else:
    # Printing the block character to a non-UTF-8 stdout would raise another
    # UnicodeEncodeError, so only report which encoding is in effect.
    print("Encoding of Python script's stdout is %s instead of UTF-8, so I won't try to print the test str" % (sys.stdout.encoding,))

print("Success! Reached end of script.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment