Skip to content

Instantly share code, notes, and snippets.

@FiloSottile
Created April 19, 2012 17:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save FiloSottile/2422344 to your computer and use it in GitHub Desktop.
Save FiloSottile/2422344 to your computer and use it in GitHub Desktop.
Some tests of how OS I/O encodings behave in Python
import sys, locale
e_grave = u'\xe8'
param = sys.argv[1]
sys.stderr.write('Type: ')
inp = raw_input()
encodings = (('locale.getpreferredencoding()', locale.getpreferredencoding()),
('sys.stdout.encoding', sys.stdout.encoding),
('utf8', 'utf8'))
for name, encoding in encodings:
print ''
print '***', name, ':', encoding
if encoding:
print "u'\\xe8' ", e_grave.encode(encoding, 'replace')
print 'repr(sys.argv[1])', repr(param.decode(encoding, 'replace'))
print 'repr(raw_input())', repr(inp.decode(encoding, 'replace'))
*** locale.getpreferredencoding() : UTF-8
u'\xe8' è
repr(sys.argv[1]) u'\xe8'
repr(raw_input()) u'\xe8'
*** sys.stdout.encoding : UTF-8
u'\xe8' è
repr(sys.argv[1]) u'\xe8'
repr(raw_input()) u'\xe8'
*** utf8 : utf8
u'\xe8' è
repr(sys.argv[1]) u'\xe8'
repr(raw_input()) u'\xe8'
*** locale.getpreferredencoding() : UTF-8
u'\xe8' è
repr(sys.argv[1]) u'\xe8'
repr(raw_input()) u'\xe8'
*** sys.stdout.encoding : UTF-8
u'\xe8' è
repr(sys.argv[1]) u'\xe8'
repr(raw_input()) u'\xe8'
*** utf8 : utf8
u'\xe8' è
repr(sys.argv[1]) u'\xe8'
repr(raw_input()) u'\xe8'
*** locale.getpreferredencoding() : cp1252
u'\xe8' Þ
repr(sys.argv[1]) u'\xe8'
repr(raw_input()) u'\u0160'
*** sys.stdout.encoding : cp850
u'\xe8' è
repr(sys.argv[1]) u'\xde'
repr(raw_input()) u'\xe8'
*** utf8 : utf8
u'\xe8' è
repr(sys.argv[1]) u'\ufffd'
repr(raw_input()) u'\ufffd'
*** locale.getpreferredencoding() : cp1252
u'\xe8' è
repr(sys.argv[1]) u'\xe8'
repr(raw_input()) u'\xe8'
*** sys.stdout.encoding : cp1252
u'\xe8' è
repr(sys.argv[1]) u'\xe8'
repr(raw_input()) u'\xe8'
*** utf8 : utf8
u'\xe8' è
repr(sys.argv[1]) u'\ufffd'
repr(raw_input()) u'\ufffd'
*** locale.getpreferredencoding() : cp1252
u'\xe8' è
repr(sys.argv[1]) u'\xe8'
repr(raw_input()) u'\u0160'
*** sys.stdout.encoding : None
*** utf8 : utf8
u'\xe8' è
repr(sys.argv[1]) u'\ufffd'
repr(raw_input()) u'\ufffd'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment