Skip to content

Instantly share code, notes, and snippets.

@davidfraser
Last active January 24, 2020 13:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save davidfraser/b338ba07a6f058535a2d9786986ed8a3 to your computer and use it in GitHub Desktop.
Save davidfraser/b338ba07a6f058535a2d9786986ed8a3 to your computer and use it in GitHub Desktop.
Testing Python 2/3 environment variable handling on Windows, including calling subprocesses
These are some notes and sample code on interacting with Windows environment variable encodings etc. See README.md for more info.
win_encoding_check.class

Some background explanation to this research:

We are using the filesystem encoding for the environment variables on Python 2 to retain existing behaviour. Python 3 is better.

However, this doesn't actually work properly for characters that can't be encoded with the filesystem encoding. We don't tend to have any of these - our PATHs are generally program files or environment directories. Python 3 handles this better by using the Windows Unicode APIs (suffixed with W) for environment and subprocess calls. Python 2 does this for general path manipulation if you use the text / unicode type. If it's necessary to emulate Python 3 behaviour, it can be done with ctypes (see http://stackoverflow.com/a/2608368):

def get_env(name):
    """Return the value of environment variable *name*, read through the
    wide-character Win32 API, or None if the lookup reports nothing.

    Windows only (uses ``ctypes.windll``).

    :param name: variable name; a byte string is decoded with the ANSI code
        page ('mbcs') first, because a W-suffixed API expects wide text.
    :return: the value as a unicode string, or None when the size query
        returns 0.
    """
    if isinstance(name, bytes):
        # On Python 2 a plain 'str' literal is bytes; ctypes would hand it to
        # the W function as a char*, which the API would misread.
        name = name.decode('mbcs')
    kernel32 = ctypes.windll.kernel32
    # First call with no buffer asks for the required size in characters,
    # including the terminating NUL.
    size = kernel32.GetEnvironmentVariableW(name, None, 0)
    if size == 0:
        return None
    buf = ctypes.create_unicode_buffer(size)
    kernel32.GetEnvironmentVariableW(name, buf, size)
    return buf.value

def set_env(name, value):
    """Set environment variable *name* to *value* for the current process
    through the wide-character Win32 API.

    Windows only (uses ``ctypes.windll``).

    :param name: variable name; byte strings are decoded with 'mbcs'.
    :param value: new value; byte strings are decoded with 'mbcs'. Any other
        object (including None) is passed to the API unchanged.
    :return: the raw result of ``SetEnvironmentVariableW`` (non-zero on
        success according to the Win32 documentation).
    """
    # Python 2 'str' is bytes; a W-suffixed API needs wide text, so decode
    # byte strings with the ANSI code page before handing them to ctypes.
    if isinstance(name, bytes):
        name = name.decode('mbcs')
    if isinstance(value, bytes):
        value = value.decode('mbcs')
    return ctypes.windll.kernel32.SetEnvironmentVariableW(name, value)

def get_env_dict():
    """Return the process environment as a ``{name: value}`` dict of unicode
    strings, read with ``GetEnvironmentStringsW``.

    Windows only. The block returned by the API is a sequence of
    NUL-terminated ``name=value`` strings ended by an empty string.

    Entries whose first character is ``=`` (for example ``=C:=C:\\dir``) are
    split at the *second* ``=``, so they keep distinct names instead of all
    collapsing onto the empty key.

    :raises OSError: if the environment block cannot be obtained.
    """
    # Private handle so the prototypes set below do not leak into the shared
    # ctypes.windll.kernel32 function objects.
    kernel32 = ctypes.WinDLL('kernel32', use_last_error=True)
    # Without an explicit restype ctypes assumes a C int, which truncates the
    # returned pointer on 64-bit Python.
    kernel32.GetEnvironmentStringsW.restype = ctypes.c_void_p
    kernel32.FreeEnvironmentStringsW.argtypes = [ctypes.c_void_p]
    kernel32.lstrlenW.argtypes = [ctypes.c_void_p]

    block = kernel32.GetEnvironmentStringsW()
    if not block:
        raise ctypes.WinError(ctypes.get_last_error())

    environ = {}
    wchar_size = ctypes.sizeof(ctypes.c_wchar)
    try:
        cursor = block
        while True:
            # Measure in wide characters; len() of the decoded string can be
            # shorter when surrogate pairs are merged into one code point.
            length = kernel32.lstrlenW(cursor)
            if length == 0:
                break  # empty string marks the end of the block
            entry = ctypes.wstring_at(cursor, length)
            separator = entry.find(u'=', 1)
            if separator != -1:
                environ[entry[:separator]] = entry[separator + 1:]
            cursor += (length + 1) * wchar_size
    finally:
        # The block is allocated by the OS and must be released by the caller.
        kernel32.FreeEnvironmentStringsW(block)
    return environ

However, this doesn't help, because Python 2.7's subprocess module uses the non-unicode CreateProcess[A], which can't handle unicode in its env, and there aren't encodings that work. See http://stackoverflow.com/a/10360838. If something really needs to be done, the above methods can be used to set in-process environment variables, which will be passed to the subprocess by default (or we could use a native Windows Popen like in processfamily).

Other related info: Windows batch files depend on the code page; chcp 65001 will set that locally to UTF-8, and cmd /U has an effect too. Java has a command-line argument -Dfile.encoding=UTF-8 that does the same. With Python 3, or Python 2 and the above ctypes approach, they can all interact sanely.

@echo off
rem Driver for the encoding experiment: compile the Java checker, switch the
rem console code page, define test variables, then run the Python script.
javac win_encoding_check.java
rem show the code page that is active before it is changed
chcp
rem set codepage to UTF-8 to handle these Cyrillic characters correctly
chcp 65001
rem one variable with a Cyrillic value, one with Cyrillic in both name and value
set UNICODE_BAT=ЖЗИЙКЛ
set UNICODE_BAT_ЖЗ=ЖЗИЙКЛ
rem the Python script inherits the variables set above
python win_encoding_check.py
import java.util.Map;
/**
 * Prints every environment variable whose name starts with "UNICODE",
 * one per line, formatted as {@code name : value}.
 */
public class win_encoding_check {
    public static void main(String[] args) {
        Map<String, String> environment = System.getenv();
        // Walk the variable names and look each value up on demand.
        for (String name : environment.keySet()) {
            if (!name.startsWith("UNICODE")) {
                continue;
            }
            String value = environment.get(name);
            System.out.println(name + " : " + value);
        }
    }
}
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import ctypes
import subprocess
import pprint
def get_env(name):
    """Return the value of environment variable *name*, read through the
    wide-character Win32 API, or None if the lookup reports nothing.

    Windows only (uses ``ctypes.windll``). Works on Python 2 and Python 3.

    :param name: variable name. Byte strings are decoded with the ANSI code
        page ('mbcs'); any other non-text object is converted to text.
    :return: the value as a unicode string, or None when the size query
        returns 0.
    """
    text_type = type(u'')  # unicode on Python 2, str on Python 3
    # The W API needs wide text. The builtin unicode() only exists on
    # Python 2, so the coercion is spelled out in a version-neutral way.
    if isinstance(name, bytes):
        name = name.decode('mbcs')
    elif not isinstance(name, text_type):
        name = text_type(name)

    kernel32 = ctypes.windll.kernel32
    # First call with no buffer asks for the required size in characters,
    # including the terminating NUL.
    size = kernel32.GetEnvironmentVariableW(name, None, 0)
    if size == 0:
        return None
    buf = ctypes.create_unicode_buffer(size)
    kernel32.GetEnvironmentVariableW(name, buf, size)
    return buf.value
def set_env(name, value):
    """Set environment variable *name* to *value* for the current process
    through the wide-character Win32 API.

    Windows only (uses ``ctypes.windll``). Works on Python 2 and Python 3.

    :param name: variable name, coerced to text.
    :param value: new value, coerced to text.
    :return: the raw result of ``SetEnvironmentVariableW`` (non-zero on
        success according to the Win32 documentation).
    """
    text_type = type(u'')  # unicode on Python 2, str on Python 3

    def as_text(arg):
        # The builtin unicode() only exists on Python 2; decode byte strings
        # with the ANSI code page and convert anything else to text.
        if isinstance(arg, bytes):
            return arg.decode('mbcs')
        if isinstance(arg, text_type):
            return arg
        return text_type(arg)

    return ctypes.windll.kernel32.SetEnvironmentVariableW(
        as_text(name), as_text(value))
def get_env_dict():
    """Return the process environment as a ``{name: value}`` dict of unicode
    strings, read with ``GetEnvironmentStringsW``.

    Windows only. The block returned by the API is a sequence of
    NUL-terminated ``name=value`` strings ended by an empty string.

    Entries whose first character is ``=`` (for example ``=C:=C:\\dir``) are
    split at the *second* ``=``, so they keep distinct names instead of all
    collapsing onto the empty key.

    :raises OSError: if the environment block cannot be obtained.
    """
    # Private handle so the prototypes set below do not leak into the shared
    # ctypes.windll.kernel32 function objects.
    kernel32 = ctypes.WinDLL('kernel32', use_last_error=True)
    # Without an explicit restype ctypes assumes a C int, which truncates the
    # returned pointer on 64-bit Python.
    kernel32.GetEnvironmentStringsW.restype = ctypes.c_void_p
    kernel32.FreeEnvironmentStringsW.argtypes = [ctypes.c_void_p]
    kernel32.lstrlenW.argtypes = [ctypes.c_void_p]

    block = kernel32.GetEnvironmentStringsW()
    if not block:
        raise ctypes.WinError(ctypes.get_last_error())

    environ = {}
    wchar_size = ctypes.sizeof(ctypes.c_wchar)
    try:
        cursor = block
        while True:
            # Measure in wide characters; len() of the decoded string can be
            # shorter when surrogate pairs are merged into one code point.
            length = kernel32.lstrlenW(cursor)
            if length == 0:
                break  # empty string marks the end of the block
            entry = ctypes.wstring_at(cursor, length)
            separator = entry.find(u'=', 1)
            if separator != -1:
                environ[entry[:separator]] = entry[separator + 1:]
            cursor += (length + 1) * wchar_size
    finally:
        # The block is allocated by the OS and must be released by the caller.
        kernel32.FreeEnvironmentStringsW(block)
    return environ
def show_encodings():
    """Print the interpreter's default and filesystem encodings, followed by
    a blank line."""
    print("default encoding: %s" % sys.getdefaultencoding())
    print("file system encoding: %s" % sys.getfilesystemencoding())
    # print("") rather than print(): without the print_function import,
    # Python 2 parses print() as printing an empty tuple and emits "()".
    print("")
def demo_listdir():
    """List the current directory twice, once via the bytes API and once via
    the unicode API, showing only names that start with 'latin extended'.

    Byte names are printed directly (best effort); unicode names are printed
    as their repr so non-ASCII characters stay visible.
    """
    print("directory listing (bytes):")
    for entry in os.listdir(b'.'):
        if not entry.startswith(b'latin extended'):
            continue
        try:
            print(entry)
        except Exception:
            # Keep listing even if the console cannot show this name, and
            # actually interpolate the name into the message.
            print("Error printing %r" % (entry,))
    # print("") rather than print(): Python 2 without print_function would
    # otherwise emit "()".
    print("")
    print("directory listing (unicode):")
    for entry in os.listdir(u'.'):
        if not entry.startswith(u'latin extended'):
            continue
        print(repr(entry))
    print("")
def read_environment():
    """Print USERNAME and UNICODE_TEST as seen through ``os.environ`` and
    through the ctypes-based ``get_env`` helper, for comparison."""
    print("environment variables (native):")
    # Native str keys: identical to the b'' literals on Python 2, while on
    # Python 3 os.environ rejects bytes keys with a TypeError.
    print(repr(os.environ.get('USERNAME')))
    print(repr(os.environ.get('UNICODE_TEST')))
    print("environment variables (ctypes):")
    print(repr(get_env(u'USERNAME')))
    print(repr(get_env(u'UNICODE_TEST')))
def show_unicode_vars_ctype():
    """Pretty-print every environment variable whose name starts with
    'UNICODE', as read through the ctypes helper ``get_env_dict``."""
    print("Showing environment variables read with ctypes:")
    # Read the environment block once and filter that snapshot.
    environ = get_env_dict()
    unicode_vars = {
        name: value
        for name, value in environ.items()
        if name.startswith(u'UNICODE')
    }
    pprint.pprint(unicode_vars)
def show_subprocess_unicode_vars():
    """Run four child processes that report the UNICODE* variables they
    inherit from this process: a batch pipeline, a Python one-liner using
    os.environ, this script in ctypes mode, and the Java checker."""
    native_one_liner = (
        'python -c "import os, pprint; pprint.pprint('
        "{k: v for k, v in os.environ.items() if k.startswith('UNICODE')}"
        ')"'
    )
    # (banner, command, run through the shell?)
    probes = (
        ("Reading from subprocess: batch",
         'set | find "UNICODE_PYTHON"', True),
        ("Reading from subprocess: python native",
         native_one_liner, True),
        ("Reading from subprocess: python ctypes",
         ['python', 'win_encoding_check.py', '--show-unicode-vars-ctype'], False),
        ("Reading from subprocess: java",
         ['java', '-Dfile.encoding=UTF-8', 'win_encoding_check'], False),
    )
    for banner, command, via_shell in probes:
        print(banner)
        subprocess.call(command, shell=via_shell)
def pass_subprocess_unicode_vars():
    """Run the same four child processes as ``show_subprocess_unicode_vars``,
    but with an explicit ``env`` mapping that adds UNICODE_PYTHON_ARG.

    On Python 2 the value is passed as UTF-16 encoded bytes, as before.
    On Python 3 it is passed as text, because subprocess on Windows rejects
    bytes environment values with a TypeError.
    """
    arg_value = u'ЎЏ'
    if sys.version_info[0] < 3:
        # NOTE(review): UTF-16 bytes are presumably a deliberate experiment;
        # the accompanying notes say no byte encoding survives the
        # non-unicode CreateProcess call. Confirm before changing it.
        arg_value = arg_value.encode('UTF-16')

    pass_env = os.environ.copy()
    pass_env.update({'UNICODE_PYTHON_ARG': arg_value})

    print("Reading from subprocess: batch")
    subprocess.call('set | find "UNICODE_PYTHON"', shell=True, env=pass_env)
    print("Reading from subprocess: python native")
    subprocess.call('python -c "import os, pprint; pprint.pprint({k: v for k, v in os.environ.items() if k.startswith('"'"'UNICODE'"'"')})"', shell=True, env=pass_env)
    print("Reading from subprocess: python ctypes")
    subprocess.call(['python', 'win_encoding_check.py', '--show-unicode-vars-ctype'], env=pass_env)
    print("Reading from subprocess: java")
    subprocess.call(['java', '-Dfile.encoding=UTF-8', 'win_encoding_check'], env=pass_env)
def set_environment_ctypes():
    """Set a variable with a Cyrillic name and value through the ctypes
    helper, read it back via ctypes and via os.environ, then show what
    child processes see."""
    var_name = u'UNICODE_PYTHON_CTYPES_ЎЏ'

    print("setting environment variable (ctypes):")
    set_env(var_name, u'ЎЏ')
    seen_by_ctypes = get_env(var_name)
    print(repr(seen_by_ctypes))

    print("And reading natively")
    seen_by_os = os.environ.get(var_name)
    print(repr(seen_by_os))

    show_subprocess_unicode_vars()
def set_environment_native():
    """Set a variable with a Cyrillic name and value through ``os.environ``,
    read it back, then show what child processes see.

    On Python 2 the key and value are encoded with the filesystem encoding,
    as before. On Python 3 they stay text, because os.environ there expects
    str and raises TypeError for bytes.
    """
    print("setting environment variable (native):")
    unicode_key = u'UNICODE_PYTHON_NATIVE_ЎЏ'
    unicode_value = u'ЎЏ'
    if sys.version_info[0] < 3:
        fs_encoding = sys.getfilesystemencoding()
        unicode_key = unicode_key.encode(fs_encoding)
        unicode_value = unicode_value.encode(fs_encoding)
    os.environ[unicode_key] = unicode_value
    print(repr(os.environ.get(unicode_key)))
    show_subprocess_unicode_vars()
if __name__ == '__main__':
    # With --show-unicode-vars-ctype the script only dumps the UNICODE*
    # variables via ctypes (this is how it is invoked as a child process);
    # otherwise it runs the full sequence of experiments in order.
    if '--show-unicode-vars-ctype' not in sys.argv:
        experiments = (
            show_encodings,
            read_environment,
            set_environment_ctypes,
            set_environment_native,
            pass_subprocess_unicode_vars,
        )
        for experiment in experiments:
            experiment()
    else:
        show_unicode_vars_ctype()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment