Skip to content

Instantly share code, notes, and snippets.

@oiuww09fn
Created July 23, 2014 14:06
Show Gist options
  • Save oiuww09fn/27b53610e77a47feb4f1 to your computer and use it in GitHub Desktop.
Save oiuww09fn/27b53610e77a47feb4f1 to your computer and use it in GitHub Desktop.
robotframework encoding
# Copyright 2008-2014 Nokia Solutions and Networks
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from .encodingsniffer import get_output_encoding, get_system_encoding
from .unic import unic
# Encoding used when decoding from / encoding to the console. Resolved once,
# when this module is imported.
OUTPUT_ENCODING = get_output_encoding()
# Encoding used for system-level data such as command line arguments and
# environment variables. Resolved once, when this module is imported.
SYSTEM_ENCODING = get_system_encoding()
def decode_output(string, force=False):
    """Return `string` as Unicode, decoding it from the console encoding.

    A value that already is `unicode` is handed back untouched unless `force`
    is true. Forcing exists for IronPython, where every string is `unicode`
    and only the caller can know that decoding is still required.
    """
    needs_decoding = force or not isinstance(string, unicode)
    if needs_decoding:
        return unic(string, OUTPUT_ENCODING)
    return string
def encode_output(string, errors='replace'):
    """Return `string` encoded to bytes using the console encoding.

    `errors` is passed to `encode` as the error handling scheme. On
    IronPython the value is returned as-is without encoding; see
    http://ironpython.codeplex.com/workitem/29487
    """
    running_on_ironpython = sys.platform == 'cli'
    if not running_on_ironpython:
        return string.encode(OUTPUT_ENCODING, errors)
    return string
def decode_from_system(string, can_be_from_java=True):
    """Return system-provided bytes (e.g. cli args or env vars) as Unicode.

    On IronPython the value is returned unchanged. On Jython, when
    `can_be_from_java` is true, the value is first wrapped into a
    `java.lang.String` before it is converted.
    """
    platform = sys.platform
    if platform == 'cli':
        return string
    if can_be_from_java and platform.startswith('java'):
        # Workaround for http://bugs.jython.org/issue1592
        from java.lang import String
        string = String(string)
    return unic(string, SYSTEM_ENCODING)
def encode_to_system(string, errors='replace'):
    """Return `string` encoded with the system encoding.

    The system encoding is the one used with e.g. command line arguments and
    environment variables. `errors` is passed to `encode` as the error
    handling scheme.
    """
    encoded = string.encode(SYSTEM_ENCODING, errors)
    return encoded
# Copyright 2008-2014 Nokia Solutions and Networks
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
# Platform flags. They are paired with encoding getters to select which
# getters are applicable on the current platform.
ANY = True
UNIXY = os.sep == '/'
WINDOWS = not UNIXY
JYTHON = sys.platform.startswith('java')

# Fallback encodings used when no getter yields a valid encoding.
DEFAULT_SYSTEM_ENCODING = 'UTF-8' if UNIXY else 'cp1252'
DEFAULT_OUTPUT_ENCODING = 'UTF-8' if UNIXY else 'cp437'
def get_system_encoding():
    """Return the system encoding, or `DEFAULT_SYSTEM_ENCODING` as fallback.

    Getters are tried in the listed order; each is paired with the flag
    telling whether it applies on the current platform.
    """
    getters = [
        (ANY, _get_python_system_encoding),
        (JYTHON, _get_java_system_encoding),
        (UNIXY, _get_unixy_encoding),
        (WINDOWS, _get_windows_system_encoding),
    ]
    return _get_encoding(getters, DEFAULT_SYSTEM_ENCODING)
def get_output_encoding():
    """Return the output encoding, or `DEFAULT_OUTPUT_ENCODING` as fallback.

    Getters are tried in the listed order; each is paired with the flag
    telling whether it applies on the current platform.
    """
    getters = [
        (ANY, _get_stream_output_encoding),
        (UNIXY, _get_unixy_encoding),
        (WINDOWS, _get_windows_output_encoding),
    ]
    return _get_encoding(getters, DEFAULT_OUTPUT_ENCODING)
def _get_encoding(platform_getters, default):
    """Return the first valid encoding produced by an applicable getter.

    `platform_getters` is an iterable of `(flag, getter)` pairs. Getters whose
    flag is false are skipped without being called. `default` is returned
    when no applicable getter produces a valid encoding.
    """
    applicable = (getter for enabled, getter in platform_getters if enabled)
    for getter in applicable:
        candidate = getter()
        if _is_valid(candidate):
            return candidate
    return default
def _get_python_system_encoding():
    """Return whatever `sys.getfilesystemencoding()` reports."""
    filesystem_encoding = sys.getfilesystemencoding()
    return filesystem_encoding
def _get_java_system_encoding():
    """Return the value of the Java system property `file.encoding`.

    `java.lang` is imported inside the function so that merely defining it
    does not require a Java runtime.
    """
    from java.lang import System
    file_encoding = System.getProperty('file.encoding')
    return file_encoding
def _get_unixy_encoding():
    """Return the first valid encoding found in locale environment variables.

    `LANG`, `LC_CTYPE`, `LANGUAGE` and `LC_ALL` are inspected in that order.
    Returns None when none of them yields a valid encoding.
    """
    for variable in ('LANG', 'LC_CTYPE', 'LANGUAGE', 'LC_ALL'):
        value = os.environ.get(variable)
        if value is None:
            continue
        # The value is either a bare encoding (`UTF-8`) or carries the
        # encoding after the last dot (`en_US.UTF-8`).
        candidate = value.rsplit('.', 1)[-1]
        if _is_valid(candidate):
            return candidate
    return None
def _get_stream_output_encoding():
    """Return the first valid `encoding` of the original standard streams.

    `sys.__stdout__`, `sys.__stderr__` and `sys.__stdin__` are checked in that
    order. Returns None when no stream has a valid encoding, and also on
    Windows with Jython on Java 1.5 or a Python version older than 2.5.2
    (http://bugs.jython.org/issue1568).
    """
    if WINDOWS and JYTHON and (sys.platform.startswith('java1.5')
                               or sys.version_info < (2, 5, 2)):
        return None
    # A stream may lack the `encoding` attribute if it is intercepted outside
    # RF in Python, and the encoding is None if the process's outputs are
    # redirected.
    streams = (sys.__stdout__, sys.__stderr__, sys.__stdin__)
    for stream in streams:
        candidate = getattr(stream, 'encoding', None)
        if _is_valid(candidate):
            return candidate
    return None
def _get_windows_system_encoding():
    """Return the result of the `GetACP` code page lookup."""
    system_code_page = _get_code_page('GetACP')
    return system_code_page
def _get_windows_output_encoding():
    """Return the result of the `GetOEMCP` code page lookup."""
    output_code_page = _get_code_page('GetOEMCP')
    return output_code_page
def _get_code_page(method_name):
    """Call the named `kernel32` function and return its result as `cp<N>`.

    Returns None if looking the function up raises `TypeError`, which
    sometimes occurs with IronPython (mainly on CI).
    """
    from ctypes import cdll
    try:
        kernel32_function = getattr(cdll.kernel32, method_name)
    except TypeError:
        return None
    # Explicitly empty argument types are needed with Jython (at least 2.5).
    kernel32_function.argtypes = ()
    code_page = kernel32_function()
    return 'cp%s' % code_page
def _is_valid(encoding):
    """Tell whether `encoding` can be used to encode text.

    Returns False for empty values (e.g. None or ''), for names that no codec
    is registered for, and for values that cannot be used as a codec name at
    all. Returns True otherwise.
    """
    if not encoding:
        return False
    try:
        'test'.encode(encoding)
    except (LookupError, TypeError):
        # LookupError: unknown codec name.
        # TypeError: the value is not a string. This can happen when the
        # value comes from the `encoding` attribute of a replaced stream
        # object; rejecting it lets the caller move on to the next candidate
        # instead of failing.
        return False
    return True
# Copyright 2008-2014 Nokia Solutions and Networks
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
# `unic` is implemented separately for each interpreter because:
# - Jython: importing the unicodedata module takes a very long time and does
#   not seem to be necessary as Java probably already handles normalization.
#   Furthermore, Jython on Java 1.5 does not even have unicodedata.normalize.
# - IronPython 2.6: does not have unicodedata and probably does not need it.
# - CPython: does not automatically normalize Unicode strings.
if sys.platform.startswith('java'):
    from java.lang import Object, Class

    def unic(item, *args):
        # http://bugs.jython.org/issue1564
        is_java_instance = (isinstance(item, Object)
                            and not isinstance(item, Class))
        if is_java_instance:
            try:
                # http://bugs.jython.org/issue1563
                item = item.toString()
            except:
                return _unrepresentable_object(item)
        return _unic(item, *args)

elif sys.platform == 'cli':

    def unic(item, *args):
        converted = _unic(item, *args)
        return converted

else:
    from unicodedata import normalize

    def unic(item, *args):
        converted = _unic(item, *args)
        return normalize('NFC', converted)
def _unic(item, *args):
    """Convert `item` to Unicode, passing `args` through to `unicode`.

    If the conversion raises `UnicodeError`, `str(item)` is used instead with
    every character whose ordinal is 128 or above replaced by its
    `string_escape` form. Any other failure, in either step, yields the
    "unrepresentable object" message.
    """
    # Based on a recipe from http://code.activestate.com/recipes/466341
    try:
        return unicode(item, *args)
    except UnicodeError:
        try:
            pieces = []
            for char in str(item):
                if ord(char) >= 128:
                    char = char.encode('string_escape')
                pieces.append(char)
            return u''.join(pieces)
        except:
            return _unrepresentable_object(item)
    except:
        return _unrepresentable_object(item)
def safe_repr(item):
    """Return the `repr` of `item` as Unicode.

    If that raises `UnicodeError`, the `repr` of the Unicode conversion of
    `item` is returned instead. Any other failure yields the "unrepresentable
    object" message.
    """
    try:
        text = unic(repr(item))
    except UnicodeError:
        text = repr(unic(item))
    except:
        text = _unrepresentable_object(item)
    return text
if sys.platform == 'cli':
    # IronPython leaves the `u` prefix out of `repr(u'foo')`. It is added back
    # here so that log messages are consistent and easier to test.
    _safe_repr = safe_repr

    def safe_repr(item):
        if isinstance(item, list):
            members = ', '.join(safe_repr(member) for member in item)
            return '[%s]' % members
        text = _safe_repr(item)
        lacks_prefix = isinstance(item, unicode) and not text.startswith('u')
        if lacks_prefix:
            return 'u' + text
        return text
def _unrepresentable_object(item):
    """Return a placeholder message for an `item` that cannot be converted.

    The message contains the name of the item's class and the text returned
    by `get_error_message()`.
    """
    from robot.utils.error import get_error_message
    template = u"<Unrepresentable object '%s'. Error: %s>"
    type_name = item.__class__.__name__
    return template % (type_name, get_error_message())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment