Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save praveenbommali/85b8742d6df13c42dcbc675f091497a8 to your computer and use it in GitHub Desktop.
Save praveenbommali/85b8742d6df13c42dcbc675f091497a8 to your computer and use it in GitHub Desktop.
LeetCode Python Solutions
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/venv" />
</content>
<orderEntry type="jdk" jdkName="Python 3.6 (LeetCodeSolutions)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/LeetCodeSolutions.iml" filepath="$PROJECT_DIR$/.idea/LeetCodeSolutions.iml" />
</modules>
</component>
</project>
# Given a string, find the first non-repeating character in it
# and return it's index. If it doesn't exist, return -1.
class Solution:
def firstUniqChar1(self, s):
"""
:type s: str
:rtype: int
"""
len_s = len(s)
for i in range(len_s):
unique = 0
for j in range(0, len_s):
if s[i] not in s[i+1:] and s[i] == s[j]:
unique += 1
if unique == 1:
return i
return -1
def firstUniqChar2(self, s):
pass
if __name__ == '__main__':
solution = Solution()
print(solution.firstUniqChar1("loveleetcode"))
# This file must be used with "source bin/activate" *from bash*
# you cannot run it directly
deactivate () {
# reset old environment variables
if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
PATH="${_OLD_VIRTUAL_PATH:-}"
export PATH
unset _OLD_VIRTUAL_PATH
fi
if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
export PYTHONHOME
unset _OLD_VIRTUAL_PYTHONHOME
fi
# This should detect bash and zsh, which have a hash command that must
# be called to get it to forget past commands. Without forgetting
# past commands the $PATH changes we made may not be respected
if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
hash -r
fi
if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
PS1="${_OLD_VIRTUAL_PS1:-}"
export PS1
unset _OLD_VIRTUAL_PS1
fi
unset VIRTUAL_ENV
if [ ! "$1" = "nondestructive" ] ; then
# Self destruct!
unset -f deactivate
fi
}
# unset irrelevant variables
deactivate nondestructive
VIRTUAL_ENV="/Users/praveenbommali/LeetCodeSolutions/venv"
export VIRTUAL_ENV
_OLD_VIRTUAL_PATH="$PATH"
PATH="$VIRTUAL_ENV/bin:$PATH"
export PATH
# unset PYTHONHOME if set
# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
# could use `if (set -u; : $PYTHONHOME) ;` in bash
if [ -n "${PYTHONHOME:-}" ] ; then
_OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
unset PYTHONHOME
fi
if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
_OLD_VIRTUAL_PS1="${PS1:-}"
if [ "x(venv) " != x ] ; then
PS1="(venv) ${PS1:-}"
else
if [ "`basename \"$VIRTUAL_ENV\"`" = "__" ] ; then
# special case for Aspen magic directories
# see http://www.zetadev.com/software/aspen/
PS1="[`basename \`dirname \"$VIRTUAL_ENV\"\``] $PS1"
else
PS1="(`basename \"$VIRTUAL_ENV\"`)$PS1"
fi
fi
export PS1
fi
# This should detect bash and zsh, which have a hash command that must
# be called to get it to forget past commands. Without forgetting
# past commands the $PATH changes we made may not be respected
if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
hash -r
fi
# This file must be used with "source bin/activate.csh" *from csh*.
# You cannot run it directly.
# Created by Davide Di Blasi <davidedb@gmail.com>.
# Ported to Python 3.3 venv by Andrew Svetlov <andrew.svetlov@gmail.com>
alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; test "\!:*" != "nondestructive" && unalias deactivate'
# Unset irrelevant variables.
deactivate nondestructive
setenv VIRTUAL_ENV "/Users/praveenbommali/LeetCodeSolutions/venv"
set _OLD_VIRTUAL_PATH="$PATH"
setenv PATH "$VIRTUAL_ENV/bin:$PATH"
set _OLD_VIRTUAL_PROMPT="$prompt"
if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
if ("venv" != "") then
set env_name = "venv"
else
if (`basename "VIRTUAL_ENV"` == "__") then
# special case for Aspen magic directories
# see http://www.zetadev.com/software/aspen/
set env_name = `basename \`dirname "$VIRTUAL_ENV"\``
else
set env_name = `basename "$VIRTUAL_ENV"`
endif
endif
set prompt = "[$env_name] $prompt"
unset env_name
endif
alias pydoc python -m pydoc
rehash
# This file must be used with ". bin/activate.fish" *from fish* (http://fishshell.org)
# you cannot run it directly
function deactivate -d "Exit virtualenv and return to normal shell environment"
# reset old environment variables
if test -n "$_OLD_VIRTUAL_PATH"
set -gx PATH $_OLD_VIRTUAL_PATH
set -e _OLD_VIRTUAL_PATH
end
if test -n "$_OLD_VIRTUAL_PYTHONHOME"
set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
set -e _OLD_VIRTUAL_PYTHONHOME
end
if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
functions -e fish_prompt
set -e _OLD_FISH_PROMPT_OVERRIDE
functions -c _old_fish_prompt fish_prompt
functions -e _old_fish_prompt
end
set -e VIRTUAL_ENV
if test "$argv[1]" != "nondestructive"
# Self destruct!
functions -e deactivate
end
end
# unset irrelevant variables
deactivate nondestructive
set -gx VIRTUAL_ENV "/Users/praveenbommali/LeetCodeSolutions/venv"
set -gx _OLD_VIRTUAL_PATH $PATH
set -gx PATH "$VIRTUAL_ENV/bin" $PATH
# unset PYTHONHOME if set
if set -q PYTHONHOME
set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
set -e PYTHONHOME
end
if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
# fish uses a function instead of an env var to generate the prompt.
# save the current fish_prompt function as the function _old_fish_prompt
functions -c fish_prompt _old_fish_prompt
# with the original prompt function renamed, we can override with our own.
function fish_prompt
# Save the return status of the last command
set -l old_status $status
# Prompt override?
if test -n "(venv) "
printf "%s%s" "(venv) " (set_color normal)
else
# ...Otherwise, prepend env
set -l _checkbase (basename "$VIRTUAL_ENV")
if test $_checkbase = "__"
# special case for Aspen magic directories
# see http://www.zetadev.com/software/aspen/
printf "%s[%s]%s " (set_color -b blue white) (basename (dirname "$VIRTUAL_ENV")) (set_color normal)
else
printf "%s(%s)%s" (set_color -b blue white) (basename "$VIRTUAL_ENV") (set_color normal)
end
end
# Restore the return status of the previous command.
echo "exit $old_status" | .
_old_fish_prompt
end
set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
end
#!/Users/praveenbommali/LeetCodeSolutions/venv/bin/python
# EASY-INSTALL-ENTRY-SCRIPT: 'setuptools==28.8.0','console_scripts','easy_install'
__requires__ = 'setuptools==28.8.0'
import re
import sys
from pkg_resources import load_entry_point
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
sys.exit(
load_entry_point('setuptools==28.8.0', 'console_scripts', 'easy_install')()
)
#!/Users/praveenbommali/LeetCodeSolutions/venv/bin/python
# EASY-INSTALL-ENTRY-SCRIPT: 'setuptools==28.8.0','console_scripts','easy_install-3.6'
__requires__ = 'setuptools==28.8.0'
import re
import sys
from pkg_resources import load_entry_point
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
sys.exit(
load_entry_point('setuptools==28.8.0', 'console_scripts', 'easy_install-3.6')()
)
#!/Users/praveenbommali/LeetCodeSolutions/venv/bin/python
# EASY-INSTALL-ENTRY-SCRIPT: 'pip==9.0.1','console_scripts','pip'
__requires__ = 'pip==9.0.1'
import re
import sys
from pkg_resources import load_entry_point
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
sys.exit(
load_entry_point('pip==9.0.1', 'console_scripts', 'pip')()
)
#!/Users/praveenbommali/LeetCodeSolutions/venv/bin/python
# EASY-INSTALL-ENTRY-SCRIPT: 'pip==9.0.1','console_scripts','pip3'
__requires__ = 'pip==9.0.1'
import re
import sys
from pkg_resources import load_entry_point
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
sys.exit(
load_entry_point('pip==9.0.1', 'console_scripts', 'pip3')()
)
#!/Users/praveenbommali/LeetCodeSolutions/venv/bin/python
# EASY-INSTALL-ENTRY-SCRIPT: 'pip==9.0.1','console_scripts','pip3.6'
__requires__ = 'pip==9.0.1'
import re
import sys
from pkg_resources import load_entry_point
if __name__ == '__main__':
sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
sys.exit(
load_entry_point('pip==9.0.1', 'console_scripts', 'pip3.6')()
)
./setuptools-28.8.0-py3.6.egg
./pip-9.0.1-py3.6.egg
Metadata-Version: 1.2
Name: pip
Version: 9.0.1
Summary: The PyPA recommended tool for installing Python packages.
Home-page: https://pip.pypa.io/
Author: The pip developers
Author-email: python-virtualenv@groups.google.com
License: MIT
Description: pip
===
The `PyPA recommended
<https://packaging.python.org/en/latest/current/>`_
tool for installing Python packages.
* `Installation <https://pip.pypa.io/en/stable/installing.html>`_
* `Documentation <https://pip.pypa.io/>`_
* `Changelog <https://pip.pypa.io/en/stable/news.html>`_
* `Github Page <https://github.com/pypa/pip>`_
* `Issue Tracking <https://github.com/pypa/pip/issues>`_
* `User mailing list <http://groups.google.com/group/python-virtualenv>`_
* `Dev mailing list <http://groups.google.com/group/pypa-dev>`_
* User IRC: #pypa on Freenode.
* Dev IRC: #pypa-dev on Freenode.
.. image:: https://img.shields.io/pypi/v/pip.svg
:target: https://pypi.python.org/pypi/pip
.. image:: https://img.shields.io/travis/pypa/pip/master.svg
:target: http://travis-ci.org/pypa/pip
.. image:: https://img.shields.io/appveyor/ci/pypa/pip.svg
:target: https://ci.appveyor.com/project/pypa/pip/history
.. image:: https://readthedocs.org/projects/pip/badge/?version=stable
:target: https://pip.pypa.io/en/stable
Code of Conduct
---------------
Everyone interacting in the pip project's codebases, issue trackers, chat
rooms, and mailing lists is expected to follow the `PyPA Code of Conduct`_.
.. _PyPA Code of Conduct: https://www.pypa.io/en/latest/code-of-conduct/
Keywords: easy_install distutils setuptools egg virtualenv
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Topic :: Software Development :: Build Tools
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.6
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.3
Classifier: Programming Language :: Python :: 3.4
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: Implementation :: PyPy
Requires-Python: >=2.6,!=3.0.*,!=3.1.*,!=3.2.*
AUTHORS.txt
CHANGES.txt
LICENSE.txt
MANIFEST.in
README.rst
setup.cfg
setup.py
docs/Makefile
docs/__init__.py
docs/conf.py
docs/configuration.rst
docs/cookbook.rst
docs/development.rst
docs/index.rst
docs/installing.rst
docs/logic.rst
docs/make.bat
docs/news.rst
docs/pipext.py
docs/quickstart.rst
docs/usage.rst
docs/user_guide.rst
docs/reference/index.rst
docs/reference/pip.rst
docs/reference/pip_download.rst
docs/reference/pip_freeze.rst
docs/reference/pip_hash.rst
docs/reference/pip_install.rst
docs/reference/pip_list.rst
docs/reference/pip_search.rst
docs/reference/pip_show.rst
docs/reference/pip_uninstall.rst
docs/reference/pip_wheel.rst
pip/__init__.py
pip/__main__.py
pip/basecommand.py
pip/baseparser.py
pip/cmdoptions.py
pip/download.py
pip/exceptions.py
pip/index.py
pip/locations.py
pip/pep425tags.py
pip/status_codes.py
pip/wheel.py
pip.egg-info/PKG-INFO
pip.egg-info/SOURCES.txt
pip.egg-info/dependency_links.txt
pip.egg-info/entry_points.txt
pip.egg-info/not-zip-safe
pip.egg-info/requires.txt
pip.egg-info/top_level.txt
pip/_vendor/README.rst
pip/_vendor/__init__.py
pip/_vendor/appdirs.py
pip/_vendor/distro.py
pip/_vendor/ipaddress.py
pip/_vendor/ordereddict.py
pip/_vendor/pyparsing.py
pip/_vendor/re-vendor.py
pip/_vendor/retrying.py
pip/_vendor/six.py
pip/_vendor/vendor.txt
pip/_vendor/cachecontrol/__init__.py
pip/_vendor/cachecontrol/_cmd.py
pip/_vendor/cachecontrol/adapter.py
pip/_vendor/cachecontrol/cache.py
pip/_vendor/cachecontrol/compat.py
pip/_vendor/cachecontrol/controller.py
pip/_vendor/cachecontrol/filewrapper.py
pip/_vendor/cachecontrol/heuristics.py
pip/_vendor/cachecontrol/serialize.py
pip/_vendor/cachecontrol/wrapper.py
pip/_vendor/cachecontrol/caches/__init__.py
pip/_vendor/cachecontrol/caches/file_cache.py
pip/_vendor/cachecontrol/caches/redis_cache.py
pip/_vendor/colorama/__init__.py
pip/_vendor/colorama/ansi.py
pip/_vendor/colorama/ansitowin32.py
pip/_vendor/colorama/initialise.py
pip/_vendor/colorama/win32.py
pip/_vendor/colorama/winterm.py
pip/_vendor/distlib/__init__.py
pip/_vendor/distlib/compat.py
pip/_vendor/distlib/database.py
pip/_vendor/distlib/index.py
pip/_vendor/distlib/locators.py
pip/_vendor/distlib/manifest.py
pip/_vendor/distlib/markers.py
pip/_vendor/distlib/metadata.py
pip/_vendor/distlib/resources.py
pip/_vendor/distlib/scripts.py
pip/_vendor/distlib/t32.exe
pip/_vendor/distlib/t64.exe
pip/_vendor/distlib/util.py
pip/_vendor/distlib/version.py
pip/_vendor/distlib/w32.exe
pip/_vendor/distlib/w64.exe
pip/_vendor/distlib/wheel.py
pip/_vendor/distlib/_backport/__init__.py
pip/_vendor/distlib/_backport/misc.py
pip/_vendor/distlib/_backport/shutil.py
pip/_vendor/distlib/_backport/sysconfig.cfg
pip/_vendor/distlib/_backport/sysconfig.py
pip/_vendor/distlib/_backport/tarfile.py
pip/_vendor/html5lib/__init__.py
pip/_vendor/html5lib/_ihatexml.py
pip/_vendor/html5lib/_inputstream.py
pip/_vendor/html5lib/_tokenizer.py
pip/_vendor/html5lib/_utils.py
pip/_vendor/html5lib/constants.py
pip/_vendor/html5lib/html5parser.py
pip/_vendor/html5lib/serializer.py
pip/_vendor/html5lib/_trie/__init__.py
pip/_vendor/html5lib/_trie/_base.py
pip/_vendor/html5lib/_trie/datrie.py
pip/_vendor/html5lib/_trie/py.py
pip/_vendor/html5lib/filters/__init__.py
pip/_vendor/html5lib/filters/alphabeticalattributes.py
pip/_vendor/html5lib/filters/base.py
pip/_vendor/html5lib/filters/inject_meta_charset.py
pip/_vendor/html5lib/filters/lint.py
pip/_vendor/html5lib/filters/optionaltags.py
pip/_vendor/html5lib/filters/sanitizer.py
pip/_vendor/html5lib/filters/whitespace.py
pip/_vendor/html5lib/treeadapters/__init__.py
pip/_vendor/html5lib/treeadapters/genshi.py
pip/_vendor/html5lib/treeadapters/sax.py
pip/_vendor/html5lib/treebuilders/__init__.py
pip/_vendor/html5lib/treebuilders/base.py
pip/_vendor/html5lib/treebuilders/dom.py
pip/_vendor/html5lib/treebuilders/etree.py
pip/_vendor/html5lib/treebuilders/etree_lxml.py
pip/_vendor/html5lib/treewalkers/__init__.py
pip/_vendor/html5lib/treewalkers/base.py
pip/_vendor/html5lib/treewalkers/dom.py
pip/_vendor/html5lib/treewalkers/etree.py
pip/_vendor/html5lib/treewalkers/etree_lxml.py
pip/_vendor/html5lib/treewalkers/genshi.py
pip/_vendor/lockfile/__init__.py
pip/_vendor/lockfile/linklockfile.py
pip/_vendor/lockfile/mkdirlockfile.py
pip/_vendor/lockfile/pidlockfile.py
pip/_vendor/lockfile/sqlitelockfile.py
pip/_vendor/lockfile/symlinklockfile.py
pip/_vendor/packaging/__about__.py
pip/_vendor/packaging/__init__.py
pip/_vendor/packaging/_compat.py
pip/_vendor/packaging/_structures.py
pip/_vendor/packaging/markers.py
pip/_vendor/packaging/requirements.py
pip/_vendor/packaging/specifiers.py
pip/_vendor/packaging/utils.py
pip/_vendor/packaging/version.py
pip/_vendor/pkg_resources/__init__.py
pip/_vendor/progress/__init__.py
pip/_vendor/progress/bar.py
pip/_vendor/progress/counter.py
pip/_vendor/progress/helpers.py
pip/_vendor/progress/spinner.py
pip/_vendor/requests/__init__.py
pip/_vendor/requests/adapters.py
pip/_vendor/requests/api.py
pip/_vendor/requests/auth.py
pip/_vendor/requests/cacert.pem
pip/_vendor/requests/certs.py
pip/_vendor/requests/compat.py
pip/_vendor/requests/cookies.py
pip/_vendor/requests/exceptions.py
pip/_vendor/requests/hooks.py
pip/_vendor/requests/models.py
pip/_vendor/requests/sessions.py
pip/_vendor/requests/status_codes.py
pip/_vendor/requests/structures.py
pip/_vendor/requests/utils.py
pip/_vendor/requests/packages/__init__.py
pip/_vendor/requests/packages/chardet/__init__.py
pip/_vendor/requests/packages/chardet/big5freq.py
pip/_vendor/requests/packages/chardet/big5prober.py
pip/_vendor/requests/packages/chardet/chardetect.py
pip/_vendor/requests/packages/chardet/chardistribution.py
pip/_vendor/requests/packages/chardet/charsetgroupprober.py
pip/_vendor/requests/packages/chardet/charsetprober.py
pip/_vendor/requests/packages/chardet/codingstatemachine.py
pip/_vendor/requests/packages/chardet/compat.py
pip/_vendor/requests/packages/chardet/constants.py
pip/_vendor/requests/packages/chardet/cp949prober.py
pip/_vendor/requests/packages/chardet/escprober.py
pip/_vendor/requests/packages/chardet/escsm.py
pip/_vendor/requests/packages/chardet/eucjpprober.py
pip/_vendor/requests/packages/chardet/euckrfreq.py
pip/_vendor/requests/packages/chardet/euckrprober.py
pip/_vendor/requests/packages/chardet/euctwfreq.py
pip/_vendor/requests/packages/chardet/euctwprober.py
pip/_vendor/requests/packages/chardet/gb2312freq.py
pip/_vendor/requests/packages/chardet/gb2312prober.py
pip/_vendor/requests/packages/chardet/hebrewprober.py
pip/_vendor/requests/packages/chardet/jisfreq.py
pip/_vendor/requests/packages/chardet/jpcntx.py
pip/_vendor/requests/packages/chardet/langbulgarianmodel.py
pip/_vendor/requests/packages/chardet/langcyrillicmodel.py
pip/_vendor/requests/packages/chardet/langgreekmodel.py
pip/_vendor/requests/packages/chardet/langhebrewmodel.py
pip/_vendor/requests/packages/chardet/langhungarianmodel.py
pip/_vendor/requests/packages/chardet/langthaimodel.py
pip/_vendor/requests/packages/chardet/latin1prober.py
pip/_vendor/requests/packages/chardet/mbcharsetprober.py
pip/_vendor/requests/packages/chardet/mbcsgroupprober.py
pip/_vendor/requests/packages/chardet/mbcssm.py
pip/_vendor/requests/packages/chardet/sbcharsetprober.py
pip/_vendor/requests/packages/chardet/sbcsgroupprober.py
pip/_vendor/requests/packages/chardet/sjisprober.py
pip/_vendor/requests/packages/chardet/universaldetector.py
pip/_vendor/requests/packages/chardet/utf8prober.py
pip/_vendor/requests/packages/urllib3/__init__.py
pip/_vendor/requests/packages/urllib3/_collections.py
pip/_vendor/requests/packages/urllib3/connection.py
pip/_vendor/requests/packages/urllib3/connectionpool.py
pip/_vendor/requests/packages/urllib3/exceptions.py
pip/_vendor/requests/packages/urllib3/fields.py
pip/_vendor/requests/packages/urllib3/filepost.py
pip/_vendor/requests/packages/urllib3/poolmanager.py
pip/_vendor/requests/packages/urllib3/request.py
pip/_vendor/requests/packages/urllib3/response.py
pip/_vendor/requests/packages/urllib3/contrib/__init__.py
pip/_vendor/requests/packages/urllib3/contrib/appengine.py
pip/_vendor/requests/packages/urllib3/contrib/ntlmpool.py
pip/_vendor/requests/packages/urllib3/contrib/pyopenssl.py
pip/_vendor/requests/packages/urllib3/contrib/socks.py
pip/_vendor/requests/packages/urllib3/packages/__init__.py
pip/_vendor/requests/packages/urllib3/packages/ordered_dict.py
pip/_vendor/requests/packages/urllib3/packages/six.py
pip/_vendor/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py
pip/_vendor/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py
pip/_vendor/requests/packages/urllib3/util/__init__.py
pip/_vendor/requests/packages/urllib3/util/connection.py
pip/_vendor/requests/packages/urllib3/util/request.py
pip/_vendor/requests/packages/urllib3/util/response.py
pip/_vendor/requests/packages/urllib3/util/retry.py
pip/_vendor/requests/packages/urllib3/util/ssl_.py
pip/_vendor/requests/packages/urllib3/util/timeout.py
pip/_vendor/requests/packages/urllib3/util/url.py
pip/_vendor/webencodings/__init__.py
pip/_vendor/webencodings/labels.py
pip/_vendor/webencodings/mklabels.py
pip/_vendor/webencodings/tests.py
pip/_vendor/webencodings/x_user_defined.py
pip/commands/__init__.py
pip/commands/check.py
pip/commands/completion.py
pip/commands/download.py
pip/commands/freeze.py
pip/commands/hash.py
pip/commands/help.py
pip/commands/install.py
pip/commands/list.py
pip/commands/search.py
pip/commands/show.py
pip/commands/uninstall.py
pip/commands/wheel.py
pip/compat/__init__.py
pip/compat/dictconfig.py
pip/models/__init__.py
pip/models/index.py
pip/operations/__init__.py
pip/operations/check.py
pip/operations/freeze.py
pip/req/__init__.py
pip/req/req_file.py
pip/req/req_install.py
pip/req/req_set.py
pip/req/req_uninstall.py
pip/utils/__init__.py
pip/utils/appdirs.py
pip/utils/build.py
pip/utils/deprecation.py
pip/utils/encoding.py
pip/utils/filesystem.py
pip/utils/glibc.py
pip/utils/hashes.py
pip/utils/logging.py
pip/utils/outdated.py
pip/utils/packaging.py
pip/utils/setuptools_build.py
pip/utils/ui.py
pip/vcs/__init__.py
pip/vcs/bazaar.py
pip/vcs/git.py
pip/vcs/mercurial.py
pip/vcs/subversion.py
#!/usr/bin/env python
from __future__ import absolute_import
import locale
import logging
import os
import optparse
import warnings
import sys
import re
# 2016-06-17 barry@debian.org: urllib3 1.14 added optional support for socks,
# but if invoked (i.e. imported), it will issue a warning to stderr if socks
# isn't available. requests unconditionally imports urllib3's socks contrib
# module, triggering this warning. The warning breaks DEP-8 tests (because of
# the stderr output) and is just plain annoying in normal usage. I don't want
# to add socks as yet another dependency for pip, nor do I want to allow-stder
# in the DEP-8 tests, so just suppress the warning. pdb tells me this has to
# be done before the import of pip.vcs.
from pip._vendor.requests.packages.urllib3.exceptions import DependencyWarning
warnings.filterwarnings("ignore", category=DependencyWarning) # noqa
from pip.exceptions import InstallationError, CommandError, PipError
from pip.utils import get_installed_distributions, get_prog
from pip.utils import deprecation, dist_is_editable
from pip.vcs import git, mercurial, subversion, bazaar # noqa
from pip.baseparser import ConfigOptionParser, UpdatingDefaultsHelpFormatter
from pip.commands import get_summaries, get_similar_commands
from pip.commands import commands_dict
from pip._vendor.requests.packages.urllib3.exceptions import (
InsecureRequestWarning,
)
# assignment for flake8 to be happy
# This fixes a peculiarity when importing via __import__ - as we are
# initialising the pip module, "from pip import cmdoptions" is recursive
# and appears not to work properly in that situation.
import pip.cmdoptions
cmdoptions = pip.cmdoptions
# The version as used in the setup.py and the docs conf.py
__version__ = "9.0.1"
logger = logging.getLogger(__name__)
# Hide the InsecureRequestWarning from urllib3
warnings.filterwarnings("ignore", category=InsecureRequestWarning)
def autocomplete():
"""Command and option completion for the main option parser (and options)
and its subcommands (and options).
Enable by sourcing one of the completion shell scripts (bash, zsh or fish).
"""
# Don't complete if user hasn't sourced bash_completion file.
if 'PIP_AUTO_COMPLETE' not in os.environ:
return
cwords = os.environ['COMP_WORDS'].split()[1:]
cword = int(os.environ['COMP_CWORD'])
try:
current = cwords[cword - 1]
except IndexError:
current = ''
subcommands = [cmd for cmd, summary in get_summaries()]
options = []
# subcommand
try:
subcommand_name = [w for w in cwords if w in subcommands][0]
except IndexError:
subcommand_name = None
parser = create_main_parser()
# subcommand options
if subcommand_name:
# special case: 'help' subcommand has no options
if subcommand_name == 'help':
sys.exit(1)
# special case: list locally installed dists for uninstall command
if subcommand_name == 'uninstall' and not current.startswith('-'):
installed = []
lc = current.lower()
for dist in get_installed_distributions(local_only=True):
if dist.key.startswith(lc) and dist.key not in cwords[1:]:
installed.append(dist.key)
# if there are no dists installed, fall back to option completion
if installed:
for dist in installed:
print(dist)
sys.exit(1)
subcommand = commands_dict[subcommand_name]()
options += [(opt.get_opt_string(), opt.nargs)
for opt in subcommand.parser.option_list_all
if opt.help != optparse.SUPPRESS_HELP]
# filter out previously specified options from available options
prev_opts = [x.split('=')[0] for x in cwords[1:cword - 1]]
options = [(x, v) for (x, v) in options if x not in prev_opts]
# filter options by current input
options = [(k, v) for k, v in options if k.startswith(current)]
for option in options:
opt_label = option[0]
# append '=' to options which require args
if option[1]:
opt_label += '='
print(opt_label)
else:
# show main parser options only when necessary
if current.startswith('-') or current.startswith('--'):
opts = [i.option_list for i in parser.option_groups]
opts.append(parser.option_list)
opts = (o for it in opts for o in it)
subcommands += [i.get_opt_string() for i in opts
if i.help != optparse.SUPPRESS_HELP]
print(' '.join([x for x in subcommands if x.startswith(current)]))
sys.exit(1)
def create_main_parser():
parser_kw = {
'usage': '\n%prog <command> [options]',
'add_help_option': False,
'formatter': UpdatingDefaultsHelpFormatter(),
'name': 'global',
'prog': get_prog(),
}
parser = ConfigOptionParser(**parser_kw)
parser.disable_interspersed_args()
pip_pkg_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
parser.version = 'pip %s from %s (python %s)' % (
__version__, pip_pkg_dir, sys.version[:3])
# add the general options
gen_opts = cmdoptions.make_option_group(cmdoptions.general_group, parser)
parser.add_option_group(gen_opts)
parser.main = True # so the help formatter knows
# create command listing for description
command_summaries = get_summaries()
description = [''] + ['%-27s %s' % (i, j) for i, j in command_summaries]
parser.description = '\n'.join(description)
return parser
def parseopts(args):
parser = create_main_parser()
# Note: parser calls disable_interspersed_args(), so the result of this
# call is to split the initial args into the general options before the
# subcommand and everything else.
# For example:
# args: ['--timeout=5', 'install', '--user', 'INITools']
# general_options: ['--timeout==5']
# args_else: ['install', '--user', 'INITools']
general_options, args_else = parser.parse_args(args)
# --version
if general_options.version:
sys.stdout.write(parser.version)
sys.stdout.write(os.linesep)
sys.exit()
# pip || pip help -> print_help()
if not args_else or (args_else[0] == 'help' and len(args_else) == 1):
parser.print_help()
sys.exit()
# the subcommand name
cmd_name = args_else[0]
if cmd_name not in commands_dict:
guess = get_similar_commands(cmd_name)
msg = ['unknown command "%s"' % cmd_name]
if guess:
msg.append('maybe you meant "%s"' % guess)
raise CommandError(' - '.join(msg))
# all the args without the subcommand
cmd_args = args[:]
cmd_args.remove(cmd_name)
return cmd_name, cmd_args
def check_isolated(args):
isolated = False
if "--isolated" in args:
isolated = True
return isolated
def main(args=None):
if args is None:
args = sys.argv[1:]
# Configure our deprecation warnings to be sent through loggers
deprecation.install_warning_logger()
autocomplete()
try:
cmd_name, cmd_args = parseopts(args)
except PipError as exc:
sys.stderr.write("ERROR: %s" % exc)
sys.stderr.write(os.linesep)
sys.exit(1)
# Needed for locale.getpreferredencoding(False) to work
# in pip.utils.encoding.auto_decode
try:
locale.setlocale(locale.LC_ALL, '')
except locale.Error as e:
# setlocale can apparently crash if locale are uninitialized
logger.debug("Ignoring error %s when setting locale", e)
command = commands_dict[cmd_name](isolated=check_isolated(cmd_args))
return command.main(cmd_args)
# ###########################################################
# # Writing freeze files
class FrozenRequirement(object):
def __init__(self, name, req, editable, comments=()):
self.name = name
self.req = req
self.editable = editable
self.comments = comments
_rev_re = re.compile(r'-r(\d+)$')
_date_re = re.compile(r'-(20\d\d\d\d\d\d)$')
@classmethod
def from_dist(cls, dist, dependency_links):
location = os.path.normcase(os.path.abspath(dist.location))
comments = []
from pip.vcs import vcs, get_src_requirement
if dist_is_editable(dist) and vcs.get_backend_name(location):
editable = True
try:
req = get_src_requirement(dist, location)
except InstallationError as exc:
logger.warning(
"Error when trying to get requirement for VCS system %s, "
"falling back to uneditable format", exc
)
req = None
if req is None:
logger.warning(
'Could not determine repository location of %s', location
)
comments.append(
'## !! Could not determine repository location'
)
req = dist.as_requirement()
editable = False
else:
editable = False
req = dist.as_requirement()
specs = req.specs
assert len(specs) == 1 and specs[0][0] in ["==", "==="], \
'Expected 1 spec with == or ===; specs = %r; dist = %r' % \
(specs, dist)
version = specs[0][1]
ver_match = cls._rev_re.search(version)
date_match = cls._date_re.search(version)
if ver_match or date_match:
svn_backend = vcs.get_backend('svn')
if svn_backend:
svn_location = svn_backend().get_location(
dist,
dependency_links,
)
if not svn_location:
logger.warning(
'Warning: cannot find svn location for %s', req)
comments.append(
'## FIXME: could not find svn URL in dependency_links '
'for this package:'
)
else:
comments.append(
'# Installing as editable to satisfy requirement %s:' %
req
)
if ver_match:
rev = ver_match.group(1)
else:
rev = '{%s}' % date_match.group(1)
editable = True
req = '%s@%s#egg=%s' % (
svn_location,
rev,
cls.egg_name(dist)
)
return cls(dist.project_name, req, editable, comments)
@staticmethod
def egg_name(dist):
name = dist.egg_name()
match = re.search(r'-py\d\.\d$', name)
if match:
name = name[:match.start()]
return name
def __str__(self):
req = self.req
if self.editable:
req = '-e %s' % req
return '\n'.join(list(self.comments) + [str(req)]) + '\n'
if __name__ == '__main__':
sys.exit(main())
from __future__ import absolute_import
import os
import sys
# If we are running from a wheel, add the wheel to sys.path
# This allows the usage python pip-*.whl/pip install pip-*.whl
if __package__ == '':
# __file__ is pip-*.whl/pip/__main__.py
# first dirname call strips of '/__main__.py', second strips off '/pip'
# Resulting path is the name of the wheel itself
# Add that to sys.path so we can import pip
path = os.path.dirname(os.path.dirname(__file__))
sys.path.insert(0, path)
import pip # noqa
if __name__ == '__main__':
sys.exit(pip.main())
"""
pip._vendor is for vendoring dependencies of pip to prevent needing pip to
depend on something external.
Files inside of pip._vendor should be considered immutable and should only be
updated to versions from upstream.
"""
from __future__ import absolute_import
import glob
import os.path
import sys
# Downstream redistributors which have debundled our dependencies should also
# patch this value to be true. This will trigger the additional patching
# to cause things like "six" to be available as pip.
DEBUNDLED = False
# By default, look in this directory for a bunch of .whl files which we will
# add to the beginning of sys.path before attempting to import anything. This
# is done to support downstream re-distributors like Debian and Fedora who
# wish to create their own Wheels for our dependencies to aid in debundling.
WHEEL_DIR = os.path.abspath(os.path.dirname(__file__))
# Define a small helper function to alias our vendored modules to the real ones
# if the vendored ones do not exist. This idea of this was taken from
# https://github.com/kennethreitz/requests/pull/2567.
def vendored(modulename):
vendored_name = "{0}.{1}".format(__name__, modulename)
try:
__import__(vendored_name, globals(), locals(), level=0)
except ImportError:
try:
__import__(modulename, globals(), locals(), level=0)
except ImportError:
# We can just silently allow import failures to pass here. If we
# got to this point it means that ``import pip._vendor.whatever``
# failed and so did ``import whatever``. Since we're importing this
# upfront in an attempt to alias imports, not erroring here will
# just mean we get a regular import error whenever pip *actually*
# tries to import one of these modules to use it, which actually
# gives us a better error message than we would have otherwise
# gotten.
pass
else:
sys.modules[vendored_name] = sys.modules[modulename]
base, head = vendored_name.rsplit(".", 1)
setattr(sys.modules[base], head, sys.modules[modulename])
# If we're operating in a debundled setup, then we want to go ahead and trigger
# the aliasing of our vendored libraries as well as looking for wheels to add
# to our sys.path. This will cause all of this code to be a no-op typically
# however downstream redistributors can enable it in a consistent way across
# all platforms.
if DEBUNDLED:
# Actually look inside of WHEEL_DIR to find .whl files and add them to the
# front of our sys.path.
sys.path[:] = glob.glob(os.path.join(WHEEL_DIR, "*.whl")) + sys.path
# Actually alias all of our vendored dependencies.
vendored("cachecontrol")
vendored("colorama")
vendored("distlib")
vendored("distro")
vendored("html5lib")
vendored("lockfile")
vendored("six")
vendored("six.moves")
vendored("six.moves.urllib")
vendored("packaging")
vendored("packaging.version")
vendored("packaging.specifiers")
vendored("pkg_resources")
vendored("progress")
vendored("retrying")
vendored("requests")
vendored("requests.packages")
vendored("requests.packages.urllib3")
vendored("requests.packages.urllib3._collections")
vendored("requests.packages.urllib3.connection")
vendored("requests.packages.urllib3.connectionpool")
vendored("requests.packages.urllib3.contrib")
vendored("requests.packages.urllib3.contrib.ntlmpool")
vendored("requests.packages.urllib3.contrib.pyopenssl")
vendored("requests.packages.urllib3.exceptions")
vendored("requests.packages.urllib3.fields")
vendored("requests.packages.urllib3.filepost")
vendored("requests.packages.urllib3.packages")
vendored("requests.packages.urllib3.packages.ordered_dict")
vendored("requests.packages.urllib3.packages.six")
vendored("requests.packages.urllib3.packages.ssl_match_hostname")
vendored("requests.packages.urllib3.packages.ssl_match_hostname."
"_implementation")
vendored("requests.packages.urllib3.poolmanager")
vendored("requests.packages.urllib3.request")
vendored("requests.packages.urllib3.response")
vendored("requests.packages.urllib3.util")
vendored("requests.packages.urllib3.util.connection")
vendored("requests.packages.urllib3.util.request")
vendored("requests.packages.urllib3.util.response")
vendored("requests.packages.urllib3.util.retry")
vendored("requests.packages.urllib3.util.ssl_")
vendored("requests.packages.urllib3.util.timeout")
vendored("requests.packages.urllib3.util.url")
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2005-2010 ActiveState Software Inc.
# Copyright (c) 2013 Eddy Petrișor
"""Utilities for determining application-specific dirs.
See <http://github.com/ActiveState/appdirs> for details and usage.
"""
# Dev Notes:
# - MSDN on where to store app data files:
# http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120
# - macOS: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html
# - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
__version_info__ = (1, 4, 0)
__version__ = '.'.join(map(str, __version_info__))
import sys
import os
PY3 = sys.version_info[0] == 3
if PY3:
unicode = str
if sys.platform.startswith('java'):
import platform
os_name = platform.java_ver()[3][0]
if os_name.startswith('Windows'): # "Windows XP", "Windows 7", etc.
system = 'win32'
elif os_name.startswith('Mac'): # "macOS", etc.
system = 'darwin'
else: # "Linux", "SunOS", "FreeBSD", etc.
# Setting this to "linux2" is not ideal, but only Windows or Mac
# are actually checked for and the rest of the module expects
# *sys.platform* style strings.
system = 'linux2'
else:
system = sys.platform
def user_data_dir(appname=None, appauthor=None, version=None, roaming=False):
r"""Return full path to the user-specific data dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"roaming" (boolean, default False) can be set True to use the Windows
roaming appdata directory. That means that for users on a Windows
network setup for roaming profiles, this user data will be
sync'd on login. See
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
for a discussion of issues.
Typical user data directories are:
macOS: ~/Library/Application Support/<AppName>
Unix: ~/.local/share/<AppName> # or in $XDG_DATA_HOME, if defined
Win XP (not roaming): C:\Documents and Settings\<username>\Application Data\<AppAuthor>\<AppName>
Win XP (roaming): C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>
Win 7 (not roaming): C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>
Win 7 (roaming): C:\Users\<username>\AppData\Roaming\<AppAuthor>\<AppName>
For Unix, we follow the XDG spec and support $XDG_DATA_HOME.
That means, by default "~/.local/share/<AppName>".
"""
if system == "win32":
if appauthor is None:
appauthor = appname
const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA"
path = os.path.normpath(_get_win_folder(const))
if appname:
if appauthor is not False:
path = os.path.join(path, appauthor, appname)
else:
path = os.path.join(path, appname)
elif system == 'darwin':
path = os.path.expanduser('~/Library/Application Support/')
if appname:
path = os.path.join(path, appname)
else:
path = os.getenv('XDG_DATA_HOME', os.path.expanduser("~/.local/share"))
if appname:
path = os.path.join(path, appname)
if appname and version:
path = os.path.join(path, version)
return path
def site_data_dir(appname=None, appauthor=None, version=None, multipath=False):
"""Return full path to the user-shared data dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"multipath" is an optional parameter only applicable to *nix
which indicates that the entire list of data dirs should be
returned. By default, the first item from XDG_DATA_DIRS is
returned, or '/usr/local/share/<AppName>',
if XDG_DATA_DIRS is not set
Typical user data directories are:
macOS: /Library/Application Support/<AppName>
Unix: /usr/local/share/<AppName> or /usr/share/<AppName>
Win XP: C:\Documents and Settings\All Users\Application Data\<AppAuthor>\<AppName>
Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
Win 7: C:\ProgramData\<AppAuthor>\<AppName> # Hidden, but writeable on Win 7.
For Unix, this is using the $XDG_DATA_DIRS[0] default.
WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
"""
if system == "win32":
if appauthor is None:
appauthor = appname
path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA"))
if appname:
if appauthor is not False:
path = os.path.join(path, appauthor, appname)
else:
path = os.path.join(path, appname)
elif system == 'darwin':
path = os.path.expanduser('/Library/Application Support')
if appname:
path = os.path.join(path, appname)
else:
# XDG default for $XDG_DATA_DIRS
# only first, if multipath is False
path = os.getenv('XDG_DATA_DIRS',
os.pathsep.join(['/usr/local/share', '/usr/share']))
pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
if appname:
if version:
appname = os.path.join(appname, version)
pathlist = [os.sep.join([x, appname]) for x in pathlist]
if multipath:
path = os.pathsep.join(pathlist)
else:
path = pathlist[0]
return path
if appname and version:
path = os.path.join(path, version)
return path
def user_config_dir(appname=None, appauthor=None, version=None, roaming=False):
r"""Return full path to the user-specific config dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"roaming" (boolean, default False) can be set True to use the Windows
roaming appdata directory. That means that for users on a Windows
network setup for roaming profiles, this user data will be
sync'd on login. See
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
for a discussion of issues.
Typical user data directories are:
macOS: same as user_data_dir
Unix: ~/.config/<AppName> # or in $XDG_CONFIG_HOME, if defined
Win *: same as user_data_dir
For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME.
That means, by deafult "~/.config/<AppName>".
"""
if system in ["win32", "darwin"]:
path = user_data_dir(appname, appauthor, None, roaming)
else:
path = os.getenv('XDG_CONFIG_HOME', os.path.expanduser("~/.config"))
if appname:
path = os.path.join(path, appname)
if appname and version:
path = os.path.join(path, version)
return path
def site_config_dir(appname=None, appauthor=None, version=None, multipath=False):
"""Return full path to the user-shared data dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"multipath" is an optional parameter only applicable to *nix
which indicates that the entire list of config dirs should be
returned. By default, the first item from XDG_CONFIG_DIRS is
returned, or '/etc/xdg/<AppName>', if XDG_CONFIG_DIRS is not set
Typical user data directories are:
macOS: same as site_data_dir
Unix: /etc/xdg/<AppName> or $XDG_CONFIG_DIRS[i]/<AppName> for each value in
$XDG_CONFIG_DIRS
Win *: same as site_data_dir
Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
For Unix, this is using the $XDG_CONFIG_DIRS[0] default, if multipath=False
WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
"""
if system in ["win32", "darwin"]:
path = site_data_dir(appname, appauthor)
if appname and version:
path = os.path.join(path, version)
else:
# XDG default for $XDG_CONFIG_DIRS
# only first, if multipath is False
path = os.getenv('XDG_CONFIG_DIRS', '/etc/xdg')
pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
if appname:
if version:
appname = os.path.join(appname, version)
pathlist = [os.sep.join([x, appname]) for x in pathlist]
if multipath:
path = os.pathsep.join(pathlist)
else:
path = pathlist[0]
return path
def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True):
r"""Return full path to the user-specific cache dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"opinion" (boolean) can be False to disable the appending of
"Cache" to the base app data dir for Windows. See
discussion below.
Typical user cache directories are:
macOS: ~/Library/Caches/<AppName>
Unix: ~/.cache/<AppName> (XDG default)
Win XP: C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Cache
Vista: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache
On Windows the only suggestion in the MSDN docs is that local settings go in
the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming
app data dir (the default returned by `user_data_dir` above). Apps typically
put cache data somewhere *under* the given dir here. Some examples:
...\Mozilla\Firefox\Profiles\<ProfileName>\Cache
...\Acme\SuperApp\Cache\1.0
OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value.
This can be disabled with the `opinion=False` option.
"""
if system == "win32":
if appauthor is None:
appauthor = appname
path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA"))
if appname:
if appauthor is not False:
path = os.path.join(path, appauthor, appname)
else:
path = os.path.join(path, appname)
if opinion:
path = os.path.join(path, "Cache")
elif system == 'darwin':
path = os.path.expanduser('~/Library/Caches')
if appname:
path = os.path.join(path, appname)
else:
path = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache'))
if appname:
path = os.path.join(path, appname)
if appname and version:
path = os.path.join(path, version)
return path
def user_log_dir(appname=None, appauthor=None, version=None, opinion=True):
r"""Return full path to the user-specific log dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"opinion" (boolean) can be False to disable the appending of
"Logs" to the base app data dir for Windows, and "log" to the
base cache dir for Unix. See discussion below.
Typical user cache directories are:
macOS: ~/Library/Logs/<AppName>
Unix: ~/.cache/<AppName>/log # or under $XDG_CACHE_HOME if defined
Win XP: C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Logs
Vista: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Logs
On Windows the only suggestion in the MSDN docs is that local settings
go in the `CSIDL_LOCAL_APPDATA` directory. (Note: I'm interested in
examples of what some windows apps use for a logs dir.)
OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA`
value for Windows and appends "log" to the user cache dir for Unix.
This can be disabled with the `opinion=False` option.
"""
if system == "darwin":
path = os.path.join(
os.path.expanduser('~/Library/Logs'),
appname)
elif system == "win32":
path = user_data_dir(appname, appauthor, version)
version = False
if opinion:
path = os.path.join(path, "Logs")
else:
path = user_cache_dir(appname, appauthor, version)
version = False
if opinion:
path = os.path.join(path, "log")
if appname and version:
path = os.path.join(path, version)
return path
class AppDirs(object):
"""Convenience wrapper for getting application dirs."""
def __init__(self, appname, appauthor=None, version=None, roaming=False,
multipath=False):
self.appname = appname
self.appauthor = appauthor
self.version = version
self.roaming = roaming
self.multipath = multipath
@property
def user_data_dir(self):
return user_data_dir(self.appname, self.appauthor,
version=self.version, roaming=self.roaming)
@property
def site_data_dir(self):
return site_data_dir(self.appname, self.appauthor,
version=self.version, multipath=self.multipath)
@property
def user_config_dir(self):
return user_config_dir(self.appname, self.appauthor,
version=self.version, roaming=self.roaming)
@property
def site_config_dir(self):
return site_config_dir(self.appname, self.appauthor,
version=self.version, multipath=self.multipath)
@property
def user_cache_dir(self):
return user_cache_dir(self.appname, self.appauthor,
version=self.version)
@property
def user_log_dir(self):
return user_log_dir(self.appname, self.appauthor,
version=self.version)
#---- internal support stuff
def _get_win_folder_from_registry(csidl_name):
"""This is a fallback technique at best. I'm not sure if using the
registry for this guarantees us the correct answer for all CSIDL_*
names.
"""
import _winreg
shell_folder_name = {
"CSIDL_APPDATA": "AppData",
"CSIDL_COMMON_APPDATA": "Common AppData",
"CSIDL_LOCAL_APPDATA": "Local AppData",
}[csidl_name]
key = _winreg.OpenKey(
_winreg.HKEY_CURRENT_USER,
r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
)
dir, type = _winreg.QueryValueEx(key, shell_folder_name)
return dir
def _get_win_folder_with_pywin32(csidl_name):
from win32com.shell import shellcon, shell
dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0)
# Try to make this a unicode path because SHGetFolderPath does
# not return unicode strings when there is unicode data in the
# path.
try:
dir = unicode(dir)
# Downgrade to short path name if have highbit chars. See
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
has_high_char = False
for c in dir:
if ord(c) > 255:
has_high_char = True
break
if has_high_char:
try:
import win32api
dir = win32api.GetShortPathName(dir)
except ImportError:
pass
except UnicodeError:
pass
return dir
def _get_win_folder_with_ctypes(csidl_name):
import ctypes
csidl_const = {
"CSIDL_APPDATA": 26,
"CSIDL_COMMON_APPDATA": 35,
"CSIDL_LOCAL_APPDATA": 28,
}[csidl_name]
buf = ctypes.create_unicode_buffer(1024)
ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf)
# Downgrade to short path name if have highbit chars. See
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
has_high_char = False
for c in buf:
if ord(c) > 255:
has_high_char = True
break
if has_high_char:
buf2 = ctypes.create_unicode_buffer(1024)
if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024):
buf = buf2
return buf.value
def _get_win_folder_with_jna(csidl_name):
import array
from com.sun import jna
from com.sun.jna.platform import win32
buf_size = win32.WinDef.MAX_PATH * 2
buf = array.zeros('c', buf_size)
shell = win32.Shell32.INSTANCE
shell.SHGetFolderPath(None, getattr(win32.ShlObj, csidl_name), None, win32.ShlObj.SHGFP_TYPE_CURRENT, buf)
dir = jna.Native.toString(buf.tostring()).rstrip("\0")
# Downgrade to short path name if have highbit chars. See
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
has_high_char = False
for c in dir:
if ord(c) > 255:
has_high_char = True
break
if has_high_char:
buf = array.zeros('c', buf_size)
kernel = win32.Kernel32.INSTANCE
if kernal.GetShortPathName(dir, buf, buf_size):
dir = jna.Native.toString(buf.tostring()).rstrip("\0")
return dir
if system == "win32":
try:
import win32com.shell
_get_win_folder = _get_win_folder_with_pywin32
except ImportError:
try:
from ctypes import windll
_get_win_folder = _get_win_folder_with_ctypes
except ImportError:
try:
import com.sun.jna
_get_win_folder = _get_win_folder_with_jna
except ImportError:
_get_win_folder = _get_win_folder_from_registry
#---- self test code
if __name__ == "__main__":
appname = "MyApp"
appauthor = "MyCompany"
props = ("user_data_dir", "site_data_dir",
"user_config_dir", "site_config_dir",
"user_cache_dir", "user_log_dir")
print("-- app dirs (with optional 'version')")
dirs = AppDirs(appname, appauthor, version="1.0")
for prop in props:
print("%s: %s" % (prop, getattr(dirs, prop)))
print("\n-- app dirs (without optional 'version')")
dirs = AppDirs(appname, appauthor)
for prop in props:
print("%s: %s" % (prop, getattr(dirs, prop)))
print("\n-- app dirs (without optional 'appauthor')")
dirs = AppDirs(appname)
for prop in props:
print("%s: %s" % (prop, getattr(dirs, prop)))
print("\n-- app dirs (with disabled 'appauthor')")
dirs = AppDirs(appname, appauthor=False)
for prop in props:
print("%s: %s" % (prop, getattr(dirs, prop)))
"""CacheControl import Interface.
Make it easy to import from cachecontrol without long namespaces.
"""
__author__ = 'Eric Larson'
__email__ = 'eric@ionrock.org'
__version__ = '0.11.7'
from .wrapper import CacheControl
from .adapter import CacheControlAdapter
from .controller import CacheController
import logging
from pip._vendor import requests
from pip._vendor.cachecontrol.adapter import CacheControlAdapter
from pip._vendor.cachecontrol.cache import DictCache
from pip._vendor.cachecontrol.controller import logger
from argparse import ArgumentParser
def setup_logging():
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()
logger.addHandler(handler)
def get_session():
adapter = CacheControlAdapter(
DictCache(),
cache_etags=True,
serializer=None,
heuristic=None,
)
sess = requests.Session()
sess.mount('http://', adapter)
sess.mount('https://', adapter)
sess.cache_controller = adapter.controller
return sess
def get_args():
parser = ArgumentParser()
parser.add_argument('url', help='The URL to try and cache')
return parser.parse_args()
def main(args=None):
args = get_args()
sess = get_session()
# Make a request to get a response
resp = sess.get(args.url)
# Turn on logging
setup_logging()
# try setting the cache
sess.cache_controller.cache_response(resp.request, resp.raw)
# Now try to get it
if sess.cache_controller.cached_request(resp.request):
print('Cached!')
else:
print('Not cached :(')
if __name__ == '__main__':
main()
import types
import functools
from pip._vendor.requests.adapters import HTTPAdapter
from .controller import CacheController
from .cache import DictCache
from .filewrapper import CallbackFileWrapper
class CacheControlAdapter(HTTPAdapter):
invalidating_methods = set(['PUT', 'DELETE'])
def __init__(self, cache=None,
cache_etags=True,
controller_class=None,
serializer=None,
heuristic=None,
*args, **kw):
super(CacheControlAdapter, self).__init__(*args, **kw)
self.cache = cache or DictCache()
self.heuristic = heuristic
controller_factory = controller_class or CacheController
self.controller = controller_factory(
self.cache,
cache_etags=cache_etags,
serializer=serializer,
)
def send(self, request, **kw):
"""
Send a request. Use the request information to see if it
exists in the cache and cache the response if we need to and can.
"""
if request.method == 'GET':
cached_response = self.controller.cached_request(request)
if cached_response:
return self.build_response(request, cached_response,
from_cache=True)
# check for etags and add headers if appropriate
request.headers.update(
self.controller.conditional_headers(request)
)
resp = super(CacheControlAdapter, self).send(request, **kw)
return resp
def build_response(self, request, response, from_cache=False):
"""
Build a response by making a request or using the cache.
This will end up calling send and returning a potentially
cached response
"""
if not from_cache and request.method == 'GET':
# Check for any heuristics that might update headers
# before trying to cache.
if self.heuristic:
response = self.heuristic.apply(response)
# apply any expiration heuristics
if response.status == 304:
# We must have sent an ETag request. This could mean
# that we've been expired already or that we simply
# have an etag. In either case, we want to try and
# update the cache if that is the case.
cached_response = self.controller.update_cached_response(
request, response
)
if cached_response is not response:
from_cache = True
# We are done with the server response, read a
# possible response body (compliant servers will
# not return one, but we cannot be 100% sure) and
# release the connection back to the pool.
response.read(decode_content=False)
response.release_conn()
response = cached_response
# We always cache the 301 responses
elif response.status == 301:
self.controller.cache_response(request, response)
else:
# Wrap the response file with a wrapper that will cache the
# response when the stream has been consumed.
response._fp = CallbackFileWrapper(
response._fp,
functools.partial(
self.controller.cache_response,
request,
response,
)
)
if response.chunked:
super_update_chunk_length = response._update_chunk_length
def _update_chunk_length(self):
super_update_chunk_length()
if self.chunk_left == 0:
self._fp._close()
response._update_chunk_length = types.MethodType(_update_chunk_length, response)
resp = super(CacheControlAdapter, self).build_response(
request, response
)
# See if we should invalidate the cache.
if request.method in self.invalidating_methods and resp.ok:
cache_url = self.controller.cache_url(request.url)
self.cache.delete(cache_url)
# Give the request a from_cache attr to let people use it
resp.from_cache = from_cache
return resp
def close(self):
self.cache.close()
super(CacheControlAdapter, self).close()
"""
The cache object API for implementing caches. The default is a thread
safe in-memory dictionary.
"""
from threading import Lock
class BaseCache(object):
def get(self, key):
raise NotImplemented()
def set(self, key, value):
raise NotImplemented()
def delete(self, key):
raise NotImplemented()
def close(self):
pass
class DictCache(BaseCache):
def __init__(self, init_dict=None):
self.lock = Lock()
self.data = init_dict or {}
def get(self, key):
return self.data.get(key, None)
def set(self, key, value):
with self.lock:
self.data.update({key: value})
def delete(self, key):
with self.lock:
if key in self.data:
self.data.pop(key)
from textwrap import dedent
try:
from .file_cache import FileCache
except ImportError:
notice = dedent('''
NOTE: In order to use the FileCache you must have
lockfile installed. You can install it via pip:
pip install lockfile
''')
print(notice)
try:
import redis
from .redis_cache import RedisCache
except ImportError:
pass
import hashlib
import os
from pip._vendor.lockfile import LockFile
from pip._vendor.lockfile.mkdirlockfile import MkdirLockFile
from ..cache import BaseCache
from ..controller import CacheController
def _secure_open_write(filename, fmode):
# We only want to write to this file, so open it in write only mode
flags = os.O_WRONLY
# os.O_CREAT | os.O_EXCL will fail if the file already exists, so we only
# will open *new* files.
# We specify this because we want to ensure that the mode we pass is the
# mode of the file.
flags |= os.O_CREAT | os.O_EXCL
# Do not follow symlinks to prevent someone from making a symlink that
# we follow and insecurely open a cache file.
if hasattr(os, "O_NOFOLLOW"):
flags |= os.O_NOFOLLOW
# On Windows we'll mark this file as binary
if hasattr(os, "O_BINARY"):
flags |= os.O_BINARY
# Before we open our file, we want to delete any existing file that is
# there
try:
os.remove(filename)
except (IOError, OSError):
# The file must not exist already, so we can just skip ahead to opening
pass
# Open our file, the use of os.O_CREAT | os.O_EXCL will ensure that if a
# race condition happens between the os.remove and this line, that an
# error will be raised. Because we utilize a lockfile this should only
# happen if someone is attempting to attack us.
fd = os.open(filename, flags, fmode)
try:
return os.fdopen(fd, "wb")
except:
# An error occurred wrapping our FD in a file object
os.close(fd)
raise
class FileCache(BaseCache):
def __init__(self, directory, forever=False, filemode=0o0600,
dirmode=0o0700, use_dir_lock=None, lock_class=None):
if use_dir_lock is not None and lock_class is not None:
raise ValueError("Cannot use use_dir_lock and lock_class together")
if use_dir_lock:
lock_class = MkdirLockFile
if lock_class is None:
lock_class = LockFile
self.directory = directory
self.forever = forever
self.filemode = filemode
self.dirmode = dirmode
self.lock_class = lock_class
@staticmethod
def encode(x):
return hashlib.sha224(x.encode()).hexdigest()
def _fn(self, name):
# NOTE: This method should not change as some may depend on it.
# See: https://github.com/ionrock/cachecontrol/issues/63
hashed = self.encode(name)
parts = list(hashed[:5]) + [hashed]
return os.path.join(self.directory, *parts)
def get(self, key):
name = self._fn(key)
if not os.path.exists(name):
return None
with open(name, 'rb') as fh:
return fh.read()
def set(self, key, value):
name = self._fn(key)
# Make sure the directory exists
try:
os.makedirs(os.path.dirname(name), self.dirmode)
except (IOError, OSError):
pass
with self.lock_class(name) as lock:
# Write our actual file
with _secure_open_write(lock.path, self.filemode) as fh:
fh.write(value)
def delete(self, key):
name = self._fn(key)
if not self.forever:
os.remove(name)
def url_to_file_path(url, filecache):
"""Return the file cache path based on the URL.
This does not ensure the file exists!
"""
key = CacheController.cache_url(url)
return filecache._fn(key)
from __future__ import division
from datetime import datetime
def total_seconds(td):
"""Python 2.6 compatability"""
if hasattr(td, 'total_seconds'):
return td.total_seconds()
ms = td.microseconds
secs = (td.seconds + td.days * 24 * 3600)
return (ms + secs * 10**6) / 10**6
class RedisCache(object):
def __init__(self, conn):
self.conn = conn
def get(self, key):
return self.conn.get(key)
def set(self, key, value, expires=None):
if not expires:
self.conn.set(key, value)
else:
expires = expires - datetime.now()
self.conn.setex(key, total_seconds(expires), value)
def delete(self, key):
self.conn.delete(key)
def clear(self):
"""Helper for clearing all the keys in a database. Use with
caution!"""
for key in self.conn.keys():
self.conn.delete(key)
def close(self):
self.conn.disconnect()
try:
from urllib.parse import urljoin
except ImportError:
from urlparse import urljoin
try:
import cPickle as pickle
except ImportError:
import pickle
from pip._vendor.requests.packages.urllib3.response import HTTPResponse
from pip._vendor.requests.packages.urllib3.util import is_fp_closed
# Replicate some six behaviour
try:
text_type = (unicode,)
except NameError:
text_type = (str,)
"""
The httplib2 algorithms ported for use with requests.
"""
import logging
import re
import calendar
import time
from email.utils import parsedate_tz
from pip._vendor.requests.structures import CaseInsensitiveDict
from .cache import DictCache
from .serialize import Serializer
logger = logging.getLogger(__name__)
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
def parse_uri(uri):
"""Parses a URI using the regex given in Appendix B of RFC 3986.
(scheme, authority, path, query, fragment) = parse_uri(uri)
"""
groups = URI.match(uri).groups()
return (groups[1], groups[3], groups[4], groups[6], groups[8])
class CacheController(object):
"""An interface to see if request should cached or not.
"""
def __init__(self, cache=None, cache_etags=True, serializer=None):
self.cache = cache or DictCache()
self.cache_etags = cache_etags
self.serializer = serializer or Serializer()
@classmethod
def _urlnorm(cls, uri):
"""Normalize the URL to create a safe key for the cache"""
(scheme, authority, path, query, fragment) = parse_uri(uri)
if not scheme or not authority:
raise Exception("Only absolute URIs are allowed. uri = %s" % uri)
scheme = scheme.lower()
authority = authority.lower()
if not path:
path = "/"
# Could do syntax based normalization of the URI before
# computing the digest. See Section 6.2.2 of Std 66.
request_uri = query and "?".join([path, query]) or path
defrag_uri = scheme + "://" + authority + request_uri
return defrag_uri
@classmethod
def cache_url(cls, uri):
return cls._urlnorm(uri)
def parse_cache_control(self, headers):
"""
Parse the cache control headers returning a dictionary with values
for the different directives.
"""
retval = {}
cc_header = 'cache-control'
if 'Cache-Control' in headers:
cc_header = 'Cache-Control'
if cc_header in headers:
parts = headers[cc_header].split(',')
parts_with_args = [
tuple([x.strip().lower() for x in part.split("=", 1)])
for part in parts if -1 != part.find("=")
]
parts_wo_args = [
(name.strip().lower(), 1)
for name in parts if -1 == name.find("=")
]
retval = dict(parts_with_args + parts_wo_args)
return retval
def cached_request(self, request):
"""
Return a cached response if it exists in the cache, otherwise
return False.
"""
cache_url = self.cache_url(request.url)
logger.debug('Looking up "%s" in the cache', cache_url)
cc = self.parse_cache_control(request.headers)
# Bail out if the request insists on fresh data
if 'no-cache' in cc:
logger.debug('Request header has "no-cache", cache bypassed')
return False
if 'max-age' in cc and cc['max-age'] == 0:
logger.debug('Request header has "max_age" as 0, cache bypassed')
return False
# Request allows serving from the cache, let's see if we find something
cache_data = self.cache.get(cache_url)
if cache_data is None:
logger.debug('No cache entry available')
return False
# Check whether it can be deserialized
resp = self.serializer.loads(request, cache_data)
if not resp:
logger.warning('Cache entry deserialization failed, entry ignored')
return False
# If we have a cached 301, return it immediately. We don't
# need to test our response for other headers b/c it is
# intrinsically "cacheable" as it is Permanent.
# See:
# https://tools.ietf.org/html/rfc7231#section-6.4.2
#
# Client can try to refresh the value by repeating the request
# with cache busting headers as usual (ie no-cache).
if resp.status == 301:
msg = ('Returning cached "301 Moved Permanently" response '
'(ignoring date and etag information)')
logger.debug(msg)
return resp
headers = CaseInsensitiveDict(resp.headers)
if not headers or 'date' not in headers:
if 'etag' not in headers:
# Without date or etag, the cached response can never be used
# and should be deleted.
logger.debug('Purging cached response: no date or etag')
self.cache.delete(cache_url)
logger.debug('Ignoring cached response: no date')
return False
now = time.time()
date = calendar.timegm(
parsedate_tz(headers['date'])
)
current_age = max(0, now - date)
logger.debug('Current age based on date: %i', current_age)
# TODO: There is an assumption that the result will be a
# urllib3 response object. This may not be best since we
# could probably avoid instantiating or constructing the
# response until we know we need it.
resp_cc = self.parse_cache_control(headers)
# determine freshness
freshness_lifetime = 0
# Check the max-age pragma in the cache control header
if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
freshness_lifetime = int(resp_cc['max-age'])
logger.debug('Freshness lifetime from max-age: %i',
freshness_lifetime)
# If there isn't a max-age, check for an expires header
elif 'expires' in headers:
expires = parsedate_tz(headers['expires'])
if expires is not None:
expire_time = calendar.timegm(expires) - date
freshness_lifetime = max(0, expire_time)
logger.debug("Freshness lifetime from expires: %i",
freshness_lifetime)
# Determine if we are setting freshness limit in the
# request. Note, this overrides what was in the response.
if 'max-age' in cc:
try:
freshness_lifetime = int(cc['max-age'])
logger.debug('Freshness lifetime from request max-age: %i',
freshness_lifetime)
except ValueError:
freshness_lifetime = 0
if 'min-fresh' in cc:
try:
min_fresh = int(cc['min-fresh'])
except ValueError:
min_fresh = 0
# adjust our current age by our min fresh
current_age += min_fresh
logger.debug('Adjusted current age from min-fresh: %i',
current_age)
# Return entry if it is fresh enough
if freshness_lifetime > current_age:
logger.debug('The response is "fresh", returning cached response')
logger.debug('%i > %i', freshness_lifetime, current_age)
return resp
# we're not fresh. If we don't have an Etag, clear it out
if 'etag' not in headers:
logger.debug(
'The cached response is "stale" with no etag, purging'
)
self.cache.delete(cache_url)
# return the original handler
return False
def conditional_headers(self, request):
cache_url = self.cache_url(request.url)
resp = self.serializer.loads(request, self.cache.get(cache_url))
new_headers = {}
if resp:
headers = CaseInsensitiveDict(resp.headers)
if 'etag' in headers:
new_headers['If-None-Match'] = headers['ETag']
if 'last-modified' in headers:
new_headers['If-Modified-Since'] = headers['Last-Modified']
return new_headers
def cache_response(self, request, response, body=None):
"""
Algorithm for caching requests.
This assumes a requests Response object.
"""
# From httplib2: Don't cache 206's since we aren't going to
# handle byte range requests
cacheable_status_codes = [200, 203, 300, 301]
if response.status not in cacheable_status_codes:
logger.debug(
'Status code %s not in %s',
response.status,
cacheable_status_codes
)
return
response_headers = CaseInsensitiveDict(response.headers)
# If we've been given a body, our response has a Content-Length, that
# Content-Length is valid then we can check to see if the body we've
# been given matches the expected size, and if it doesn't we'll just
# skip trying to cache it.
if (body is not None and
"content-length" in response_headers and
response_headers["content-length"].isdigit() and
int(response_headers["content-length"]) != len(body)):
return
cc_req = self.parse_cache_control(request.headers)
cc = self.parse_cache_control(response_headers)
cache_url = self.cache_url(request.url)
logger.debug('Updating cache with response from "%s"', cache_url)
# Delete it from the cache if we happen to have it stored there
no_store = False
if cc.get('no-store'):
no_store = True
logger.debug('Response header has "no-store"')
if cc_req.get('no-store'):
no_store = True
logger.debug('Request header has "no-store"')
if no_store and self.cache.get(cache_url):
logger.debug('Purging existing cache entry to honor "no-store"')
self.cache.delete(cache_url)
# If we've been given an etag, then keep the response
if self.cache_etags and 'etag' in response_headers:
logger.debug('Caching due to etag')
self.cache.set(
cache_url,
self.serializer.dumps(request, response, body=body),
)
# Add to the cache any 301s. We do this before looking that
# the Date headers.
elif response.status == 301:
logger.debug('Caching permanant redirect')
self.cache.set(
cache_url,
self.serializer.dumps(request, response)
)
# Add to the cache if the response headers demand it. If there
# is no date header then we can't do anything about expiring
# the cache.
elif 'date' in response_headers:
# cache when there is a max-age > 0
if cc and cc.get('max-age'):
if cc['max-age'].isdigit() and int(cc['max-age']) > 0:
logger.debug('Caching b/c date exists and max-age > 0')
self.cache.set(
cache_url,
self.serializer.dumps(request, response, body=body),
)
# If the request can expire, it means we should cache it
# in the meantime.
elif 'expires' in response_headers:
if response_headers['expires']:
logger.debug('Caching b/c of expires header')
self.cache.set(
cache_url,
self.serializer.dumps(request, response, body=body),
)
def update_cached_response(self, request, response):
"""On a 304 we will get a new set of headers that we want to
update our cached value with, assuming we have one.
This should only ever be called when we've sent an ETag and
gotten a 304 as the response.
"""
cache_url = self.cache_url(request.url)
cached_response = self.serializer.loads(
request,
self.cache.get(cache_url)
)
if not cached_response:
# we didn't have a cached response
return response
# Lets update our headers with the headers from the new request:
# http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1
#
# The server isn't supposed to send headers that would make
# the cached body invalid. But... just in case, we'll be sure
# to strip out ones we know that might be problmatic due to
# typical assumptions.
excluded_headers = [
"content-length",
]
cached_response.headers.update(
dict((k, v) for k, v in response.headers.items()
if k.lower() not in excluded_headers)
)
# we want a 200 b/c we have content via the cache
cached_response.status = 200
# update our cache
self.cache.set(
cache_url,
self.serializer.dumps(request, cached_response),
)
return cached_response
from io import BytesIO
class CallbackFileWrapper(object):
"""
Small wrapper around a fp object which will tee everything read into a
buffer, and when that file is closed it will execute a callback with the
contents of that buffer.
All attributes are proxied to the underlying file object.
This class uses members with a double underscore (__) leading prefix so as
not to accidentally shadow an attribute.
"""
def __init__(self, fp, callback):
self.__buf = BytesIO()
self.__fp = fp
self.__callback = callback
def __getattr__(self, name):
# The vaguaries of garbage collection means that self.__fp is
# not always set. By using __getattribute__ and the private
# name[0] allows looking up the attribute value and raising an
# AttributeError when it doesn't exist. This stop thigns from
# infinitely recursing calls to getattr in the case where
# self.__fp hasn't been set.
#
# [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers
fp = self.__getattribute__('_CallbackFileWrapper__fp')
return getattr(fp, name)
def __is_fp_closed(self):
try:
return self.__fp.fp is None
except AttributeError:
pass
try:
return self.__fp.closed
except AttributeError:
pass
# We just don't cache it then.
# TODO: Add some logging here...
return False
def _close(self):
if self.__callback:
self.__callback(self.__buf.getvalue())
# We assign this to None here, because otherwise we can get into
# really tricky problems where the CPython interpreter dead locks
# because the callback is holding a reference to something which
# has a __del__ method. Setting this to None breaks the cycle
# and allows the garbage collector to do it's thing normally.
self.__callback = None
def read(self, amt=None):
data = self.__fp.read(amt)
self.__buf.write(data)
if self.__is_fp_closed():
self._close()
return data
def _safe_read(self, amt):
data = self.__fp._safe_read(amt)
if amt == 2 and data == b'\r\n':
# urllib executes this read to toss the CRLF at the end
# of the chunk.
return data
self.__buf.write(data)
if self.__is_fp_closed():
self._close()
return data
import calendar
import time
from email.utils import formatdate, parsedate, parsedate_tz
from datetime import datetime, timedelta
TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"
def expire_after(delta, date=None):
date = date or datetime.now()
return date + delta
def datetime_to_header(dt):
return formatdate(calendar.timegm(dt.timetuple()))
class BaseHeuristic(object):
def warning(self, response):
"""
Return a valid 1xx warning header value describing the cache
adjustments.
The response is provided too allow warnings like 113
http://tools.ietf.org/html/rfc7234#section-5.5.4 where we need
to explicitly say response is over 24 hours old.
"""
return '110 - "Response is Stale"'
def update_headers(self, response):
"""Update the response headers with any new headers.
NOTE: This SHOULD always include some Warning header to
signify that the response was cached by the client, not
by way of the provided headers.
"""
return {}
def apply(self, response):
updated_headers = self.update_headers(response)
if updated_headers:
response.headers.update(updated_headers)
warning_header_value = self.warning(response)
if warning_header_value is not None:
response.headers.update({'Warning': warning_header_value})
return response
class OneDayCache(BaseHeuristic):
"""
Cache the response by providing an expires 1 day in the
future.
"""
def update_headers(self, response):
headers = {}
if 'expires' not in response.headers:
date = parsedate(response.headers['date'])
expires = expire_after(timedelta(days=1),
date=datetime(*date[:6]))
headers['expires'] = datetime_to_header(expires)
headers['cache-control'] = 'public'
return headers
class ExpiresAfter(BaseHeuristic):
"""
Cache **all** requests for a defined time period.
"""
def __init__(self, **kw):
self.delta = timedelta(**kw)
def update_headers(self, response):
expires = expire_after(self.delta)
return {
'expires': datetime_to_header(expires),
'cache-control': 'public',
}
def warning(self, response):
tmpl = '110 - Automatically cached for %s. Response might be stale'
return tmpl % self.delta
class LastModified(BaseHeuristic):
"""
If there is no Expires header already, fall back on Last-Modified
using the heuristic from
http://tools.ietf.org/html/rfc7234#section-4.2.2
to calculate a reasonable value.
Firefox also does something like this per
https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching_FAQ
http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397
Unlike mozilla we limit this to 24-hr.
"""
cacheable_by_default_statuses = set([
200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501
])
def update_headers(self, resp):
headers = resp.headers
if 'expires' in headers:
return {}
if 'cache-control' in headers and headers['cache-control'] != 'public':
return {}
if resp.status not in self.cacheable_by_default_statuses:
return {}
if 'date' not in headers or 'last-modified' not in headers:
return {}
date = calendar.timegm(parsedate_tz(headers['date']))
last_modified = parsedate(headers['last-modified'])
if date is None or last_modified is None:
return {}
now = time.time()
current_age = max(0, now - date)
delta = date - calendar.timegm(last_modified)
freshness_lifetime = max(0, min(delta / 10, 24 * 3600))
if freshness_lifetime <= current_age:
return {}
expires = date + freshness_lifetime
return {'expires': time.strftime(TIME_FMT, time.gmtime(expires))}
def warning(self, resp):
return None
import base64
import io
import json
import zlib
from pip._vendor.requests.structures import CaseInsensitiveDict
from .compat import HTTPResponse, pickle, text_type
def _b64_encode_bytes(b):
return base64.b64encode(b).decode("ascii")
def _b64_encode_str(s):
return _b64_encode_bytes(s.encode("utf8"))
def _b64_encode(s):
if isinstance(s, text_type):
return _b64_encode_str(s)
return _b64_encode_bytes(s)
def _b64_decode_bytes(b):
return base64.b64decode(b.encode("ascii"))
def _b64_decode_str(s):
return _b64_decode_bytes(s).decode("utf8")
class Serializer(object):
def dumps(self, request, response, body=None):
response_headers = CaseInsensitiveDict(response.headers)
if body is None:
body = response.read(decode_content=False)
# NOTE: 99% sure this is dead code. I'm only leaving it
# here b/c I don't have a test yet to prove
# it. Basically, before using
# `cachecontrol.filewrapper.CallbackFileWrapper`,
# this made an effort to reset the file handle. The
# `CallbackFileWrapper` short circuits this code by
# setting the body as the content is consumed, the
# result being a `body` argument is *always* passed
# into cache_response, and in turn,
# `Serializer.dump`.
response._fp = io.BytesIO(body)
data = {
"response": {
"body": _b64_encode_bytes(body),
"headers": dict(
(_b64_encode(k), _b64_encode(v))
for k, v in response.headers.items()
),
"status": response.status,
"version": response.version,
"reason": _b64_encode_str(response.reason),
"strict": response.strict,
"decode_content": response.decode_content,
},
}
# Construct our vary headers
data["vary"] = {}
if "vary" in response_headers:
varied_headers = response_headers['vary'].split(',')
for header in varied_headers:
header = header.strip()
data["vary"][header] = request.headers.get(header, None)
# Encode our Vary headers to ensure they can be serialized as JSON
data["vary"] = dict(
(_b64_encode(k), _b64_encode(v) if v is not None else v)
for k, v in data["vary"].items()
)
return b",".join([
b"cc=2",
zlib.compress(
json.dumps(
data, separators=(",", ":"), sort_keys=True,
).encode("utf8"),
),
])
def loads(self, request, data):
# Short circuit if we've been given an empty set of data
if not data:
return
# Determine what version of the serializer the data was serialized
# with
try:
ver, data = data.split(b",", 1)
except ValueError:
ver = b"cc=0"
# Make sure that our "ver" is actually a version and isn't a false
# positive from a , being in the data stream.
if ver[:3] != b"cc=":
data = ver + data
ver = b"cc=0"
# Get the version number out of the cc=N
ver = ver.split(b"=", 1)[-1].decode("ascii")
# Dispatch to the actual load method for the given version
try:
return getattr(self, "_loads_v{0}".format(ver))(request, data)
except AttributeError:
# This is a version we don't have a loads function for, so we'll
# just treat it as a miss and return None
return
def prepare_response(self, request, cached):
"""Verify our vary headers match and construct a real urllib3
HTTPResponse object.
"""
# Special case the '*' Vary value as it means we cannot actually
# determine if the cached response is suitable for this request.
if "*" in cached.get("vary", {}):
return
# Ensure that the Vary headers for the cached response match our
# request
for header, value in cached.get("vary", {}).items():
if request.headers.get(header, None) != value:
return
body_raw = cached["response"].pop("body")
headers = CaseInsensitiveDict(data=cached['response']['headers'])
if headers.get('transfer-encoding', '') == 'chunked':
headers.pop('transfer-encoding')
cached['response']['headers'] = headers
try:
body = io.BytesIO(body_raw)
except TypeError:
# This can happen if cachecontrol serialized to v1 format (pickle)
# using Python 2. A Python 2 str(byte string) will be unpickled as
# a Python 3 str (unicode string), which will cause the above to
# fail with:
#
# TypeError: 'str' does not support the buffer interface
body = io.BytesIO(body_raw.encode('utf8'))
return HTTPResponse(
body=body,
preload_content=False,
**cached["response"]
)
def _loads_v0(self, request, data):
# The original legacy cache data. This doesn't contain enough
# information to construct everything we need, so we'll treat this as
# a miss.
return
def _loads_v1(self, request, data):
try:
cached = pickle.loads(data)
except ValueError:
return
return self.prepare_response(request, cached)
def _loads_v2(self, request, data):
try:
cached = json.loads(zlib.decompress(data).decode("utf8"))
except ValueError:
return
# We need to decode the items that we've base64 encoded
cached["response"]["body"] = _b64_decode_bytes(
cached["response"]["body"]
)
cached["response"]["headers"] = dict(
(_b64_decode_str(k), _b64_decode_str(v))
for k, v in cached["response"]["headers"].items()
)
cached["response"]["reason"] = _b64_decode_str(
cached["response"]["reason"],
)
cached["vary"] = dict(
(_b64_decode_str(k), _b64_decode_str(v) if v is not None else v)
for k, v in cached["vary"].items()
)
return self.prepare_response(request, cached)
from .adapter import CacheControlAdapter
from .cache import DictCache
def CacheControl(sess,
cache=None,
cache_etags=True,
serializer=None,
heuristic=None):
cache = cache or DictCache()
adapter = CacheControlAdapter(
cache,
cache_etags=cache_etags,
serializer=serializer,
heuristic=heuristic,
)
sess.mount('http://', adapter)
sess.mount('https://', adapter)
return sess
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
from .initialise import init, deinit, reinit, colorama_text
from .ansi import Fore, Back, Style, Cursor
from .ansitowin32 import AnsiToWin32
__version__ = '0.3.7'
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
'''
This module generates ANSI character codes to printing colors to terminals.
See: http://en.wikipedia.org/wiki/ANSI_escape_code
'''
CSI = '\033['
OSC = '\033]'
BEL = '\007'
def code_to_chars(code):
return CSI + str(code) + 'm'
def set_title(title):
return OSC + '2;' + title + BEL
def clear_screen(mode=2):
return CSI + str(mode) + 'J'
def clear_line(mode=2):
return CSI + str(mode) + 'K'
class AnsiCodes(object):
def __init__(self):
# the subclasses declare class attributes which are numbers.
# Upon instantiation we define instance attributes, which are the same
# as the class attributes but wrapped with the ANSI escape sequence
for name in dir(self):
if not name.startswith('_'):
value = getattr(self, name)
setattr(self, name, code_to_chars(value))
class AnsiCursor(object):
def UP(self, n=1):
return CSI + str(n) + 'A'
def DOWN(self, n=1):
return CSI + str(n) + 'B'
def FORWARD(self, n=1):
return CSI + str(n) + 'C'
def BACK(self, n=1):
return CSI + str(n) + 'D'
def POS(self, x=1, y=1):
return CSI + str(y) + ';' + str(x) + 'H'
class AnsiFore(AnsiCodes):
BLACK = 30
RED = 31
GREEN = 32
YELLOW = 33
BLUE = 34
MAGENTA = 35
CYAN = 36
WHITE = 37
RESET = 39
# These are fairly well supported, but not part of the standard.
LIGHTBLACK_EX = 90
LIGHTRED_EX = 91
LIGHTGREEN_EX = 92
LIGHTYELLOW_EX = 93
LIGHTBLUE_EX = 94
LIGHTMAGENTA_EX = 95
LIGHTCYAN_EX = 96
LIGHTWHITE_EX = 97
class AnsiBack(AnsiCodes):
BLACK = 40
RED = 41
GREEN = 42
YELLOW = 43
BLUE = 44
MAGENTA = 45
CYAN = 46
WHITE = 47
RESET = 49
# These are fairly well supported, but not part of the standard.
LIGHTBLACK_EX = 100
LIGHTRED_EX = 101
LIGHTGREEN_EX = 102
LIGHTYELLOW_EX = 103
LIGHTBLUE_EX = 104
LIGHTMAGENTA_EX = 105
LIGHTCYAN_EX = 106
LIGHTWHITE_EX = 107
class AnsiStyle(AnsiCodes):
BRIGHT = 1
DIM = 2
NORMAL = 22
RESET_ALL = 0
Fore = AnsiFore()
Back = AnsiBack()
Style = AnsiStyle()
Cursor = AnsiCursor()
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
import re
import sys
import os
from .ansi import AnsiFore, AnsiBack, AnsiStyle, Style
from .winterm import WinTerm, WinColor, WinStyle
from .win32 import windll, winapi_test
winterm = None
if windll is not None:
winterm = WinTerm()
def is_stream_closed(stream):
return not hasattr(stream, 'closed') or stream.closed
def is_a_tty(stream):
return hasattr(stream, 'isatty') and stream.isatty()
class StreamWrapper(object):
'''
Wraps a stream (such as stdout), acting as a transparent proxy for all
attribute access apart from method 'write()', which is delegated to our
Converter instance.
'''
def __init__(self, wrapped, converter):
# double-underscore everything to prevent clashes with names of
# attributes on the wrapped stream object.
self.__wrapped = wrapped
self.__convertor = converter
def __getattr__(self, name):
return getattr(self.__wrapped, name)
def write(self, text):
self.__convertor.write(text)
class AnsiToWin32(object):
'''
Implements a 'write()' method which, on Windows, will strip ANSI character
sequences from the text, and if outputting to a tty, will convert them into
win32 function calls.
'''
ANSI_CSI_RE = re.compile('\001?\033\[((?:\d|;)*)([a-zA-Z])\002?') # Control Sequence Introducer
ANSI_OSC_RE = re.compile('\001?\033\]((?:.|;)*?)(\x07)\002?') # Operating System Command
def __init__(self, wrapped, convert=None, strip=None, autoreset=False):
# The wrapped stream (normally sys.stdout or sys.stderr)
self.wrapped = wrapped
# should we reset colors to defaults after every .write()
self.autoreset = autoreset
# create the proxy wrapping our output stream
self.stream = StreamWrapper(wrapped, self)
on_windows = os.name == 'nt'
# We test if the WinAPI works, because even if we are on Windows
# we may be using a terminal that doesn't support the WinAPI
# (e.g. Cygwin Terminal). In this case it's up to the terminal
# to support the ANSI codes.
conversion_supported = on_windows and winapi_test()
# should we strip ANSI sequences from our output?
if strip is None:
strip = conversion_supported or (not is_stream_closed(wrapped) and not is_a_tty(wrapped))
self.strip = strip
# should we should convert ANSI sequences into win32 calls?
if convert is None:
convert = conversion_supported and not is_stream_closed(wrapped) and is_a_tty(wrapped)
self.convert = convert
# dict of ansi codes to win32 functions and parameters
self.win32_calls = self.get_win32_calls()
# are we wrapping stderr?
self.on_stderr = self.wrapped is sys.stderr
def should_wrap(self):
'''
True if this class is actually needed. If false, then the output
stream will not be affected, nor will win32 calls be issued, so
wrapping stdout is not actually required. This will generally be
False on non-Windows platforms, unless optional functionality like
autoreset has been requested using kwargs to init()
'''
return self.convert or self.strip or self.autoreset
def get_win32_calls(self):
if self.convert and winterm:
return {
AnsiStyle.RESET_ALL: (winterm.reset_all, ),
AnsiStyle.BRIGHT: (winterm.style, WinStyle.BRIGHT),
AnsiStyle.DIM: (winterm.style, WinStyle.NORMAL),
AnsiStyle.NORMAL: (winterm.style, WinStyle.NORMAL),
AnsiFore.BLACK: (winterm.fore, WinColor.BLACK),
AnsiFore.RED: (winterm.fore, WinColor.RED),
AnsiFore.GREEN: (winterm.fore, WinColor.GREEN),
AnsiFore.YELLOW: (winterm.fore, WinColor.YELLOW),
AnsiFore.BLUE: (winterm.fore, WinColor.BLUE),
AnsiFore.MAGENTA: (winterm.fore, WinColor.MAGENTA),
AnsiFore.CYAN: (winterm.fore, WinColor.CYAN),
AnsiFore.WHITE: (winterm.fore, WinColor.GREY),
AnsiFore.RESET: (winterm.fore, ),
AnsiFore.LIGHTBLACK_EX: (winterm.fore, WinColor.BLACK, True),
AnsiFore.LIGHTRED_EX: (winterm.fore, WinColor.RED, True),
AnsiFore.LIGHTGREEN_EX: (winterm.fore, WinColor.GREEN, True),
AnsiFore.LIGHTYELLOW_EX: (winterm.fore, WinColor.YELLOW, True),
AnsiFore.LIGHTBLUE_EX: (winterm.fore, WinColor.BLUE, True),
AnsiFore.LIGHTMAGENTA_EX: (winterm.fore, WinColor.MAGENTA, True),
AnsiFore.LIGHTCYAN_EX: (winterm.fore, WinColor.CYAN, True),
AnsiFore.LIGHTWHITE_EX: (winterm.fore, WinColor.GREY, True),
AnsiBack.BLACK: (winterm.back, WinColor.BLACK),
AnsiBack.RED: (winterm.back, WinColor.RED),
AnsiBack.GREEN: (winterm.back, WinColor.GREEN),
AnsiBack.YELLOW: (winterm.back, WinColor.YELLOW),
AnsiBack.BLUE: (winterm.back, WinColor.BLUE),
AnsiBack.MAGENTA: (winterm.back, WinColor.MAGENTA),
AnsiBack.CYAN: (winterm.back, WinColor.CYAN),
AnsiBack.WHITE: (winterm.back, WinColor.GREY),
AnsiBack.RESET: (winterm.back, ),
AnsiBack.LIGHTBLACK_EX: (winterm.back, WinColor.BLACK, True),
AnsiBack.LIGHTRED_EX: (winterm.back, WinColor.RED, True),
AnsiBack.LIGHTGREEN_EX: (winterm.back, WinColor.GREEN, True),
AnsiBack.LIGHTYELLOW_EX: (winterm.back, WinColor.YELLOW, True),
AnsiBack.LIGHTBLUE_EX: (winterm.back, WinColor.BLUE, True),
AnsiBack.LIGHTMAGENTA_EX: (winterm.back, WinColor.MAGENTA, True),
AnsiBack.LIGHTCYAN_EX: (winterm.back, WinColor.CYAN, True),
AnsiBack.LIGHTWHITE_EX: (winterm.back, WinColor.GREY, True),
}
return dict()
def write(self, text):
if self.strip or self.convert:
self.write_and_convert(text)
else:
self.wrapped.write(text)
self.wrapped.flush()
if self.autoreset:
self.reset_all()
def reset_all(self):
if self.convert:
self.call_win32('m', (0,))
elif not self.strip and not is_stream_closed(self.wrapped):
self.wrapped.write(Style.RESET_ALL)
def write_and_convert(self, text):
'''
Write the given text to our wrapped stream, stripping any ANSI
sequences from the text, and optionally converting them into win32
calls.
'''
cursor = 0
text = self.convert_osc(text)
for match in self.ANSI_CSI_RE.finditer(text):
start, end = match.span()
self.write_plain_text(text, cursor, start)
self.convert_ansi(*match.groups())
cursor = end
self.write_plain_text(text, cursor, len(text))
def write_plain_text(self, text, start, end):
if start < end:
self.wrapped.write(text[start:end])
self.wrapped.flush()
def convert_ansi(self, paramstring, command):
if self.convert:
params = self.extract_params(command, paramstring)
self.call_win32(command, params)
def extract_params(self, command, paramstring):
if command in 'Hf':
params = tuple(int(p) if len(p) != 0 else 1 for p in paramstring.split(';'))
while len(params) < 2:
# defaults:
params = params + (1,)
else:
params = tuple(int(p) for p in paramstring.split(';') if len(p) != 0)
if len(params) == 0:
# defaults:
if command in 'JKm':
params = (0,)
elif command in 'ABCD':
params = (1,)
return params
def call_win32(self, command, params):
if command == 'm':
for param in params:
if param in self.win32_calls:
func_args = self.win32_calls[param]
func = func_args[0]
args = func_args[1:]
kwargs = dict(on_stderr=self.on_stderr)
func(*args, **kwargs)
elif command in 'J':
winterm.erase_screen(params[0], on_stderr=self.on_stderr)
elif command in 'K':
winterm.erase_line(params[0], on_stderr=self.on_stderr)
elif command in 'Hf': # cursor position - absolute
winterm.set_cursor_position(params, on_stderr=self.on_stderr)
elif command in 'ABCD': # cursor position - relative
n = params[0]
# A - up, B - down, C - forward, D - back
x, y = {'A': (0, -n), 'B': (0, n), 'C': (n, 0), 'D': (-n, 0)}[command]
winterm.cursor_adjust(x, y, on_stderr=self.on_stderr)
def convert_osc(self, text):
for match in self.ANSI_OSC_RE.finditer(text):
start, end = match.span()
text = text[:start] + text[end:]
paramstring, command = match.groups()
if command in '\x07': # \x07 = BEL
params = paramstring.split(";")
# 0 - change title and icon (we will only change title)
# 1 - change icon (we don't support this)
# 2 - change title
if params[0] in '02':
winterm.set_title(params[1])
return text
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
import atexit
import contextlib
import sys
from .ansitowin32 import AnsiToWin32
orig_stdout = None
orig_stderr = None
wrapped_stdout = None
wrapped_stderr = None
atexit_done = False
def reset_all():
if AnsiToWin32 is not None: # Issue #74: objects might become None at exit
AnsiToWin32(orig_stdout).reset_all()
def init(autoreset=False, convert=None, strip=None, wrap=True):
if not wrap and any([autoreset, convert, strip]):
raise ValueError('wrap=False conflicts with any other arg=True')
global wrapped_stdout, wrapped_stderr
global orig_stdout, orig_stderr
orig_stdout = sys.stdout
orig_stderr = sys.stderr
if sys.stdout is None:
wrapped_stdout = None
else:
sys.stdout = wrapped_stdout = \
wrap_stream(orig_stdout, convert, strip, autoreset, wrap)
if sys.stderr is None:
wrapped_stderr = None
else:
sys.stderr = wrapped_stderr = \
wrap_stream(orig_stderr, convert, strip, autoreset, wrap)
global atexit_done
if not atexit_done:
atexit.register(reset_all)
atexit_done = True
def deinit():
if orig_stdout is not None:
sys.stdout = orig_stdout
if orig_stderr is not None:
sys.stderr = orig_stderr
@contextlib.contextmanager
def colorama_text(*args, **kwargs):
init(*args, **kwargs)
try:
yield
finally:
deinit()
def reinit():
if wrapped_stdout is not None:
sys.stdout = wrapped_stdout
if wrapped_stderr is not None:
sys.stderr = wrapped_stderr
def wrap_stream(stream, convert, strip, autoreset, wrap):
if wrap:
wrapper = AnsiToWin32(stream,
convert=convert, strip=strip, autoreset=autoreset)
if wrapper.should_wrap():
stream = wrapper.stream
return stream
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
# from winbase.h
STDOUT = -11
STDERR = -12
try:
import ctypes
from ctypes import LibraryLoader
windll = LibraryLoader(ctypes.WinDLL)
from ctypes import wintypes
except (AttributeError, ImportError):
windll = None
SetConsoleTextAttribute = lambda *_: None
winapi_test = lambda *_: None
else:
from ctypes import byref, Structure, c_char, POINTER
COORD = wintypes._COORD
class CONSOLE_SCREEN_BUFFER_INFO(Structure):
"""struct in wincon.h."""
_fields_ = [
("dwSize", COORD),
("dwCursorPosition", COORD),
("wAttributes", wintypes.WORD),
("srWindow", wintypes.SMALL_RECT),
("dwMaximumWindowSize", COORD),
]
def __str__(self):
return '(%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)' % (
self.dwSize.Y, self.dwSize.X
, self.dwCursorPosition.Y, self.dwCursorPosition.X
, self.wAttributes
, self.srWindow.Top, self.srWindow.Left, self.srWindow.Bottom, self.srWindow.Right
, self.dwMaximumWindowSize.Y, self.dwMaximumWindowSize.X
)
_GetStdHandle = windll.kernel32.GetStdHandle
_GetStdHandle.argtypes = [
wintypes.DWORD,
]
_GetStdHandle.restype = wintypes.HANDLE
_GetConsoleScreenBufferInfo = windll.kernel32.GetConsoleScreenBufferInfo
_GetConsoleScreenBufferInfo.argtypes = [
wintypes.HANDLE,
POINTER(CONSOLE_SCREEN_BUFFER_INFO),
]
_GetConsoleScreenBufferInfo.restype = wintypes.BOOL
_SetConsoleTextAttribute = windll.kernel32.SetConsoleTextAttribute
_SetConsoleTextAttribute.argtypes = [
wintypes.HANDLE,
wintypes.WORD,
]
_SetConsoleTextAttribute.restype = wintypes.BOOL
_SetConsoleCursorPosition = windll.kernel32.SetConsoleCursorPosition
_SetConsoleCursorPosition.argtypes = [
wintypes.HANDLE,
COORD,
]
_SetConsoleCursorPosition.restype = wintypes.BOOL
_FillConsoleOutputCharacterA = windll.kernel32.FillConsoleOutputCharacterA
_FillConsoleOutputCharacterA.argtypes = [
wintypes.HANDLE,
c_char,
wintypes.DWORD,
COORD,
POINTER(wintypes.DWORD),
]
_FillConsoleOutputCharacterA.restype = wintypes.BOOL
_FillConsoleOutputAttribute = windll.kernel32.FillConsoleOutputAttribute
_FillConsoleOutputAttribute.argtypes = [
wintypes.HANDLE,
wintypes.WORD,
wintypes.DWORD,
COORD,
POINTER(wintypes.DWORD),
]
_FillConsoleOutputAttribute.restype = wintypes.BOOL
_SetConsoleTitleW = windll.kernel32.SetConsoleTitleA
_SetConsoleTitleW.argtypes = [
wintypes.LPCSTR
]
_SetConsoleTitleW.restype = wintypes.BOOL
handles = {
STDOUT: _GetStdHandle(STDOUT),
STDERR: _GetStdHandle(STDERR),
}
def winapi_test():
handle = handles[STDOUT]
csbi = CONSOLE_SCREEN_BUFFER_INFO()
success = _GetConsoleScreenBufferInfo(
handle, byref(csbi))
return bool(success)
def GetConsoleScreenBufferInfo(stream_id=STDOUT):
handle = handles[stream_id]
csbi = CONSOLE_SCREEN_BUFFER_INFO()
success = _GetConsoleScreenBufferInfo(
handle, byref(csbi))
return csbi
def SetConsoleTextAttribute(stream_id, attrs):
handle = handles[stream_id]
return _SetConsoleTextAttribute(handle, attrs)
def SetConsoleCursorPosition(stream_id, position, adjust=True):
position = COORD(*position)
# If the position is out of range, do nothing.
if position.Y <= 0 or position.X <= 0:
return
# Adjust for Windows' SetConsoleCursorPosition:
# 1. being 0-based, while ANSI is 1-based.
# 2. expecting (x,y), while ANSI uses (y,x).
adjusted_position = COORD(position.Y - 1, position.X - 1)
if adjust:
# Adjust for viewport's scroll position
sr = GetConsoleScreenBufferInfo(STDOUT).srWindow
adjusted_position.Y += sr.Top
adjusted_position.X += sr.Left
# Resume normal processing
handle = handles[stream_id]
return _SetConsoleCursorPosition(handle, adjusted_position)
def FillConsoleOutputCharacter(stream_id, char, length, start):
handle = handles[stream_id]
char = c_char(char.encode())
length = wintypes.DWORD(length)
num_written = wintypes.DWORD(0)
# Note that this is hard-coded for ANSI (vs wide) bytes.
success = _FillConsoleOutputCharacterA(
handle, char, length, start, byref(num_written))
return num_written.value
def FillConsoleOutputAttribute(stream_id, attr, length, start):
''' FillConsoleOutputAttribute( hConsole, csbi.wAttributes, dwConSize, coordScreen, &cCharsWritten )'''
handle = handles[stream_id]
attribute = wintypes.WORD(attr)
length = wintypes.DWORD(length)
num_written = wintypes.DWORD(0)
# Note that this is hard-coded for ANSI (vs wide) bytes.
return _FillConsoleOutputAttribute(
handle, attribute, length, start, byref(num_written))
def SetConsoleTitle(title):
return _SetConsoleTitleW(title)
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
from . import win32
# from wincon.h
class WinColor(object):
BLACK = 0
BLUE = 1
GREEN = 2
CYAN = 3
RED = 4
MAGENTA = 5
YELLOW = 6
GREY = 7
# from wincon.h
class WinStyle(object):
NORMAL = 0x00 # dim text, dim background
BRIGHT = 0x08 # bright text, dim background
BRIGHT_BACKGROUND = 0x80 # dim text, bright background
class WinTerm(object):
def __init__(self):
self._default = win32.GetConsoleScreenBufferInfo(win32.STDOUT).wAttributes
self.set_attrs(self._default)
self._default_fore = self._fore
self._default_back = self._back
self._default_style = self._style
# In order to emulate LIGHT_EX in windows, we borrow the BRIGHT style.
# So that LIGHT_EX colors and BRIGHT style do not clobber each other,
# we track them separately, since LIGHT_EX is overwritten by Fore/Back
# and BRIGHT is overwritten by Style codes.
self._light = 0
def get_attrs(self):
return self._fore + self._back * 16 + (self._style | self._light)
def set_attrs(self, value):
self._fore = value & 7
self._back = (value >> 4) & 7
self._style = value & (WinStyle.BRIGHT | WinStyle.BRIGHT_BACKGROUND)
def reset_all(self, on_stderr=None):
self.set_attrs(self._default)
self.set_console(attrs=self._default)
def fore(self, fore=None, light=False, on_stderr=False):
if fore is None:
fore = self._default_fore
self._fore = fore
# Emulate LIGHT_EX with BRIGHT Style
if light:
self._light |= WinStyle.BRIGHT
else:
self._light &= ~WinStyle.BRIGHT
self.set_console(on_stderr=on_stderr)
def back(self, back=None, light=False, on_stderr=False):
if back is None:
back = self._default_back
self._back = back
# Emulate LIGHT_EX with BRIGHT_BACKGROUND Style
if light:
self._light |= WinStyle.BRIGHT_BACKGROUND
else:
self._light &= ~WinStyle.BRIGHT_BACKGROUND
self.set_console(on_stderr=on_stderr)
def style(self, style=None, on_stderr=False):
if style is None:
style = self._default_style
self._style = style
self.set_console(on_stderr=on_stderr)
def set_console(self, attrs=None, on_stderr=False):
if attrs is None:
attrs = self.get_attrs()
handle = win32.STDOUT
if on_stderr:
handle = win32.STDERR
win32.SetConsoleTextAttribute(handle, attrs)
def get_position(self, handle):
position = win32.GetConsoleScreenBufferInfo(handle).dwCursorPosition
# Because Windows coordinates are 0-based,
# and win32.SetConsoleCursorPosition expects 1-based.
position.X += 1
position.Y += 1
return position
def set_cursor_position(self, position=None, on_stderr=False):
if position is None:
# I'm not currently tracking the position, so there is no default.
# position = self.get_position()
return
handle = win32.STDOUT
if on_stderr:
handle = win32.STDERR
win32.SetConsoleCursorPosition(handle, position)
def cursor_adjust(self, x, y, on_stderr=False):
handle = win32.STDOUT
if on_stderr:
handle = win32.STDERR
position = self.get_position(handle)
adjusted_position = (position.Y + y, position.X + x)
win32.SetConsoleCursorPosition(handle, adjusted_position, adjust=False)
def erase_screen(self, mode=0, on_stderr=False):
# 0 should clear from the cursor to the end of the screen.
# 1 should clear from the cursor to the beginning of the screen.
# 2 should clear the entire screen, and move cursor to (1,1)
handle = win32.STDOUT
if on_stderr:
handle = win32.STDERR
csbi = win32.GetConsoleScreenBufferInfo(handle)
# get the number of character cells in the current buffer
cells_in_screen = csbi.dwSize.X * csbi.dwSize.Y
# get number of character cells before current cursor position
cells_before_cursor = csbi.dwSize.X * csbi.dwCursorPosition.Y + csbi.dwCursorPosition.X
if mode == 0:
from_coord = csbi.dwCursorPosition
cells_to_erase = cells_in_screen - cells_before_cursor
if mode == 1:
from_coord = win32.COORD(0, 0)
cells_to_erase = cells_before_cursor
elif mode == 2:
from_coord = win32.COORD(0, 0)
cells_to_erase = cells_in_screen
# fill the entire screen with blanks
win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord)
# now set the buffer's attributes accordingly
win32.FillConsoleOutputAttribute(handle, self.get_attrs(), cells_to_erase, from_coord)
if mode == 2:
# put the cursor where needed
win32.SetConsoleCursorPosition(handle, (1, 1))
def erase_line(self, mode=0, on_stderr=False):
# 0 should clear from the cursor to the end of the line.
# 1 should clear from the cursor to the beginning of the line.
# 2 should clear the entire line.
handle = win32.STDOUT
if on_stderr:
handle = win32.STDERR
csbi = win32.GetConsoleScreenBufferInfo(handle)
if mode == 0:
from_coord = csbi.dwCursorPosition
cells_to_erase = csbi.dwSize.X - csbi.dwCursorPosition.X
if mode == 1:
from_coord = win32.COORD(0, csbi.dwCursorPosition.Y)
cells_to_erase = csbi.dwCursorPosition.X
elif mode == 2:
from_coord = win32.COORD(0, csbi.dwCursorPosition.Y)
cells_to_erase = csbi.dwSize.X
# fill the entire screen with blanks
win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord)
# now set the buffer's attributes accordingly
win32.FillConsoleOutputAttribute(handle, self.get_attrs(), cells_to_erase, from_coord)
def set_title(self, title):
win32.SetConsoleTitle(title)
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2016 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
import logging
__version__ = '0.2.4'
class DistlibException(Exception):
pass
try:
from logging import NullHandler
except ImportError: # pragma: no cover
class NullHandler(logging.Handler):
def handle(self, record): pass
def emit(self, record): pass
def createLock(self): self.lock = None
logger = logging.getLogger(__name__)
logger.addHandler(NullHandler())
"""Modules copied from Python 3 standard libraries, for internal use only.
Individual classes and functions are found in d2._backport.misc. Intended
usage is to always import things missing from 3.1 from that module: the
built-in/stdlib objects will be used if found.
"""
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 The Python Software Foundation.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
"""Backports for individual classes and functions."""
import os
import sys
__all__ = ['cache_from_source', 'callable', 'fsencode']
try:
from imp import cache_from_source
except ImportError:
def cache_from_source(py_file, debug=__debug__):
ext = debug and 'c' or 'o'
return py_file + ext
try:
callable = callable
except NameError:
from collections import Callable
def callable(obj):
return isinstance(obj, Callable)
try:
fsencode = os.fsencode
except AttributeError:
def fsencode(filename):
if isinstance(filename, bytes):
return filename
elif isinstance(filename, str):
return filename.encode(sys.getfilesystemencoding())
else:
raise TypeError("expect bytes or str, not %s" %
type(filename).__name__)
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 The Python Software Foundation.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
"""Utility functions for copying and archiving files and directory trees.
XXX The functions here don't copy the resource fork or other metadata on Mac.
"""
import os
import sys
import stat
from os.path import abspath
import fnmatch
import collections
import errno
from . import tarfile
try:
import bz2
_BZ2_SUPPORTED = True
except ImportError:
_BZ2_SUPPORTED = False
try:
from pwd import getpwnam
except ImportError:
getpwnam = None
try:
from grp import getgrnam
except ImportError:
getgrnam = None
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
"copytree", "move", "rmtree", "Error", "SpecialFileError",
"ExecError", "make_archive", "get_archive_formats",
"register_archive_format", "unregister_archive_format",
"get_unpack_formats", "register_unpack_format",
"unregister_unpack_format", "unpack_archive", "ignore_patterns"]
class Error(EnvironmentError):
pass
class SpecialFileError(EnvironmentError):
"""Raised when trying to do a kind of operation (e.g. copying) which is
not supported on a special file (e.g. a named pipe)"""
class ExecError(EnvironmentError):
"""Raised when a command could not be executed"""
class ReadError(EnvironmentError):
"""Raised when an archive cannot be read"""
class RegistryError(Exception):
"""Raised when a registry operation with the archiving
and unpacking registries fails"""
try:
WindowsError
except NameError:
WindowsError = None
def copyfileobj(fsrc, fdst, length=16*1024):
"""copy data from file-like object fsrc to file-like object fdst"""
while 1:
buf = fsrc.read(length)
if not buf:
break
fdst.write(buf)
def _samefile(src, dst):
# Macintosh, Unix.
if hasattr(os.path, 'samefile'):
try:
return os.path.samefile(src, dst)
except OSError:
return False
# All other platforms: check for same pathname.
return (os.path.normcase(os.path.abspath(src)) ==
os.path.normcase(os.path.abspath(dst)))
def copyfile(src, dst):
"""Copy data from src to dst"""
if _samefile(src, dst):
raise Error("`%s` and `%s` are the same file" % (src, dst))
for fn in [src, dst]:
try:
st = os.stat(fn)
except OSError:
# File most likely does not exist
pass
else:
# XXX What about other special files? (sockets, devices...)
if stat.S_ISFIFO(st.st_mode):
raise SpecialFileError("`%s` is a named pipe" % fn)
with open(src, 'rb') as fsrc:
with open(dst, 'wb') as fdst:
copyfileobj(fsrc, fdst)
def copymode(src, dst):
"""Copy mode bits from src to dst"""
if hasattr(os, 'chmod'):
st = os.stat(src)
mode = stat.S_IMODE(st.st_mode)
os.chmod(dst, mode)
def copystat(src, dst):
"""Copy all stat info (mode bits, atime, mtime, flags) from src to dst"""
st = os.stat(src)
mode = stat.S_IMODE(st.st_mode)
if hasattr(os, 'utime'):
os.utime(dst, (st.st_atime, st.st_mtime))
if hasattr(os, 'chmod'):
os.chmod(dst, mode)
if hasattr(os, 'chflags') and hasattr(st, 'st_flags'):
try:
os.chflags(dst, st.st_flags)
except OSError as why:
if (not hasattr(errno, 'EOPNOTSUPP') or
why.errno != errno.EOPNOTSUPP):
raise
def copy(src, dst):
"""Copy data and mode bits ("cp src dst").
The destination may be a directory.
"""
if os.path.isdir(dst):
dst = os.path.join(dst, os.path.basename(src))
copyfile(src, dst)
copymode(src, dst)
def copy2(src, dst):
"""Copy data and all stat info ("cp -p src dst").
The destination may be a directory.
"""
if os.path.isdir(dst):
dst = os.path.join(dst, os.path.basename(src))
copyfile(src, dst)
copystat(src, dst)
def ignore_patterns(*patterns):
"""Function that can be used as copytree() ignore parameter.
Patterns is a sequence of glob-style patterns
that are used to exclude files"""
def _ignore_patterns(path, names):
ignored_names = []
for pattern in patterns:
ignored_names.extend(fnmatch.filter(names, pattern))
return set(ignored_names)
return _ignore_patterns
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
ignore_dangling_symlinks=False):
"""Recursively copy a directory tree.
The destination directory must not already exist.
If exception(s) occur, an Error is raised with a list of reasons.
If the optional symlinks flag is true, symbolic links in the
source tree result in symbolic links in the destination tree; if
it is false, the contents of the files pointed to by symbolic
links are copied. If the file pointed by the symlink doesn't
exist, an exception will be added in the list of errors raised in
an Error exception at the end of the copy process.
You can set the optional ignore_dangling_symlinks flag to true if you
want to silence this exception. Notice that this has no effect on
platforms that don't support os.symlink.
The optional ignore argument is a callable. If given, it
is called with the `src` parameter, which is the directory
being visited by copytree(), and `names` which is the list of
`src` contents, as returned by os.listdir():
callable(src, names) -> ignored_names
Since copytree() is called recursively, the callable will be
called once for each directory that is copied. It returns a
list of names relative to the `src` directory that should
not be copied.
The optional copy_function argument is a callable that will be used
to copy each file. It will be called with the source path and the
destination path as arguments. By default, copy2() is used, but any
function that supports the same signature (like copy()) can be used.
"""
names = os.listdir(src)
if ignore is not None:
ignored_names = ignore(src, names)
else:
ignored_names = set()
os.makedirs(dst)
errors = []
for name in names:
if name in ignored_names:
continue
srcname = os.path.join(src, name)
dstname = os.path.join(dst, name)
try:
if os.path.islink(srcname):
linkto = os.readlink(srcname)
if symlinks:
os.symlink(linkto, dstname)
else:
# ignore dangling symlink if the flag is on
if not os.path.exists(linkto) and ignore_dangling_symlinks:
continue
# otherwise let the copy occurs. copy2 will raise an error
copy_function(srcname, dstname)
elif os.path.isdir(srcname):
copytree(srcname, dstname, symlinks, ignore, copy_function)
else:
# Will raise a SpecialFileError for unsupported file types
copy_function(srcname, dstname)
# catch the Error from the recursive copytree so that we can
# continue with other files
except Error as err:
errors.extend(err.args[0])
except EnvironmentError as why:
errors.append((srcname, dstname, str(why)))
try:
copystat(src, dst)
except OSError as why:
if WindowsError is not None and isinstance(why, WindowsError):
# Copying file access times may fail on Windows
pass
else:
errors.extend((src, dst, str(why)))
if errors:
raise Error(errors)
def rmtree(path, ignore_errors=False, onerror=None):
"""Recursively delete a directory tree.
If ignore_errors is set, errors are ignored; otherwise, if onerror
is set, it is called to handle the error with arguments (func,
path, exc_info) where func is os.listdir, os.remove, or os.rmdir;
path is the argument to that function that caused it to fail; and
exc_info is a tuple returned by sys.exc_info(). If ignore_errors
is false and onerror is None, an exception is raised.
"""
if ignore_errors:
def onerror(*args):
pass
elif onerror is None:
def onerror(*args):
raise
try:
if os.path.islink(path):
# symlinks to directories are forbidden, see bug #1669
raise OSError("Cannot call rmtree on a symbolic link")
except OSError:
onerror(os.path.islink, path, sys.exc_info())
# can't continue even if onerror hook returns
return
names = []
try:
names = os.listdir(path)
except os.error:
onerror(os.listdir, path, sys.exc_info())
for name in names:
fullname = os.path.join(path, name)
try:
mode = os.lstat(fullname).st_mode
except os.error:
mode = 0
if stat.S_ISDIR(mode):
rmtree(fullname, ignore_errors, onerror)
else:
try:
os.remove(fullname)
except os.error:
onerror(os.remove, fullname, sys.exc_info())
try:
os.rmdir(path)
except os.error:
onerror(os.rmdir, path, sys.exc_info())
def _basename(path):
# A basename() variant which first strips the trailing slash, if present.
# Thus we always get the last component of the path, even for directories.
return os.path.basename(path.rstrip(os.path.sep))
def move(src, dst):
"""Recursively move a file or directory to another location. This is
similar to the Unix "mv" command.
If the destination is a directory or a symlink to a directory, the source
is moved inside the directory. The destination path must not already
exist.
If the destination already exists but is not a directory, it may be
overwritten depending on os.rename() semantics.
If the destination is on our current filesystem, then rename() is used.
Otherwise, src is copied to the destination and then removed.
A lot more could be done here... A look at a mv.c shows a lot of
the issues this implementation glosses over.
"""
real_dst = dst
if os.path.isdir(dst):
if _samefile(src, dst):
# We might be on a case insensitive filesystem,
# perform the rename anyway.
os.rename(src, dst)
return
real_dst = os.path.join(dst, _basename(src))
if os.path.exists(real_dst):
raise Error("Destination path '%s' already exists" % real_dst)
try:
os.rename(src, real_dst)
except OSError:
if os.path.isdir(src):
if _destinsrc(src, dst):
raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
copytree(src, real_dst, symlinks=True)
rmtree(src)
else:
copy2(src, real_dst)
os.unlink(src)
def _destinsrc(src, dst):
src = abspath(src)
dst = abspath(dst)
if not src.endswith(os.path.sep):
src += os.path.sep
if not dst.endswith(os.path.sep):
dst += os.path.sep
return dst.startswith(src)
def _get_gid(name):
"""Returns a gid, given a group name."""
if getgrnam is None or name is None:
return None
try:
result = getgrnam(name)
except KeyError:
result = None
if result is not None:
return result[2]
return None
def _get_uid(name):
"""Returns an uid, given a user name."""
if getpwnam is None or name is None:
return None
try:
result = getpwnam(name)
except KeyError:
result = None
if result is not None:
return result[2]
return None
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
owner=None, group=None, logger=None):
"""Create a (possibly compressed) tar file from all the files under
'base_dir'.
'compress' must be "gzip" (the default), "bzip2", or None.
'owner' and 'group' can be used to define an owner and a group for the
archive that is being built. If not provided, the current owner and group
will be used.
The output tar file will be named 'base_name' + ".tar", possibly plus
the appropriate compression extension (".gz", or ".bz2").
Returns the output filename.
"""
tar_compression = {'gzip': 'gz', None: ''}
compress_ext = {'gzip': '.gz'}
if _BZ2_SUPPORTED:
tar_compression['bzip2'] = 'bz2'
compress_ext['bzip2'] = '.bz2'
# flags for compression program, each element of list will be an argument
if compress is not None and compress not in compress_ext:
raise ValueError("bad value for 'compress', or compression format not "
"supported : {0}".format(compress))
archive_name = base_name + '.tar' + compress_ext.get(compress, '')
archive_dir = os.path.dirname(archive_name)
if not os.path.exists(archive_dir):
if logger is not None:
logger.info("creating %s", archive_dir)
if not dry_run:
os.makedirs(archive_dir)
# creating the tarball
if logger is not None:
logger.info('Creating tar archive')
uid = _get_uid(owner)
gid = _get_gid(group)
def _set_uid_gid(tarinfo):
if gid is not None:
tarinfo.gid = gid
tarinfo.gname = group
if uid is not None:
tarinfo.uid = uid
tarinfo.uname = owner
return tarinfo
if not dry_run:
tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
try:
tar.add(base_dir, filter=_set_uid_gid)
finally:
tar.close()
return archive_name
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
# XXX see if we want to keep an external call here
if verbose:
zipoptions = "-r"
else:
zipoptions = "-rq"
from distutils.errors import DistutilsExecError
from distutils.spawn import spawn
try:
spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
except DistutilsExecError:
# XXX really should distinguish between "couldn't find
# external 'zip' command" and "zip failed".
raise ExecError("unable to create zip file '%s': "
"could neither import the 'zipfile' module nor "
"find a standalone zip utility") % zip_filename
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
"""Create a zip file from all the files under 'base_dir'.
The output zip file will be named 'base_name' + ".zip". Uses either the
"zipfile" Python module (if available) or the InfoZIP "zip" utility
(if installed and found on the default search path). If neither tool is
available, raises ExecError. Returns the name of the output zip
file.
"""
zip_filename = base_name + ".zip"
archive_dir = os.path.dirname(base_name)
if not os.path.exists(archive_dir):
if logger is not None:
logger.info("creating %s", archive_dir)
if not dry_run:
os.makedirs(archive_dir)
# If zipfile module is not available, try spawning an external 'zip'
# command.
try:
import zipfile
except ImportError:
zipfile = None
if zipfile is None:
_call_external_zip(base_dir, zip_filename, verbose, dry_run)
else:
if logger is not None:
logger.info("creating '%s' and adding '%s' to it",
zip_filename, base_dir)
if not dry_run:
zip = zipfile.ZipFile(zip_filename, "w",
compression=zipfile.ZIP_DEFLATED)
for dirpath, dirnames, filenames in os.walk(base_dir):
for name in filenames:
path = os.path.normpath(os.path.join(dirpath, name))
if os.path.isfile(path):
zip.write(path, path)
if logger is not None:
logger.info("adding '%s'", path)
zip.close()
return zip_filename
_ARCHIVE_FORMATS = {
'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
'zip': (_make_zipfile, [], "ZIP file"),
}
if _BZ2_SUPPORTED:
_ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
"bzip2'ed tar-file")
def get_archive_formats():
"""Returns a list of supported formats for archiving and unarchiving.
Each element of the returned sequence is a tuple (name, description)
"""
formats = [(name, registry[2]) for name, registry in
_ARCHIVE_FORMATS.items()]
formats.sort()
return formats
def register_archive_format(name, function, extra_args=None, description=''):
"""Registers an archive format.
name is the name of the format. function is the callable that will be
used to create archives. If provided, extra_args is a sequence of
(name, value) tuples that will be passed as arguments to the callable.
description can be provided to describe the format, and will be returned
by the get_archive_formats() function.
"""
if extra_args is None:
extra_args = []
if not isinstance(function, collections.Callable):
raise TypeError('The %s object is not callable' % function)
if not isinstance(extra_args, (tuple, list)):
raise TypeError('extra_args needs to be a sequence')
for element in extra_args:
if not isinstance(element, (tuple, list)) or len(element) !=2:
raise TypeError('extra_args elements are : (arg_name, value)')
_ARCHIVE_FORMATS[name] = (function, extra_args, description)
def unregister_archive_format(name):
del _ARCHIVE_FORMATS[name]
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
dry_run=0, owner=None, group=None, logger=None):
"""Create an archive file (eg. zip or tar).
'base_name' is the name of the file to create, minus any format-specific
extension; 'format' is the archive format: one of "zip", "tar", "bztar"
or "gztar".
'root_dir' is a directory that will be the root directory of the
archive; ie. we typically chdir into 'root_dir' before creating the
archive. 'base_dir' is the directory where we start archiving from;
ie. 'base_dir' will be the common prefix of all files and
directories in the archive. 'root_dir' and 'base_dir' both default
to the current directory. Returns the name of the archive file.
'owner' and 'group' are used when creating a tar archive. By default,
uses the current owner and group.
"""
save_cwd = os.getcwd()
if root_dir is not None:
if logger is not None:
logger.debug("changing into '%s'", root_dir)
base_name = os.path.abspath(base_name)
if not dry_run:
os.chdir(root_dir)
if base_dir is None:
base_dir = os.curdir
kwargs = {'dry_run': dry_run, 'logger': logger}
try:
format_info = _ARCHIVE_FORMATS[format]
except KeyError:
raise ValueError("unknown archive format '%s'" % format)
func = format_info[0]
for arg, val in format_info[1]:
kwargs[arg] = val
if format != 'zip':
kwargs['owner'] = owner
kwargs['group'] = group
try:
filename = func(base_name, base_dir, **kwargs)
finally:
if root_dir is not None:
if logger is not None:
logger.debug("changing back to '%s'", save_cwd)
os.chdir(save_cwd)
return filename
def get_unpack_formats():
"""Returns a list of supported formats for unpacking.
Each element of the returned sequence is a tuple
(name, extensions, description)
"""
formats = [(name, info[0], info[3]) for name, info in
_UNPACK_FORMATS.items()]
formats.sort()
return formats
def _check_unpack_options(extensions, function, extra_args):
"""Checks what gets registered as an unpacker."""
# first make sure no other unpacker is registered for this extension
existing_extensions = {}
for name, info in _UNPACK_FORMATS.items():
for ext in info[0]:
existing_extensions[ext] = name
for extension in extensions:
if extension in existing_extensions:
msg = '%s is already registered for "%s"'
raise RegistryError(msg % (extension,
existing_extensions[extension]))
if not isinstance(function, collections.Callable):
raise TypeError('The registered function must be a callable')
def register_unpack_format(name, extensions, function, extra_args=None,
description=''):
"""Registers an unpack format.
`name` is the name of the format. `extensions` is a list of extensions
corresponding to the format.
`function` is the callable that will be
used to unpack archives. The callable will receive archives to unpack.
If it's unable to handle an archive, it needs to raise a ReadError
exception.
If provided, `extra_args` is a sequence of
(name, value) tuples that will be passed as arguments to the callable.
description can be provided to describe the format, and will be returned
by the get_unpack_formats() function.
"""
if extra_args is None:
extra_args = []
_check_unpack_options(extensions, function, extra_args)
_UNPACK_FORMATS[name] = extensions, function, extra_args, description
def unregister_unpack_format(name):
"""Removes the pack format from the registry."""
del _UNPACK_FORMATS[name]
def _ensure_directory(path):
"""Ensure that the parent directory of `path` exists"""
dirname = os.path.dirname(path)
if not os.path.isdir(dirname):
os.makedirs(dirname)
def _unpack_zipfile(filename, extract_dir):
"""Unpack zip `filename` to `extract_dir`
"""
try:
import zipfile
except ImportError:
raise ReadError('zlib not supported, cannot unpack this archive.')
if not zipfile.is_zipfile(filename):
raise ReadError("%s is not a zip file" % filename)
zip = zipfile.ZipFile(filename)
try:
for info in zip.infolist():
name = info.filename
# don't extract absolute paths or ones with .. in them
if name.startswith('/') or '..' in name:
continue
target = os.path.join(extract_dir, *name.split('/'))
if not target:
continue
_ensure_directory(target)
if not name.endswith('/'):
# file
data = zip.read(info.filename)
f = open(target, 'wb')
try:
f.write(data)
finally:
f.close()
del data
finally:
zip.close()
def _unpack_tarfile(filename, extract_dir):
"""Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir`
"""
try:
tarobj = tarfile.open(filename)
except tarfile.TarError:
raise ReadError(
"%s is not a compressed or uncompressed tar file" % filename)
try:
tarobj.extractall(extract_dir)
finally:
tarobj.close()
_UNPACK_FORMATS = {
'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
'zip': (['.zip'], _unpack_zipfile, [], "ZIP file")
}
if _BZ2_SUPPORTED:
_UNPACK_FORMATS['bztar'] = (['.bz2'], _unpack_tarfile, [],
"bzip2'ed tar-file")
def _find_unpack_format(filename):
for name, info in _UNPACK_FORMATS.items():
for extension in info[0]:
if filename.endswith(extension):
return name
return None
def unpack_archive(filename, extract_dir=None, format=None):
"""Unpack an archive.
`filename` is the name of the archive.
`extract_dir` is the name of the target directory, where the archive
is unpacked. If not provided, the current working directory is used.
`format` is the archive format: one of "zip", "tar", or "gztar". Or any
other registered format. If not provided, unpack_archive will use the
filename extension and see if an unpacker was registered for that
extension.
In case none is found, a ValueError is raised.
"""
if extract_dir is None:
extract_dir = os.getcwd()
if format is not None:
try:
format_info = _UNPACK_FORMATS[format]
except KeyError:
raise ValueError("Unknown unpack format '{0}'".format(format))
func = format_info[1]
func(filename, extract_dir, **dict(format_info[2]))
else:
# we need to look at the registered unpackers supported extensions
format = _find_unpack_format(filename)
if format is None:
raise ReadError("Unknown archive format '{0}'".format(filename))
func = _UNPACK_FORMATS[format][1]
kwargs = dict(_UNPACK_FORMATS[format][2])
func(filename, extract_dir, **kwargs)
[posix_prefix]
# Configuration directories. Some of these come straight out of the
# configure script. They are for implementing the other variables, not to
# be used directly in [resource_locations].
confdir = /etc
datadir = /usr/share
libdir = /usr/lib
statedir = /var
# User resource directory
local = ~/.local/{distribution.name}
stdlib = {base}/lib/python{py_version_short}
platstdlib = {platbase}/lib/python{py_version_short}
purelib = {base}/lib/python{py_version_short}/site-packages
platlib = {platbase}/lib/python{py_version_short}/site-packages
include = {base}/include/python{py_version_short}{abiflags}
platinclude = {platbase}/include/python{py_version_short}{abiflags}
data = {base}
[posix_home]
stdlib = {base}/lib/python
platstdlib = {base}/lib/python
purelib = {base}/lib/python
platlib = {base}/lib/python
include = {base}/include/python
platinclude = {base}/include/python
scripts = {base}/bin
data = {base}
[nt]
stdlib = {base}/Lib
platstdlib = {base}/Lib
purelib = {base}/Lib/site-packages
platlib = {base}/Lib/site-packages
include = {base}/Include
platinclude = {base}/Include
scripts = {base}/Scripts
data = {base}
[os2]
stdlib = {base}/Lib
platstdlib = {base}/Lib
purelib = {base}/Lib/site-packages
platlib = {base}/Lib/site-packages
include = {base}/Include
platinclude = {base}/Include
scripts = {base}/Scripts
data = {base}
[os2_home]
stdlib = {userbase}/lib/python{py_version_short}
platstdlib = {userbase}/lib/python{py_version_short}
purelib = {userbase}/lib/python{py_version_short}/site-packages
platlib = {userbase}/lib/python{py_version_short}/site-packages
include = {userbase}/include/python{py_version_short}
scripts = {userbase}/bin
data = {userbase}
[nt_user]
stdlib = {userbase}/Python{py_version_nodot}
platstdlib = {userbase}/Python{py_version_nodot}
purelib = {userbase}/Python{py_version_nodot}/site-packages
platlib = {userbase}/Python{py_version_nodot}/site-packages
include = {userbase}/Python{py_version_nodot}/Include
scripts = {userbase}/Scripts
data = {userbase}
[posix_user]
stdlib = {userbase}/lib/python{py_version_short}
platstdlib = {userbase}/lib/python{py_version_short}
purelib = {userbase}/lib/python{py_version_short}/site-packages
platlib = {userbase}/lib/python{py_version_short}/site-packages
include = {userbase}/include/python{py_version_short}
scripts = {userbase}/bin
data = {userbase}
[osx_framework_user]
stdlib = {userbase}/lib/python
platstdlib = {userbase}/lib/python
purelib = {userbase}/lib/python/site-packages
platlib = {userbase}/lib/python/site-packages
include = {userbase}/include
scripts = {userbase}/bin
data = {userbase}
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 The Python Software Foundation.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
"""Access to Python's configuration information."""
import codecs
import os
import re
import sys
from os.path import pardir, realpath
try:
import configparser
except ImportError:
import ConfigParser as configparser
__all__ = [
'get_config_h_filename',
'get_config_var',
'get_config_vars',
'get_makefile_filename',
'get_path',
'get_path_names',
'get_paths',
'get_platform',
'get_python_version',
'get_scheme_names',
'parse_config_h',
]
def _safe_realpath(path):
try:
return realpath(path)
except OSError:
return path
if sys.executable:
_PROJECT_BASE = os.path.dirname(_safe_realpath(sys.executable))
else:
# sys.executable can be empty if argv[0] has been changed and Python is
# unable to retrieve the real program name
_PROJECT_BASE = _safe_realpath(os.getcwd())
if os.name == "nt" and "pcbuild" in _PROJECT_BASE[-8:].lower():
_PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir))
# PC/VS7.1
if os.name == "nt" and "\\pc\\v" in _PROJECT_BASE[-10:].lower():
_PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir))
# PC/AMD64
if os.name == "nt" and "\\pcbuild\\amd64" in _PROJECT_BASE[-14:].lower():
_PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir))
def is_python_build():
for fn in ("Setup.dist", "Setup.local"):
if os.path.isfile(os.path.join(_PROJECT_BASE, "Modules", fn)):
return True
return False
_PYTHON_BUILD = is_python_build()
_cfg_read = False
def _ensure_cfg_read():
global _cfg_read
if not _cfg_read:
from ..resources import finder
backport_package = __name__.rsplit('.', 1)[0]
_finder = finder(backport_package)
_cfgfile = _finder.find('sysconfig.cfg')
assert _cfgfile, 'sysconfig.cfg exists'
with _cfgfile.as_stream() as s:
_SCHEMES.readfp(s)
if _PYTHON_BUILD:
for scheme in ('posix_prefix', 'posix_home'):
_SCHEMES.set(scheme, 'include', '{srcdir}/Include')
_SCHEMES.set(scheme, 'platinclude', '{projectbase}/.')
_cfg_read = True
_SCHEMES = configparser.RawConfigParser()
_VAR_REPL = re.compile(r'\{([^{]*?)\}')
def _expand_globals(config):
_ensure_cfg_read()
if config.has_section('globals'):
globals = config.items('globals')
else:
globals = tuple()
sections = config.sections()
for section in sections:
if section == 'globals':
continue
for option, value in globals:
if config.has_option(section, option):
continue
config.set(section, option, value)
config.remove_section('globals')
# now expanding local variables defined in the cfg file
#
for section in config.sections():
variables = dict(config.items(section))
def _replacer(matchobj):
name = matchobj.group(1)
if name in variables:
return variables[name]
return matchobj.group(0)
for option, value in config.items(section):
config.set(section, option, _VAR_REPL.sub(_replacer, value))
#_expand_globals(_SCHEMES)
# FIXME don't rely on sys.version here, its format is an implementation detail
# of CPython, use sys.version_info or sys.hexversion
_PY_VERSION = sys.version.split()[0]
_PY_VERSION_SHORT = sys.version[:3]
_PY_VERSION_SHORT_NO_DOT = _PY_VERSION[0] + _PY_VERSION[2]
_PREFIX = os.path.normpath(sys.prefix)
_EXEC_PREFIX = os.path.normpath(sys.exec_prefix)
_CONFIG_VARS = None
_USER_BASE = None
def _subst_vars(path, local_vars):
"""In the string `path`, replace tokens like {some.thing} with the
corresponding value from the map `local_vars`.
If there is no corresponding value, leave the token unchanged.
"""
def _replacer(matchobj):
name = matchobj.group(1)
if name in local_vars:
return local_vars[name]
elif name in os.environ:
return os.environ[name]
return matchobj.group(0)
return _VAR_REPL.sub(_replacer, path)
def _extend_dict(target_dict, other_dict):
target_keys = target_dict.keys()
for key, value in other_dict.items():
if key in target_keys:
continue
target_dict[key] = value
def _expand_vars(scheme, vars):
res = {}
if vars is None:
vars = {}
_extend_dict(vars, get_config_vars())
for key, value in _SCHEMES.items(scheme):
if os.name in ('posix', 'nt'):
value = os.path.expanduser(value)
res[key] = os.path.normpath(_subst_vars(value, vars))
return res
def format_value(value, vars):
def _replacer(matchobj):
name = matchobj.group(1)
if name in vars:
return vars[name]
return matchobj.group(0)
return _VAR_REPL.sub(_replacer, value)
def _get_default_scheme():
if os.name == 'posix':
# the default scheme for posix is posix_prefix
return 'posix_prefix'
return os.name
def _getuserbase():
env_base = os.environ.get("PYTHONUSERBASE", None)
def joinuser(*args):
return os.path.expanduser(os.path.join(*args))
# what about 'os2emx', 'riscos' ?
if os.name == "nt":
base = os.environ.get("APPDATA") or "~"
if env_base:
return env_base
else:
return joinuser(base, "Python")
if sys.platform == "darwin":
framework = get_config_var("PYTHONFRAMEWORK")
if framework:
if env_base:
return env_base
else:
return joinuser("~", "Library", framework, "%d.%d" %
sys.version_info[:2])
if env_base:
return env_base
else:
return joinuser("~", ".local")
def _parse_makefile(filename, vars=None):
"""Parse a Makefile-style file.
A dictionary containing name/value pairs is returned. If an
optional dictionary is passed in as the second argument, it is
used instead of a new dictionary.
"""
# Regexes needed for parsing Makefile (and similar syntaxes,
# like old-style Setup files).
_variable_rx = re.compile("([a-zA-Z][a-zA-Z0-9_]+)\s*=\s*(.*)")
_findvar1_rx = re.compile(r"\$\(([A-Za-z][A-Za-z0-9_]*)\)")
_findvar2_rx = re.compile(r"\${([A-Za-z][A-Za-z0-9_]*)}")
if vars is None:
vars = {}
done = {}
notdone = {}
with codecs.open(filename, encoding='utf-8', errors="surrogateescape") as f:
lines = f.readlines()
for line in lines:
if line.startswith('#') or line.strip() == '':
continue
m = _variable_rx.match(line)
if m:
n, v = m.group(1, 2)
v = v.strip()
# `$$' is a literal `$' in make
tmpv = v.replace('$$', '')
if "$" in tmpv:
notdone[n] = v
else:
try:
v = int(v)
except ValueError:
# insert literal `$'
done[n] = v.replace('$$', '$')
else:
done[n] = v
# do variable interpolation here
variables = list(notdone.keys())
# Variables with a 'PY_' prefix in the makefile. These need to
# be made available without that prefix through sysconfig.
# Special care is needed to ensure that variable expansion works, even
# if the expansion uses the name without a prefix.
renamed_variables = ('CFLAGS', 'LDFLAGS', 'CPPFLAGS')
while len(variables) > 0:
for name in tuple(variables):
value = notdone[name]
m = _findvar1_rx.search(value) or _findvar2_rx.search(value)
if m is not None:
n = m.group(1)
found = True
if n in done:
item = str(done[n])
elif n in notdone:
# get it on a subsequent round
found = False
elif n in os.environ:
# do it like make: fall back to environment
item = os.environ[n]
elif n in renamed_variables:
if (name.startswith('PY_') and
name[3:] in renamed_variables):
item = ""
elif 'PY_' + n in notdone:
found = False
else:
item = str(done['PY_' + n])
else:
done[n] = item = ""
if found:
after = value[m.end():]
value = value[:m.start()] + item + after
if "$" in after:
notdone[name] = value
else:
try:
value = int(value)
except ValueError:
done[name] = value.strip()
else:
done[name] = value
variables.remove(name)
if (name.startswith('PY_') and
name[3:] in renamed_variables):
name = name[3:]
if name not in done:
done[name] = value
else:
# bogus variable reference (e.g. "prefix=$/opt/python");
# just drop it since we can't deal
done[name] = value
variables.remove(name)
# strip spurious spaces
for k, v in done.items():
if isinstance(v, str):
done[k] = v.strip()
# save the results in the global dictionary
vars.update(done)
return vars
def get_makefile_filename():
"""Return the path of the Makefile."""
if _PYTHON_BUILD:
return os.path.join(_PROJECT_BASE, "Makefile")
if hasattr(sys, 'abiflags'):
config_dir_name = 'config-%s%s' % (_PY_VERSION_SHORT, sys.abiflags)
else:
config_dir_name = 'config'
return os.path.join(get_path('stdlib'), config_dir_name, 'Makefile')
def _init_posix(vars):
"""Initialize the module as appropriate for POSIX systems."""
# load the installed Makefile:
makefile = get_makefile_filename()
try:
_parse_makefile(makefile, vars)
except IOError as e:
msg = "invalid Python installation: unable to open %s" % makefile
if hasattr(e, "strerror"):
msg = msg + " (%s)" % e.strerror
raise IOError(msg)
# load the installed pyconfig.h:
config_h = get_config_h_filename()
try:
with open(config_h) as f:
parse_config_h(f, vars)
except IOError as e:
msg = "invalid Python installation: unable to open %s" % config_h
if hasattr(e, "strerror"):
msg = msg + " (%s)" % e.strerror
raise IOError(msg)
# On AIX, there are wrong paths to the linker scripts in the Makefile
# -- these paths are relative to the Python source, but when installed
# the scripts are in another directory.
if _PYTHON_BUILD:
vars['LDSHARED'] = vars['BLDSHARED']
def _init_non_posix(vars):
"""Initialize the module as appropriate for NT"""
# set basic install directories
vars['LIBDEST'] = get_path('stdlib')
vars['BINLIBDEST'] = get_path('platstdlib')
vars['INCLUDEPY'] = get_path('include')
vars['SO'] = '.pyd'
vars['EXE'] = '.exe'
vars['VERSION'] = _PY_VERSION_SHORT_NO_DOT
vars['BINDIR'] = os.path.dirname(_safe_realpath(sys.executable))
#
# public APIs
#
def parse_config_h(fp, vars=None):
"""Parse a config.h-style file.
A dictionary containing name/value pairs is returned. If an
optional dictionary is passed in as the second argument, it is
used instead of a new dictionary.
"""
if vars is None:
vars = {}
define_rx = re.compile("#define ([A-Z][A-Za-z0-9_]+) (.*)\n")
undef_rx = re.compile("/[*] #undef ([A-Z][A-Za-z0-9_]+) [*]/\n")
while True:
line = fp.readline()
if not line:
break
m = define_rx.match(line)
if m:
n, v = m.group(1, 2)
try:
v = int(v)
except ValueError:
pass
vars[n] = v
else:
m = undef_rx.match(line)
if m:
vars[m.group(1)] = 0
return vars
def get_config_h_filename():
"""Return the path of pyconfig.h."""
if _PYTHON_BUILD:
if os.name == "nt":
inc_dir = os.path.join(_PROJECT_BASE, "PC")
else:
inc_dir = _PROJECT_BASE
else:
inc_dir = get_path('platinclude')
return os.path.join(inc_dir, 'pyconfig.h')
def get_scheme_names():
"""Return a tuple containing the schemes names."""
return tuple(sorted(_SCHEMES.sections()))
def get_path_names():
"""Return a tuple containing the paths names."""
# xxx see if we want a static list
return _SCHEMES.options('posix_prefix')
def get_paths(scheme=_get_default_scheme(), vars=None, expand=True):
"""Return a mapping containing an install scheme.
``scheme`` is the install scheme name. If not provided, it will
return the default scheme for the current platform.
"""
_ensure_cfg_read()
if expand:
return _expand_vars(scheme, vars)
else:
return dict(_SCHEMES.items(scheme))
def get_path(name, scheme=_get_default_scheme(), vars=None, expand=True):
"""Return a path corresponding to the scheme.
``scheme`` is the install scheme name.
"""
return get_paths(scheme, vars, expand)[name]
def get_config_vars(*args):
"""With no arguments, return a dictionary of all configuration
variables relevant for the current platform.
On Unix, this means every variable defined in Python's installed Makefile;
On Windows and Mac OS it's a much smaller set.
With arguments, return a list of values that result from looking up
each argument in the configuration variable dictionary.
"""
global _CONFIG_VARS
if _CONFIG_VARS is None:
_CONFIG_VARS = {}
# Normalized versions of prefix and exec_prefix are handy to have;
# in fact, these are the standard versions used most places in the
# distutils2 module.
_CONFIG_VARS['prefix'] = _PREFIX
_CONFIG_VARS['exec_prefix'] = _EXEC_PREFIX
_CONFIG_VARS['py_version'] = _PY_VERSION
_CONFIG_VARS['py_version_short'] = _PY_VERSION_SHORT
_CONFIG_VARS['py_version_nodot'] = _PY_VERSION[0] + _PY_VERSION[2]
_CONFIG_VARS['base'] = _PREFIX
_CONFIG_VARS['platbase'] = _EXEC_PREFIX
_CONFIG_VARS['projectbase'] = _PROJECT_BASE
try:
_CONFIG_VARS['abiflags'] = sys.abiflags
except AttributeError:
# sys.abiflags may not be defined on all platforms.
_CONFIG_VARS['abiflags'] = ''
if os.name in ('nt', 'os2'):
_init_non_posix(_CONFIG_VARS)
if os.name == 'posix':
_init_posix(_CONFIG_VARS)
# Setting 'userbase' is done below the call to the
# init function to enable using 'get_config_var' in
# the init-function.
if sys.version >= '2.6':
_CONFIG_VARS['userbase'] = _getuserbase()
if 'srcdir' not in _CONFIG_VARS:
_CONFIG_VARS['srcdir'] = _PROJECT_BASE
else:
_CONFIG_VARS['srcdir'] = _safe_realpath(_CONFIG_VARS['srcdir'])
# Convert srcdir into an absolute path if it appears necessary.
# Normally it is relative to the build directory. However, during
# testing, for example, we might be running a non-installed python
# from a different directory.
if _PYTHON_BUILD and os.name == "posix":
base = _PROJECT_BASE
try:
cwd = os.getcwd()
except OSError:
cwd = None
if (not os.path.isabs(_CONFIG_VARS['srcdir']) and
base != cwd):
# srcdir is relative and we are not in the same directory
# as the executable. Assume executable is in the build
# directory and make srcdir absolute.
srcdir = os.path.join(base, _CONFIG_VARS['srcdir'])
_CONFIG_VARS['srcdir'] = os.path.normpath(srcdir)
if sys.platform == 'darwin':
kernel_version = os.uname()[2] # Kernel version (8.4.3)
major_version = int(kernel_version.split('.')[0])
if major_version < 8:
# On macOS before 10.4, check if -arch and -isysroot
# are in CFLAGS or LDFLAGS and remove them if they are.
# This is needed when building extensions on a 10.3 system
# using a universal build of python.
for key in ('LDFLAGS', 'BASECFLAGS',
# a number of derived variables. These need to be
# patched up as well.
'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):
flags = _CONFIG_VARS[key]
flags = re.sub('-arch\s+\w+\s', ' ', flags)
flags = re.sub('-isysroot [^ \t]*', ' ', flags)
_CONFIG_VARS[key] = flags
else:
# Allow the user to override the architecture flags using
# an environment variable.
# NOTE: This name was introduced by Apple in OSX 10.5 and
# is used by several scripting languages distributed with
# that OS release.
if 'ARCHFLAGS' in os.environ:
arch = os.environ['ARCHFLAGS']
for key in ('LDFLAGS', 'BASECFLAGS',
# a number of derived variables. These need to be
# patched up as well.
'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):
flags = _CONFIG_VARS[key]
flags = re.sub('-arch\s+\w+\s', ' ', flags)
flags = flags + ' ' + arch
_CONFIG_VARS[key] = flags
# If we're on OSX 10.5 or later and the user tries to
# compiles an extension using an SDK that is not present
# on the current machine it is better to not use an SDK
# than to fail.
#
# The major usecase for this is users using a Python.org
# binary installer on OSX 10.6: that installer uses
# the 10.4u SDK, but that SDK is not installed by default
# when you install Xcode.
#
CFLAGS = _CONFIG_VARS.get('CFLAGS', '')
m = re.search('-isysroot\s+(\S+)', CFLAGS)
if m is not None:
sdk = m.group(1)
if not os.path.exists(sdk):
for key in ('LDFLAGS', 'BASECFLAGS',
# a number of derived variables. These need to be
# patched up as well.
'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):
flags = _CONFIG_VARS[key]
flags = re.sub('-isysroot\s+\S+(\s|$)', ' ', flags)
_CONFIG_VARS[key] = flags
if args:
vals = []
for name in args:
vals.append(_CONFIG_VARS.get(name))
return vals
else:
return _CONFIG_VARS
def get_config_var(name):
"""Return the value of a single variable using the dictionary returned by
'get_config_vars()'.
Equivalent to get_config_vars().get(name)
"""
return get_config_vars().get(name)
def get_platform():
"""Return a string that identifies the current platform.
This is used mainly to distinguish platform-specific build directories and
platform-specific built distributions. Typically includes the OS name
and version and the architecture (as supplied by 'os.uname()'),
although the exact information included depends on the OS; eg. for IRIX
the architecture isn't particularly important (IRIX only runs on SGI
hardware), but for Linux the kernel version isn't particularly
important.
Examples of returned values:
linux-i586
linux-alpha (?)
solaris-2.6-sun4u
irix-5.3
irix64-6.2
Windows will return one of:
win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc)
win-ia64 (64bit Windows on Itanium)
win32 (all others - specifically, sys.platform is returned)
For other non-POSIX platforms, currently just returns 'sys.platform'.
"""
if os.name == 'nt':
# sniff sys.version for architecture.
prefix = " bit ("
i = sys.version.find(prefix)
if i == -1:
return sys.platform
j = sys.version.find(")", i)
look = sys.version[i+len(prefix):j].lower()
if look == 'amd64':
return 'win-amd64'
if look == 'itanium':
return 'win-ia64'
return sys.platform
if os.name != "posix" or not hasattr(os, 'uname'):
# XXX what about the architecture? NT is Intel or Alpha,
# Mac OS is M68k or PPC, etc.
return sys.platform
# Try to distinguish various flavours of Unix
osname, host, release, version, machine = os.uname()
# Convert the OS name to lowercase, remove '/' characters
# (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
osname = osname.lower().replace('/', '')
machine = machine.replace(' ', '_')
machine = machine.replace('/', '-')
if osname[:5] == "linux":
# At least on Linux/Intel, 'machine' is the processor --
# i386, etc.
# XXX what about Alpha, SPARC, etc?
return "%s-%s" % (osname, machine)
elif osname[:5] == "sunos":
if release[0] >= "5": # SunOS 5 == Solaris 2
osname = "solaris"
release = "%d.%s" % (int(release[0]) - 3, release[2:])
# fall through to standard osname-release-machine representation
elif osname[:4] == "irix": # could be "irix64"!
return "%s-%s" % (osname, release)
elif osname[:3] == "aix":
return "%s-%s.%s" % (osname, version, release)
elif osname[:6] == "cygwin":
osname = "cygwin"
rel_re = re.compile(r'[\d.]+')
m = rel_re.match(release)
if m:
release = m.group()
elif osname[:6] == "darwin":
#
# For our purposes, we'll assume that the system version from
# distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set
# to. This makes the compatibility story a bit more sane because the
# machine is going to compile and link as if it were
# MACOSX_DEPLOYMENT_TARGET.
cfgvars = get_config_vars()
macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET')
if True:
# Always calculate the release of the running machine,
# needed to determine if we can build fat binaries or not.
macrelease = macver
# Get the system version. Reading this plist is a documented
# way to get the system version (see the documentation for
# the Gestalt Manager)
try:
f = open('/System/Library/CoreServices/SystemVersion.plist')
except IOError:
# We're on a plain darwin box, fall back to the default
# behaviour.
pass
else:
try:
m = re.search(r'<key>ProductUserVisibleVersion</key>\s*'
r'<string>(.*?)</string>', f.read())
finally:
f.close()
if m is not None:
macrelease = '.'.join(m.group(1).split('.')[:2])
# else: fall back to the default behaviour
if not macver:
macver = macrelease
if macver:
release = macver
osname = "macosx"
if ((macrelease + '.') >= '10.4.' and
'-arch' in get_config_vars().get('CFLAGS', '').strip()):
# The universal build will build fat binaries, but not on
# systems before 10.4
#
# Try to detect 4-way universal builds, those have machine-type
# 'universal' instead of 'fat'.
machine = 'fat'
cflags = get_config_vars().get('CFLAGS')
archs = re.findall('-arch\s+(\S+)', cflags)
archs = tuple(sorted(set(archs)))
if len(archs) == 1:
machine = archs[0]
elif archs == ('i386', 'ppc'):
machine = 'fat'
elif archs == ('i386', 'x86_64'):
machine = 'intel'
elif archs == ('i386', 'ppc', 'x86_64'):
machine = 'fat3'
elif archs == ('ppc64', 'x86_64'):
machine = 'fat64'
elif archs == ('i386', 'ppc', 'ppc64', 'x86_64'):
machine = 'universal'
else:
raise ValueError(
"Don't know machine value for archs=%r" % (archs,))
elif machine == 'i386':
# On OSX the machine type returned by uname is always the
# 32-bit variant, even if the executable architecture is
# the 64-bit variant
if sys.maxsize >= 2**32:
machine = 'x86_64'
elif machine in ('PowerPC', 'Power_Macintosh'):
# Pick a sane name for the PPC architecture.
# See 'i386' case
if sys.maxsize >= 2**32:
machine = 'ppc64'
else:
machine = 'ppc'
return "%s-%s-%s" % (osname, release, machine)
def get_python_version():
return _PY_VERSION_SHORT
def _print_dict(title, data):
for index, (key, value) in enumerate(sorted(data.items())):
if index == 0:
print('%s: ' % (title))
print('\t%s = "%s"' % (key, value))
def _main():
"""Display all information sysconfig detains."""
print('Platform: "%s"' % get_platform())
print('Python version: "%s"' % get_python_version())
print('Current installation scheme: "%s"' % _get_default_scheme())
print()
_print_dict('Paths', get_paths())
print()
_print_dict('Variables', get_config_vars())
if __name__ == '__main__':
_main()
#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
# All rights reserved.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
from __future__ import print_function
"""Read from and write to tar format archives.
"""
__version__ = "$Revision$"
version = "0.9.0"
__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $"
__cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
#---------
# Imports
#---------
import sys
import os
import stat
import errno
import time
import struct
import copy
import re
try:
import grp, pwd
except ImportError:
grp = pwd = None
# os.symlink on Windows prior to 6.0 raises NotImplementedError
symlink_exception = (AttributeError, NotImplementedError)
try:
# WindowsError (1314) will be raised if the caller does not hold the
# SeCreateSymbolicLinkPrivilege privilege
symlink_exception += (WindowsError,)
except NameError:
pass
# from tarfile import *
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
if sys.version_info[0] < 3:
import __builtin__ as builtins
else:
import builtins
_open = builtins.open # Since 'open' is TarFile.open
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = b"\0" # the null character
BLOCKSIZE = 512 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20 # length of records
GNU_MAGIC = b"ustar \0" # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000" # magic posix tar string
LENGTH_NAME = 100 # maximum length of a filename
LENGTH_LINK = 100 # maximum length of a linkname
LENGTH_PREFIX = 155 # maximum length of the prefix field
REGTYPE = b"0" # regular file
AREGTYPE = b"\0" # regular file
LNKTYPE = b"1" # link (inside tarfile)
SYMTYPE = b"2" # symbolic link
CHRTYPE = b"3" # character special device
BLKTYPE = b"4" # block special device
DIRTYPE = b"5" # directory
FIFOTYPE = b"6" # fifo special device
CONTTYPE = b"7" # contiguous file
GNUTYPE_LONGNAME = b"L" # GNU tar longname
GNUTYPE_LONGLINK = b"K" # GNU tar longlink
GNUTYPE_SPARSE = b"S" # GNU tar sparse file
XHDTYPE = b"x" # POSIX.1-2001 extended header
XGLTYPE = b"g" # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X" # Solaris extended header
USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1 # GNU tar format
PAX_FORMAT = 2 # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
SYMTYPE, DIRTYPE, FIFOTYPE,
CONTTYPE, CHRTYPE, BLKTYPE,
GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
GNUTYPE_SPARSE)
# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
CONTTYPE, GNUTYPE_SPARSE)
# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
GNUTYPE_SPARSE)
# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
"uid", "gid", "uname", "gname")
# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = set(("path", "linkpath", "uname", "gname"))
# Fields in a pax header that are numbers, all other fields
# are treated as strings.
PAX_NUMBER_FIELDS = {
"atime": float,
"ctime": float,
"mtime": float,
"uid": int,
"gid": int,
"size": int
}
#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
S_IFLNK = 0o120000 # symbolic link
S_IFREG = 0o100000 # regular file
S_IFBLK = 0o060000 # block device
S_IFDIR = 0o040000 # directory
S_IFCHR = 0o020000 # character device
S_IFIFO = 0o010000 # fifo
TSUID = 0o4000 # set UID on execution
TSGID = 0o2000 # set GID on execution
TSVTX = 0o1000 # reserved
TUREAD = 0o400 # read by owner
TUWRITE = 0o200 # write by owner
TUEXEC = 0o100 # execute/search by owner
TGREAD = 0o040 # read by group
TGWRITE = 0o020 # write by group
TGEXEC = 0o010 # execute/search by group
TOREAD = 0o004 # read by other
TOWRITE = 0o002 # write by other
TOEXEC = 0o001 # execute/search by other
#---------------------------------------------------------
# initialization
#---------------------------------------------------------
if os.name in ("nt", "ce"):
ENCODING = "utf-8"
else:
ENCODING = sys.getfilesystemencoding()
#---------------------------------------------------------
# Some useful functions
#---------------------------------------------------------
def stn(s, length, encoding, errors):
"""Convert a string to a null-terminated bytes object.
"""
s = s.encode(encoding, errors)
return s[:length] + (length - len(s)) * NUL
def nts(s, encoding, errors):
"""Convert a null-terminated bytes object to a string.
"""
p = s.find(b"\0")
if p != -1:
s = s[:p]
return s.decode(encoding, errors)
def nti(s):
"""Convert a number field to a python number.
"""
# There are two possible encodings for a number field, see
# itn() below.
if s[0] != chr(0o200):
try:
n = int(nts(s, "ascii", "strict") or "0", 8)
except ValueError:
raise InvalidHeaderError("invalid header")
else:
n = 0
for i in range(len(s) - 1):
n <<= 8
n += ord(s[i + 1])
return n
def itn(n, digits=8, format=DEFAULT_FORMAT):
"""Convert a python number to a number field.
"""
# POSIX 1003.1-1988 requires numbers to be encoded as a string of
# octal digits followed by a null-byte, this allows values up to
# (8**(digits-1))-1. GNU tar allows storing numbers greater than
# that if necessary. A leading 0o200 byte indicates this particular
# encoding, the following digits-1 bytes are a big-endian
# representation. This allows values up to (256**(digits-1))-1.
if 0 <= n < 8 ** (digits - 1):
s = ("%0*o" % (digits - 1, n)).encode("ascii") + NUL
else:
if format != GNU_FORMAT or n >= 256 ** (digits - 1):
raise ValueError("overflow in number field")
if n < 0:
# XXX We mimic GNU tar's behaviour with negative numbers,
# this could raise OverflowError.
n = struct.unpack("L", struct.pack("l", n))[0]
s = bytearray()
for i in range(digits - 1):
s.insert(0, n & 0o377)
n >>= 8
s.insert(0, 0o200)
return s
def calc_chksums(buf):
"""Calculate the checksum for a member's header by summing up all
characters except for the chksum field which is treated as if
it was filled with spaces. According to the GNU tar sources,
some tars (Sun and NeXT) calculate chksum with signed char,
which will be different if there are chars in the buffer with
the high bit set. So we calculate two checksums, unsigned and
signed.
"""
unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
return unsigned_chksum, signed_chksum
def copyfileobj(src, dst, length=None):
"""Copy length bytes from fileobj src to fileobj dst.
If length is None, copy the entire content.
"""
if length == 0:
return
if length is None:
while True:
buf = src.read(16*1024)
if not buf:
break
dst.write(buf)
return
BUFSIZE = 16 * 1024
blocks, remainder = divmod(length, BUFSIZE)
for b in range(blocks):
buf = src.read(BUFSIZE)
if len(buf) < BUFSIZE:
raise IOError("end of file reached")
dst.write(buf)
if remainder != 0:
buf = src.read(remainder)
if len(buf) < remainder:
raise IOError("end of file reached")
dst.write(buf)
return
filemode_table = (
((S_IFLNK, "l"),
(S_IFREG, "-"),
(S_IFBLK, "b"),
(S_IFDIR, "d"),
(S_IFCHR, "c"),
(S_IFIFO, "p")),
((TUREAD, "r"),),
((TUWRITE, "w"),),
((TUEXEC|TSUID, "s"),
(TSUID, "S"),
(TUEXEC, "x")),
((TGREAD, "r"),),
((TGWRITE, "w"),),
((TGEXEC|TSGID, "s"),
(TSGID, "S"),
(TGEXEC, "x")),
((TOREAD, "r"),),
((TOWRITE, "w"),),
((TOEXEC|TSVTX, "t"),
(TSVTX, "T"),
(TOEXEC, "x"))
)
def filemode(mode):
"""Convert a file's mode to a string of the form
-rwxrwxrwx.
Used by TarFile.list()
"""
perm = []
for table in filemode_table:
for bit, char in table:
if mode & bit == bit:
perm.append(char)
break
else:
perm.append("-")
return "".join(perm)
class TarError(Exception):
"""Base exception."""
pass
class ExtractError(TarError):
"""General exception for extract errors."""
pass
class ReadError(TarError):
"""Exception for unreadable tar archives."""
pass
class CompressionError(TarError):
"""Exception for unavailable compression methods."""
pass
class StreamError(TarError):
"""Exception for unsupported operations on stream-like TarFiles."""
pass
class HeaderError(TarError):
"""Base exception for header errors."""
pass
class EmptyHeaderError(HeaderError):
"""Exception for empty headers."""
pass
class TruncatedHeaderError(HeaderError):
"""Exception for truncated headers."""
pass
class EOFHeaderError(HeaderError):
"""Exception for end of file headers."""
pass
class InvalidHeaderError(HeaderError):
"""Exception for invalid headers."""
pass
class SubsequentHeaderError(HeaderError):
"""Exception for missing and invalid extended headers."""
pass
#---------------------------
# internal stream interface
#---------------------------
class _LowLevelFile(object):
"""Low-level file object. Supports reading and writing.
It is used instead of a regular file object for streaming
access.
"""
def __init__(self, name, mode):
mode = {
"r": os.O_RDONLY,
"w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
}[mode]
if hasattr(os, "O_BINARY"):
mode |= os.O_BINARY
self.fd = os.open(name, mode, 0o666)
def close(self):
os.close(self.fd)
def read(self, size):
return os.read(self.fd, size)
def write(self, s):
os.write(self.fd, s)
class _Stream(object):
"""Class that serves as an adapter between TarFile and
a stream-like object. The stream-like object only
needs to have a read() or write() method and is accessed
blockwise. Use of gzip or bzip2 compression is possible.
A stream-like object could be for example: sys.stdin,
sys.stdout, a socket, a tape device etc.
_Stream is intended to be used only internally.
"""
def __init__(self, name, mode, comptype, fileobj, bufsize):
"""Construct a _Stream object.
"""
self._extfileobj = True
if fileobj is None:
fileobj = _LowLevelFile(name, mode)
self._extfileobj = False
if comptype == '*':
# Enable transparent compression detection for the
# stream interface
fileobj = _StreamProxy(fileobj)
comptype = fileobj.getcomptype()
self.name = name or ""
self.mode = mode
self.comptype = comptype
self.fileobj = fileobj
self.bufsize = bufsize
self.buf = b""
self.pos = 0
self.closed = False
try:
if comptype == "gz":
try:
import zlib
except ImportError:
raise CompressionError("zlib module is not available")
self.zlib = zlib
self.crc = zlib.crc32(b"")
if mode == "r":
self._init_read_gz()
else:
self._init_write_gz()
if comptype == "bz2":
try:
import bz2
except ImportError:
raise CompressionError("bz2 module is not available")
if mode == "r":
self.dbuf = b""
self.cmp = bz2.BZ2Decompressor()
else:
self.cmp = bz2.BZ2Compressor()
except:
if not self._extfileobj:
self.fileobj.close()
self.closed = True
raise
def __del__(self):
if hasattr(self, "closed") and not self.closed:
self.close()
def _init_write_gz(self):
"""Initialize for writing with gzip compression.
"""
self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
-self.zlib.MAX_WBITS,
self.zlib.DEF_MEM_LEVEL,
0)
timestamp = struct.pack("<L", int(time.time()))
self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
if self.name.endswith(".gz"):
self.name = self.name[:-3]
# RFC1952 says we must use ISO-8859-1 for the FNAME field.
self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
def write(self, s):
"""Write string s to the stream.
"""
if self.comptype == "gz":
self.crc = self.zlib.crc32(s, self.crc)
self.pos += len(s)
if self.comptype != "tar":
s = self.cmp.compress(s)
self.__write(s)
def __write(self, s):
"""Write string s to the stream if a whole new block
is ready to be written.
"""
self.buf += s
while len(self.buf) > self.bufsize:
self.fileobj.write(self.buf[:self.bufsize])
self.buf = self.buf[self.bufsize:]
def close(self):
"""Close the _Stream object. No operation should be
done on it afterwards.
"""
if self.closed:
return
if self.mode == "w" and self.comptype != "tar":
self.buf += self.cmp.flush()
if self.mode == "w" and self.buf:
self.fileobj.write(self.buf)
self.buf = b""
if self.comptype == "gz":
# The native zlib crc is an unsigned 32-bit integer, but
# the Python wrapper implicitly casts that to a signed C
# long. So, on a 32-bit box self.crc may "look negative",
# while the same crc on a 64-bit box may "look positive".
# To avoid irksome warnings from the `struct` module, force
# it to look positive on all boxes.
self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
if not self._extfileobj:
self.fileobj.close()
self.closed = True
def _init_read_gz(self):
"""Initialize for reading a gzip compressed fileobj.
"""
self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
self.dbuf = b""
# taken from gzip.GzipFile with some alterations
if self.__read(2) != b"\037\213":
raise ReadError("not a gzip file")
if self.__read(1) != b"\010":
raise CompressionError("unsupported compression method")
flag = ord(self.__read(1))
self.__read(6)
if flag & 4:
xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
self.read(xlen)
if flag & 8:
while True:
s = self.__read(1)
if not s or s == NUL:
break
if flag & 16:
while True:
s = self.__read(1)
if not s or s == NUL:
break
if flag & 2:
self.__read(2)
def tell(self):
"""Return the stream's file pointer position.
"""
return self.pos
def seek(self, pos=0):
"""Set the stream's file pointer to pos. Negative seeking
is forbidden.
"""
if pos - self.pos >= 0:
blocks, remainder = divmod(pos - self.pos, self.bufsize)
for i in range(blocks):
self.read(self.bufsize)
self.read(remainder)
else:
raise StreamError("seeking backwards is not allowed")
return self.pos
def read(self, size=None):
"""Return the next size number of bytes from the stream.
If size is not defined, return all bytes of the stream
up to EOF.
"""
if size is None:
t = []
while True:
buf = self._read(self.bufsize)
if not buf:
break
t.append(buf)
buf = "".join(t)
else:
buf = self._read(size)
self.pos += len(buf)
return buf
def _read(self, size):
"""Return size bytes from the stream.
"""
if self.comptype == "tar":
return self.__read(size)
c = len(self.dbuf)
while c < size:
buf = self.__read(self.bufsize)
if not buf:
break
try:
buf = self.cmp.decompress(buf)
except IOError:
raise ReadError("invalid compressed data")
self.dbuf += buf
c += len(buf)
buf = self.dbuf[:size]
self.dbuf = self.dbuf[size:]
return buf
def __read(self, size):
"""Return size bytes from stream. If internal buffer is empty,
read another block from the stream.
"""
c = len(self.buf)
while c < size:
buf = self.fileobj.read(self.bufsize)
if not buf:
break
self.buf += buf
c += len(buf)
buf = self.buf[:size]
self.buf = self.buf[size:]
return buf
# class _Stream
class _StreamProxy(object):
"""Small proxy class that enables transparent compression
detection for the Stream interface (mode 'r|*').
"""
def __init__(self, fileobj):
self.fileobj = fileobj
self.buf = self.fileobj.read(BLOCKSIZE)
def read(self, size):
self.read = self.fileobj.read
return self.buf
def getcomptype(self):
if self.buf.startswith(b"\037\213\010"):
return "gz"
if self.buf.startswith(b"BZh91"):
return "bz2"
return "tar"
def close(self):
self.fileobj.close()
# class StreamProxy
class _BZ2Proxy(object):
"""Small proxy class that enables external file object
support for "r:bz2" and "w:bz2" modes. This is actually
a workaround for a limitation in bz2 module's BZ2File
class which (unlike gzip.GzipFile) has no support for
a file object argument.
"""
blocksize = 16 * 1024
def __init__(self, fileobj, mode):
self.fileobj = fileobj
self.mode = mode
self.name = getattr(self.fileobj, "name", None)
self.init()
def init(self):
import bz2
self.pos = 0
if self.mode == "r":
self.bz2obj = bz2.BZ2Decompressor()
self.fileobj.seek(0)
self.buf = b""
else:
self.bz2obj = bz2.BZ2Compressor()
def read(self, size):
x = len(self.buf)
while x < size:
raw = self.fileobj.read(self.blocksize)
if not raw:
break
data = self.bz2obj.decompress(raw)
self.buf += data
x += len(data)
buf = self.buf[:size]
self.buf = self.buf[size:]
self.pos += len(buf)
return buf
def seek(self, pos):
if pos < self.pos:
self.init()
self.read(pos - self.pos)
def tell(self):
return self.pos
def write(self, data):
self.pos += len(data)
raw = self.bz2obj.compress(data)
self.fileobj.write(raw)
def close(self):
if self.mode == "w":
raw = self.bz2obj.flush()
self.fileobj.write(raw)
# class _BZ2Proxy
#------------------------
# Extraction file object
#------------------------
class _FileInFile(object):
"""A thin wrapper around an existing file object that
provides a part of its data as an individual file
object.
"""
def __init__(self, fileobj, offset, size, blockinfo=None):
self.fileobj = fileobj
self.offset = offset
self.size = size
self.position = 0
if blockinfo is None:
blockinfo = [(0, size)]
# Construct a map with data and zero blocks.
self.map_index = 0
self.map = []
lastpos = 0
realpos = self.offset
for offset, size in blockinfo:
if offset > lastpos:
self.map.append((False, lastpos, offset, None))
self.map.append((True, offset, offset + size, realpos))
realpos += size
lastpos = offset + size
if lastpos < self.size:
self.map.append((False, lastpos, self.size, None))
def seekable(self):
if not hasattr(self.fileobj, "seekable"):
# XXX gzip.GzipFile and bz2.BZ2File
return True
return self.fileobj.seekable()
def tell(self):
"""Return the current file position.
"""
return self.position
def seek(self, position):
"""Seek to a position in the file.
"""
self.position = position
def read(self, size=None):
"""Read data from the file.
"""
if size is None:
size = self.size - self.position
else:
size = min(size, self.size - self.position)
buf = b""
while size > 0:
while True:
data, start, stop, offset = self.map[self.map_index]
if start <= self.position < stop:
break
else:
self.map_index += 1
if self.map_index == len(self.map):
self.map_index = 0
length = min(size, stop - self.position)
if data:
self.fileobj.seek(offset + (self.position - start))
buf += self.fileobj.read(length)
else:
buf += NUL * length
size -= length
self.position += length
return buf
#class _FileInFile
class ExFileObject(object):
"""File-like object for reading an archive member.
Is returned by TarFile.extractfile().
"""
blocksize = 1024
def __init__(self, tarfile, tarinfo):
self.fileobj = _FileInFile(tarfile.fileobj,
tarinfo.offset_data,
tarinfo.size,
tarinfo.sparse)
self.name = tarinfo.name
self.mode = "r"
self.closed = False
self.size = tarinfo.size
self.position = 0
self.buffer = b""
def readable(self):
return True
def writable(self):
return False
def seekable(self):
return self.fileobj.seekable()
def read(self, size=None):
"""Read at most size bytes from the file. If size is not
present or None, read all data until EOF is reached.
"""
if self.closed:
raise ValueError("I/O operation on closed file")
buf = b""
if self.buffer:
if size is None:
buf = self.buffer
self.buffer = b""
else:
buf = self.buffer[:size]
self.buffer = self.buffer[size:]
if size is None:
buf += self.fileobj.read()
else:
buf += self.fileobj.read(size - len(buf))
self.position += len(buf)
return buf
# XXX TextIOWrapper uses the read1() method.
read1 = read
def readline(self, size=-1):
"""Read one entire line from the file. If size is present
and non-negative, return a string with at most that
size, which may be an incomplete line.
"""
if self.closed:
raise ValueError("I/O operation on closed file")
pos = self.buffer.find(b"\n") + 1
if pos == 0:
# no newline found.
while True:
buf = self.fileobj.read(self.blocksize)
self.buffer += buf
if not buf or b"\n" in buf:
pos = self.buffer.find(b"\n") + 1
if pos == 0:
# no newline found.
pos = len(self.buffer)
break
if size != -1:
pos = min(size, pos)
buf = self.buffer[:pos]
self.buffer = self.buffer[pos:]
self.position += len(buf)
return buf
def readlines(self):
"""Return a list with all remaining lines.
"""
result = []
while True:
line = self.readline()
if not line: break
result.append(line)
return result
def tell(self):
"""Return the current file position.
"""
if self.closed:
raise ValueError("I/O operation on closed file")
return self.position
def seek(self, pos, whence=os.SEEK_SET):
"""Seek to a position in the file.
"""
if self.closed:
raise ValueError("I/O operation on closed file")
if whence == os.SEEK_SET:
self.position = min(max(pos, 0), self.size)
elif whence == os.SEEK_CUR:
if pos < 0:
self.position = max(self.position + pos, 0)
else:
self.position = min(self.position + pos, self.size)
elif whence == os.SEEK_END:
self.position = max(min(self.size + pos, self.size), 0)
else:
raise ValueError("Invalid argument")
self.buffer = b""
self.fileobj.seek(self.position)
def close(self):
"""Close the file object.
"""
self.closed = True
def __iter__(self):
"""Get an iterator over the file's lines.
"""
while True:
line = self.readline()
if not line:
break
yield line
#class ExFileObject
#------------------
# Exported Classes
#------------------
class TarInfo(object):
"""Informational class which holds the details about an
archive member given by a tar header block.
TarInfo objects are returned by TarFile.getmember(),
TarFile.getmembers() and TarFile.gettarinfo() and are
usually created internally.
"""
__slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
"chksum", "type", "linkname", "uname", "gname",
"devmajor", "devminor",
"offset", "offset_data", "pax_headers", "sparse",
"tarfile", "_sparse_structs", "_link_target")
def __init__(self, name=""):
"""Construct a TarInfo object. name is the optional name
of the member.
"""
self.name = name # member name
self.mode = 0o644 # file permissions
self.uid = 0 # user id
self.gid = 0 # group id
self.size = 0 # file size
self.mtime = 0 # modification time
self.chksum = 0 # header checksum
self.type = REGTYPE # member type
self.linkname = "" # link name
self.uname = "" # user name
self.gname = "" # group name
self.devmajor = 0 # device major number
self.devminor = 0 # device minor number
self.offset = 0 # the tar header starts here
self.offset_data = 0 # the file's data starts here
self.sparse = None # sparse member information
self.pax_headers = {} # pax header information
# In pax headers the "name" and "linkname" field are called
# "path" and "linkpath".
def _getpath(self):
return self.name
def _setpath(self, name):
self.name = name
path = property(_getpath, _setpath)
def _getlinkpath(self):
return self.linkname
def _setlinkpath(self, linkname):
self.linkname = linkname
linkpath = property(_getlinkpath, _setlinkpath)
def __repr__(self):
return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
def get_info(self):
"""Return the TarInfo's attributes as a dictionary.
"""
info = {
"name": self.name,
"mode": self.mode & 0o7777,
"uid": self.uid,
"gid": self.gid,
"size": self.size,
"mtime": self.mtime,
"chksum": self.chksum,
"type": self.type,
"linkname": self.linkname,
"uname": self.uname,
"gname": self.gname,
"devmajor": self.devmajor,
"devminor": self.devminor
}
if info["type"] == DIRTYPE and not info["name"].endswith("/"):
info["name"] += "/"
return info
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
"""Return a tar header as a string of 512 byte blocks.
"""
info = self.get_info()
if format == USTAR_FORMAT:
return self.create_ustar_header(info, encoding, errors)
elif format == GNU_FORMAT:
return self.create_gnu_header(info, encoding, errors)
elif format == PAX_FORMAT:
return self.create_pax_header(info, encoding)
else:
raise ValueError("invalid format")
def create_ustar_header(self, info, encoding, errors):
"""Return the object as a ustar header block.
"""
info["magic"] = POSIX_MAGIC
if len(info["linkname"]) > LENGTH_LINK:
raise ValueError("linkname is too long")
if len(info["name"]) > LENGTH_NAME:
info["prefix"], info["name"] = self._posix_split_name(info["name"])
return self._create_header(info, USTAR_FORMAT, encoding, errors)
def create_gnu_header(self, info, encoding, errors):
"""Return the object as a GNU header block sequence.
"""
info["magic"] = GNU_MAGIC
buf = b""
if len(info["linkname"]) > LENGTH_LINK:
buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
if len(info["name"]) > LENGTH_NAME:
buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
def create_pax_header(self, info, encoding):
"""Return the object as a ustar header block. If it cannot be
represented this way, prepend a pax extended header sequence
with supplement information.
"""
info["magic"] = POSIX_MAGIC
pax_headers = self.pax_headers.copy()
# Test string fields for values that exceed the field length or cannot
# be represented in ASCII encoding.
for name, hname, length in (
("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
("uname", "uname", 32), ("gname", "gname", 32)):
if hname in pax_headers:
# The pax header has priority.
continue
# Try to encode the string as ASCII.
try:
info[name].encode("ascii", "strict")
except UnicodeEncodeError:
pax_headers[hname] = info[name]
continue
if len(info[name]) > length:
pax_headers[hname] = info[name]
# Test number fields for values that exceed the field limit or values
# that like to be stored as float.
for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
if name in pax_headers:
# The pax header has priority. Avoid overflow.
info[name] = 0
continue
val = info[name]
if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
pax_headers[name] = str(val)
info[name] = 0
# Create a pax extended header if necessary.
if pax_headers:
buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
else:
buf = b""
return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
@classmethod
def create_pax_global_header(cls, pax_headers):
"""Return the object as a pax global header block sequence.
"""
return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf8")
def _posix_split_name(self, name):
"""Split a name longer than 100 chars into a prefix
and a name part.
"""
prefix = name[:LENGTH_PREFIX + 1]
while prefix and prefix[-1] != "/":
prefix = prefix[:-1]
name = name[len(prefix):]
prefix = prefix[:-1]
if not prefix or len(name) > LENGTH_NAME:
raise ValueError("name is too long")
return prefix, name
@staticmethod
def _create_header(info, format, encoding, errors):
"""Return a header block. info is a dictionary with file
information, format must be one of the *_FORMAT constants.
"""
parts = [
stn(info.get("name", ""), 100, encoding, errors),
itn(info.get("mode", 0) & 0o7777, 8, format),
itn(info.get("uid", 0), 8, format),
itn(info.get("gid", 0), 8, format),
itn(info.get("size", 0), 12, format),
itn(info.get("mtime", 0), 12, format),
b" ", # checksum field
info.get("type", REGTYPE),
stn(info.get("linkname", ""), 100, encoding, errors),
info.get("magic", POSIX_MAGIC),
stn(info.get("uname", ""), 32, encoding, errors),
stn(info.get("gname", ""), 32, encoding, errors),
itn(info.get("devmajor", 0), 8, format),
itn(info.get("devminor", 0), 8, format),
stn(info.get("prefix", ""), 155, encoding, errors)
]
buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
buf = buf[:-364] + ("%06o\0" % chksum).encode("ascii") + buf[-357:]
return buf
@staticmethod
def _create_payload(payload):
"""Return the string payload filled with zero bytes
up to the next 512 byte border.
"""
blocks, remainder = divmod(len(payload), BLOCKSIZE)
if remainder > 0:
payload += (BLOCKSIZE - remainder) * NUL
return payload
@classmethod
def _create_gnu_long_header(cls, name, type, encoding, errors):
"""Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
for name.
"""
name = name.encode(encoding, errors) + NUL
info = {}
info["name"] = "././@LongLink"
info["type"] = type
info["size"] = len(name)
info["magic"] = GNU_MAGIC
# create extended header + name blocks.
return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
cls._create_payload(name)
@classmethod
def _create_pax_generic_header(cls, pax_headers, type, encoding):
"""Return a POSIX.1-2008 extended or global header sequence
that contains a list of keyword, value pairs. The values
must be strings.
"""
# Check if one of the fields contains surrogate characters and thereby
# forces hdrcharset=BINARY, see _proc_pax() for more information.
binary = False
for keyword, value in pax_headers.items():
try:
value.encode("utf8", "strict")
except UnicodeEncodeError:
binary = True
break
records = b""
if binary:
# Put the hdrcharset field at the beginning of the header.
records += b"21 hdrcharset=BINARY\n"
for keyword, value in pax_headers.items():
keyword = keyword.encode("utf8")
if binary:
# Try to restore the original byte representation of `value'.
# Needless to say, that the encoding must match the string.
value = value.encode(encoding, "surrogateescape")
else:
value = value.encode("utf8")
l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
n = p = 0
while True:
n = l + len(str(p))
if n == p:
break
p = n
records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
# We use a hardcoded "././@PaxHeader" name like star does
# instead of the one that POSIX recommends.
info = {}
info["name"] = "././@PaxHeader"
info["type"] = type
info["size"] = len(records)
info["magic"] = POSIX_MAGIC
# Create pax header + record blocks.
return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
cls._create_payload(records)
@classmethod
def frombuf(cls, buf, encoding, errors):
"""Construct a TarInfo object from a 512 byte bytes object.
"""
if len(buf) == 0:
raise EmptyHeaderError("empty header")
if len(buf) != BLOCKSIZE:
raise TruncatedHeaderError("truncated header")
if buf.count(NUL) == BLOCKSIZE:
raise EOFHeaderError("end of file header")
chksum = nti(buf[148:156])
if chksum not in calc_chksums(buf):
raise InvalidHeaderError("bad checksum")
obj = cls()
obj.name = nts(buf[0:100], encoding, errors)
obj.mode = nti(buf[100:108])
obj.uid = nti(buf[108:116])
obj.gid = nti(buf[116:124])
obj.size = nti(buf[124:136])
obj.mtime = nti(buf[136:148])
obj.chksum = chksum
obj.type = buf[156:157]
obj.linkname = nts(buf[157:257], encoding, errors)
obj.uname = nts(buf[265:297], encoding, errors)
obj.gname = nts(buf[297:329], encoding, errors)
obj.devmajor = nti(buf[329:337])
obj.devminor = nti(buf[337:345])
prefix = nts(buf[345:500], encoding, errors)
# Old V7 tar format represents a directory as a regular
# file with a trailing slash.
if obj.type == AREGTYPE and obj.name.endswith("/"):
obj.type = DIRTYPE
# The old GNU sparse format occupies some of the unused
# space in the buffer for up to 4 sparse structures.
# Save the them for later processing in _proc_sparse().
if obj.type == GNUTYPE_SPARSE:
pos = 386
structs = []
for i in range(4):
try:
offset = nti(buf[pos:pos + 12])
numbytes = nti(buf[pos + 12:pos + 24])
except ValueError:
break
structs.append((offset, numbytes))
pos += 24
isextended = bool(buf[482])
origsize = nti(buf[483:495])
obj._sparse_structs = (structs, isextended, origsize)
# Remove redundant slashes from directories.
if obj.isdir():
obj.name = obj.name.rstrip("/")
# Reconstruct a ustar longname.
if prefix and obj.type not in GNU_TYPES:
obj.name = prefix + "/" + obj.name
return obj
@classmethod
def fromtarfile(cls, tarfile):
"""Return the next TarInfo object from TarFile object
tarfile.
"""
buf = tarfile.fileobj.read(BLOCKSIZE)
obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
return obj._proc_member(tarfile)
#--------------------------------------------------------------------------
# The following are methods that are called depending on the type of a
# member. The entry point is _proc_member() which can be overridden in a
# subclass to add custom _proc_*() methods. A _proc_*() method MUST
# implement the following
# operations:
# 1. Set self.offset_data to the position where the data blocks begin,
# if there is data that follows.
# 2. Set tarfile.offset to the position where the next member's header will
# begin.
# 3. Return self or another valid TarInfo object.
def _proc_member(self, tarfile):
"""Choose the right processing method depending on
the type and call it.
"""
if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
return self._proc_gnulong(tarfile)
elif self.type == GNUTYPE_SPARSE:
return self._proc_sparse(tarfile)
elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
return self._proc_pax(tarfile)
else:
return self._proc_builtin(tarfile)
def _proc_builtin(self, tarfile):
"""Process a builtin type or an unknown type which
will be treated as a regular file.
"""
self.offset_data = tarfile.fileobj.tell()
offset = self.offset_data
if self.isreg() or self.type not in SUPPORTED_TYPES:
# Skip the following data blocks.
offset += self._block(self.size)
tarfile.offset = offset
# Patch the TarInfo object with saved global
# header information.
self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
return self
def _proc_gnulong(self, tarfile):
"""Process the blocks that hold a GNU longname
or longlink member.
"""
buf = tarfile.fileobj.read(self._block(self.size))
# Fetch the next header and process it.
try:
next = self.fromtarfile(tarfile)
except HeaderError:
raise SubsequentHeaderError("missing or bad subsequent header")
# Patch the TarInfo object from the next header with
# the longname information.
next.offset = self.offset
if self.type == GNUTYPE_LONGNAME:
next.name = nts(buf, tarfile.encoding, tarfile.errors)
elif self.type == GNUTYPE_LONGLINK:
next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
return next
def _proc_sparse(self, tarfile):
"""Process a GNU sparse header plus extra headers.
"""
# We already collected some sparse structures in frombuf().
structs, isextended, origsize = self._sparse_structs
del self._sparse_structs
# Collect sparse structures from extended header blocks.
while isextended:
buf = tarfile.fileobj.read(BLOCKSIZE)
pos = 0
for i in range(21):
try:
offset = nti(buf[pos:pos + 12])
numbytes = nti(buf[pos + 12:pos + 24])
except ValueError:
break
if offset and numbytes:
structs.append((offset, numbytes))
pos += 24
isextended = bool(buf[504])
self.sparse = structs
self.offset_data = tarfile.fileobj.tell()
tarfile.offset = self.offset_data + self._block(self.size)
self.size = origsize
return self
def _proc_pax(self, tarfile):
"""Process an extended or global header as described in
POSIX.1-2008.
"""
# Read the header information.
buf = tarfile.fileobj.read(self._block(self.size))
# A pax header stores supplemental information for either
# the following file (extended) or all following files
# (global).
if self.type == XGLTYPE:
pax_headers = tarfile.pax_headers
else:
pax_headers = tarfile.pax_headers.copy()
# Check if the pax header contains a hdrcharset field. This tells us
# the encoding of the path, linkpath, uname and gname fields. Normally,
# these fields are UTF-8 encoded but since POSIX.1-2008 tar
# implementations are allowed to store them as raw binary strings if
# the translation to UTF-8 fails.
match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
if match is not None:
pax_headers["hdrcharset"] = match.group(1).decode("utf8")
# For the time being, we don't care about anything other than "BINARY".
# The only other value that is currently allowed by the standard is
# "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
hdrcharset = pax_headers.get("hdrcharset")
if hdrcharset == "BINARY":
encoding = tarfile.encoding
else:
encoding = "utf8"
# Parse pax header information. A record looks like that:
# "%d %s=%s\n" % (length, keyword, value). length is the size
# of the complete record including the length field itself and
# the newline. keyword and value are both UTF-8 encoded strings.
regex = re.compile(br"(\d+) ([^=]+)=")
pos = 0
while True:
match = regex.match(buf, pos)
if not match:
break
length, keyword = match.groups()
length = int(length)
value = buf[match.end(2) + 1:match.start(1) + length - 1]
# Normally, we could just use "utf8" as the encoding and "strict"
# as the error handler, but we better not take the risk. For
# example, GNU tar <= 1.23 is known to store filenames it cannot
# translate to UTF-8 as raw strings (unfortunately without a
# hdrcharset=BINARY header).
# We first try the strict standard encoding, and if that fails we
# fall back on the user's encoding and error handler.
keyword = self._decode_pax_field(keyword, "utf8", "utf8",
tarfile.errors)
if keyword in PAX_NAME_FIELDS:
value = self._decode_pax_field(value, encoding, tarfile.encoding,
tarfile.errors)
else:
value = self._decode_pax_field(value, "utf8", "utf8",
tarfile.errors)
pax_headers[keyword] = value
pos += length
# Fetch the next header.
try:
next = self.fromtarfile(tarfile)
except HeaderError:
raise SubsequentHeaderError("missing or bad subsequent header")
# Process GNU sparse information.
if "GNU.sparse.map" in pax_headers:
# GNU extended sparse format version 0.1.
self._proc_gnusparse_01(next, pax_headers)
elif "GNU.sparse.size" in pax_headers:
# GNU extended sparse format version 0.0.
self._proc_gnusparse_00(next, pax_headers, buf)
elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
# GNU extended sparse format version 1.0.
self._proc_gnusparse_10(next, pax_headers, tarfile)
if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
# Patch the TarInfo object with the extended header info.
next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
next.offset = self.offset
if "size" in pax_headers:
# If the extended header replaces the size field,
# we need to recalculate the offset where the next
# header starts.
offset = next.offset_data
if next.isreg() or next.type not in SUPPORTED_TYPES:
offset += next._block(next.size)
tarfile.offset = offset
return next
def _proc_gnusparse_00(self, next, pax_headers, buf):
"""Process a GNU tar extended sparse header, version 0.0.
"""
offsets = []
for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
offsets.append(int(match.group(1)))
numbytes = []
for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
numbytes.append(int(match.group(1)))
next.sparse = list(zip(offsets, numbytes))
def _proc_gnusparse_01(self, next, pax_headers):
"""Process a GNU tar extended sparse header, version 0.1.
"""
sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
next.sparse = list(zip(sparse[::2], sparse[1::2]))
def _proc_gnusparse_10(self, next, pax_headers, tarfile):
"""Process a GNU tar extended sparse header, version 1.0.
"""
fields = None
sparse = []
buf = tarfile.fileobj.read(BLOCKSIZE)
fields, buf = buf.split(b"\n", 1)
fields = int(fields)
while len(sparse) < fields * 2:
if b"\n" not in buf:
buf += tarfile.fileobj.read(BLOCKSIZE)
number, buf = buf.split(b"\n", 1)
sparse.append(int(number))
next.offset_data = tarfile.fileobj.tell()
next.sparse = list(zip(sparse[::2], sparse[1::2]))
def _apply_pax_info(self, pax_headers, encoding, errors):
"""Replace fields with supplemental information from a previous
pax extended or global header.
"""
for keyword, value in pax_headers.items():
if keyword == "GNU.sparse.name":
setattr(self, "path", value)
elif keyword == "GNU.sparse.size":
setattr(self, "size", int(value))
elif keyword == "GNU.sparse.realsize":
setattr(self, "size", int(value))
elif keyword in PAX_FIELDS:
if keyword in PAX_NUMBER_FIELDS:
try:
value = PAX_NUMBER_FIELDS[keyword](value)
except ValueError:
value = 0
if keyword == "path":
value = value.rstrip("/")
setattr(self, keyword, value)
self.pax_headers = pax_headers.copy()
def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
"""Decode a single field from a pax record.
"""
try:
return value.decode(encoding, "strict")
except UnicodeDecodeError:
return value.decode(fallback_encoding, fallback_errors)
def _block(self, count):
"""Round up a byte count by BLOCKSIZE and return it,
e.g. _block(834) => 1024.
"""
blocks, remainder = divmod(count, BLOCKSIZE)
if remainder:
blocks += 1
return blocks * BLOCKSIZE
def isreg(self):
return self.type in REGULAR_TYPES
def isfile(self):
return self.isreg()
def isdir(self):
return self.type == DIRTYPE
def issym(self):
return self.type == SYMTYPE
def islnk(self):
return self.type == LNKTYPE
def ischr(self):
return self.type == CHRTYPE
def isblk(self):
return self.type == BLKTYPE
def isfifo(self):
return self.type == FIFOTYPE
def issparse(self):
return self.sparse is not None
def isdev(self):
return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
class TarFile(object):
"""The TarFile Class provides an interface to tar archives.
"""
debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
dereference = False # If true, add content of linked file to the
# tar file, else the link.
ignore_zeros = False # If true, skips empty or invalid blocks and
# continues processing.
errorlevel = 1 # If 0, fatal errors only appear in debug
# messages (if debug >= 0). If > 0, errors
# are passed to the caller as exceptions.
format = DEFAULT_FORMAT # The format to use when creating an archive.
encoding = ENCODING # Encoding for 8-bit character strings.
errors = None # Error handler for unicode conversion.
tarinfo = TarInfo # The default TarInfo class to use.
fileobject = ExFileObject # The default ExFileObject class to use.
def __init__(self, name=None, mode="r", fileobj=None, format=None,
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None):
"""Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
read from an existing archive, 'a' to append data to an existing
file or 'w' to create a new file overwriting an existing one. `mode'
defaults to 'r'.
If `fileobj' is given, it is used for reading or writing data. If it
can be determined, `mode' is overridden by `fileobj's mode.
`fileobj' is not closed, when TarFile is closed.
"""
if len(mode) > 1 or mode not in "raw":
raise ValueError("mode must be 'r', 'a' or 'w'")
self.mode = mode
self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
if not fileobj:
if self.mode == "a" and not os.path.exists(name):
# Create nonexistent files in append mode.
self.mode = "w"
self._mode = "wb"
fileobj = bltn_open(name, self._mode)
self._extfileobj = False
else:
if name is None and hasattr(fileobj, "name"):
name = fileobj.name
if hasattr(fileobj, "mode"):
self._mode = fileobj.mode
self._extfileobj = True
self.name = os.path.abspath(name) if name else None
self.fileobj = fileobj
# Init attributes.
if format is not None:
self.format = format
if tarinfo is not None:
self.tarinfo = tarinfo
if dereference is not None:
self.dereference = dereference
if ignore_zeros is not None:
self.ignore_zeros = ignore_zeros
if encoding is not None:
self.encoding = encoding
self.errors = errors
if pax_headers is not None and self.format == PAX_FORMAT:
self.pax_headers = pax_headers
else:
self.pax_headers = {}
if debug is not None:
self.debug = debug
if errorlevel is not None:
self.errorlevel = errorlevel
# Init datastructures.
self.closed = False
self.members = [] # list of members as TarInfo objects
self._loaded = False # flag if all members have been read
self.offset = self.fileobj.tell()
# current position in the archive file
self.inodes = {} # dictionary caching the inodes of
# archive members already added
try:
if self.mode == "r":
self.firstmember = None
self.firstmember = self.next()
if self.mode == "a":
# Move to the end of the archive,
# before the first empty block.
while True:
self.fileobj.seek(self.offset)
try:
tarinfo = self.tarinfo.fromtarfile(self)
self.members.append(tarinfo)
except EOFHeaderError:
self.fileobj.seek(self.offset)
break
except HeaderError as e:
raise ReadError(str(e))
if self.mode in "aw":
self._loaded = True
if self.pax_headers:
buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
self.fileobj.write(buf)
self.offset += len(buf)
except:
if not self._extfileobj:
self.fileobj.close()
self.closed = True
raise
#--------------------------------------------------------------------------
# Below are the classmethods which act as alternate constructors to the
# TarFile class. The open() method is the only one that is needed for
# public use; it is the "super"-constructor and is able to select an
# adequate "sub"-constructor for a particular compression using the mapping
# from OPEN_METH.
#
# This concept allows one to subclass TarFile without losing the comfort of
# the super-constructor. A sub-constructor is registered and made available
# by adding it to the mapping in OPEN_METH.
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
"""Open a tar archive for reading, writing or appending. Return
an appropriate TarFile class.
mode:
'r' or 'r:*' open for reading with transparent compression
'r:' open for reading exclusively uncompressed
'r:gz' open for reading with gzip compression
'r:bz2' open for reading with bzip2 compression
'a' or 'a:' open for appending, creating the file if necessary
'w' or 'w:' open for writing without compression
'w:gz' open for writing with gzip compression
'w:bz2' open for writing with bzip2 compression
'r|*' open a stream of tar blocks with transparent compression
'r|' open an uncompressed stream of tar blocks for reading
'r|gz' open a gzip compressed stream of tar blocks
'r|bz2' open a bzip2 compressed stream of tar blocks
'w|' open an uncompressed stream for writing
'w|gz' open a gzip compressed stream for writing
'w|bz2' open a bzip2 compressed stream for writing
"""
if not name and not fileobj:
raise ValueError("nothing to open")
if mode in ("r", "r:*"):
# Find out which *open() is appropriate for opening the file.
for comptype in cls.OPEN_METH:
func = getattr(cls, cls.OPEN_METH[comptype])
if fileobj is not None:
saved_pos = fileobj.tell()
try:
return func(name, "r", fileobj, **kwargs)
except (ReadError, CompressionError) as e:
if fileobj is not None:
fileobj.seek(saved_pos)
continue
raise ReadError("file could not be opened successfully")
elif ":" in mode:
filemode, comptype = mode.split(":", 1)
filemode = filemode or "r"
comptype = comptype or "tar"
# Select the *open() function according to
# given compression.
if comptype in cls.OPEN_METH:
func = getattr(cls, cls.OPEN_METH[comptype])
else:
raise CompressionError("unknown compression type %r" % comptype)
return func(name, filemode, fileobj, **kwargs)
elif "|" in mode:
filemode, comptype = mode.split("|", 1)
filemode = filemode or "r"
comptype = comptype or "tar"
if filemode not in "rw":
raise ValueError("mode must be 'r' or 'w'")
stream = _Stream(name, filemode, comptype, fileobj, bufsize)
try:
t = cls(name, filemode, stream, **kwargs)
except:
stream.close()
raise
t._extfileobj = False
return t
elif mode in "aw":
return cls.taropen(name, mode, fileobj, **kwargs)
raise ValueError("undiscernible mode")
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
"""Open uncompressed tar archive name for reading or writing.
"""
if len(mode) > 1 or mode not in "raw":
raise ValueError("mode must be 'r', 'a' or 'w'")
return cls(name, mode, fileobj, **kwargs)
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
"""Open gzip compressed tar archive name for reading or writing.
Appending is not allowed.
"""
if len(mode) > 1 or mode not in "rw":
raise ValueError("mode must be 'r' or 'w'")
try:
import gzip
gzip.GzipFile
except (ImportError, AttributeError):
raise CompressionError("gzip module is not available")
extfileobj = fileobj is not None
try:
fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
t = cls.taropen(name, mode, fileobj, **kwargs)
except IOError:
if not extfileobj and fileobj is not None:
fileobj.close()
if fileobj is None:
raise
raise ReadError("not a gzip file")
except:
if not extfileobj and fileobj is not None:
fileobj.close()
raise
t._extfileobj = extfileobj
return t
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
"""Open bzip2 compressed tar archive name for reading or writing.
Appending is not allowed.
"""
if len(mode) > 1 or mode not in "rw":
raise ValueError("mode must be 'r' or 'w'.")
try:
import bz2
except ImportError:
raise CompressionError("bz2 module is not available")
if fileobj is not None:
fileobj = _BZ2Proxy(fileobj, mode)
else:
fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
try:
t = cls.taropen(name, mode, fileobj, **kwargs)
except (IOError, EOFError):
fileobj.close()
raise ReadError("not a bzip2 file")
t._extfileobj = False
return t
# All *open() methods are registered here.
OPEN_METH = {
"tar": "taropen", # uncompressed tar
"gz": "gzopen", # gzip compressed tar
"bz2": "bz2open" # bzip2 compressed tar
}
#--------------------------------------------------------------------------
# The public methods which TarFile provides:
def close(self):
"""Close the TarFile. In write-mode, two finishing zero blocks are
appended to the archive.
"""
if self.closed:
return
if self.mode in "aw":
self.fileobj.write(NUL * (BLOCKSIZE * 2))
self.offset += (BLOCKSIZE * 2)
# fill up the end with zero-blocks
# (like option -b20 for tar does)
blocks, remainder = divmod(self.offset, RECORDSIZE)
if remainder > 0:
self.fileobj.write(NUL * (RECORDSIZE - remainder))
if not self._extfileobj:
self.fileobj.close()
self.closed = True
def getmember(self, name):
"""Return a TarInfo object for member `name'. If `name' can not be
found in the archive, KeyError is raised. If a member occurs more
than once in the archive, its last occurrence is assumed to be the
most up-to-date version.
"""
tarinfo = self._getmember(name)
if tarinfo is None:
raise KeyError("filename %r not found" % name)
return tarinfo
def getmembers(self):
"""Return the members of the archive as a list of TarInfo objects. The
list has the same order as the members in the archive.
"""
self._check()
if not self._loaded: # if we want to obtain a list of
self._load() # all members, we first have to
# scan the whole archive.
return self.members
def getnames(self):
"""Return the members of the archive as a list of their names. It has
the same order as the list returned by getmembers().
"""
return [tarinfo.name for tarinfo in self.getmembers()]
def gettarinfo(self, name=None, arcname=None, fileobj=None):
"""Create a TarInfo object for either the file `name' or the file
object `fileobj' (using os.fstat on its file descriptor). You can
modify some of the TarInfo's attributes before you add it using
addfile(). If given, `arcname' specifies an alternative name for the
file in the archive.
"""
self._check("aw")
# When fileobj is given, replace name by
# fileobj's real name.
if fileobj is not None:
name = fileobj.name
# Building the name of the member in the archive.
# Backward slashes are converted to forward slashes,
# Absolute paths are turned to relative paths.
if arcname is None:
arcname = name
drv, arcname = os.path.splitdrive(arcname)
arcname = arcname.replace(os.sep, "/")
arcname = arcname.lstrip("/")
# Now, fill the TarInfo object with
# information specific for the file.
tarinfo = self.tarinfo()
tarinfo.tarfile = self
# Use os.stat or os.lstat, depending on platform
# and if symlinks shall be resolved.
if fileobj is None:
if hasattr(os, "lstat") and not self.dereference:
statres = os.lstat(name)
else:
statres = os.stat(name)
else:
statres = os.fstat(fileobj.fileno())
linkname = ""
stmd = statres.st_mode
if stat.S_ISREG(stmd):
inode = (statres.st_ino, statres.st_dev)
if not self.dereference and statres.st_nlink > 1 and \
inode in self.inodes and arcname != self.inodes[inode]:
# Is it a hardlink to an already
# archived file?
type = LNKTYPE
linkname = self.inodes[inode]
else:
# The inode is added only if its valid.
# For win32 it is always 0.
type = REGTYPE
if inode[0]:
self.inodes[inode] = arcname
elif stat.S_ISDIR(stmd):
type = DIRTYPE
elif stat.S_ISFIFO(stmd):
type = FIFOTYPE
elif stat.S_ISLNK(stmd):
type = SYMTYPE
linkname = os.readlink(name)
elif stat.S_ISCHR(stmd):
type = CHRTYPE
elif stat.S_ISBLK(stmd):
type = BLKTYPE
else:
return None
# Fill the TarInfo object with all
# information we can get.
tarinfo.name = arcname
tarinfo.mode = stmd
tarinfo.uid = statres.st_uid
tarinfo.gid = statres.st_gid
if type == REGTYPE:
tarinfo.size = statres.st_size
else:
tarinfo.size = 0
tarinfo.mtime = statres.st_mtime
tarinfo.type = type
tarinfo.linkname = linkname
if pwd:
try:
tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
except KeyError:
pass
if grp:
try:
tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
except KeyError:
pass
if type in (CHRTYPE, BLKTYPE):
if hasattr(os, "major") and hasattr(os, "minor"):
tarinfo.devmajor = os.major(statres.st_rdev)
tarinfo.devminor = os.minor(statres.st_rdev)
return tarinfo
def list(self, verbose=True):
"""Print a table of contents to sys.stdout. If `verbose' is False, only
the names of the members are printed. If it is True, an `ls -l'-like
output is produced.
"""
self._check()
for tarinfo in self:
if verbose:
print(filemode(tarinfo.mode), end=' ')
print("%s/%s" % (tarinfo.uname or tarinfo.uid,
tarinfo.gname or tarinfo.gid), end=' ')
if tarinfo.ischr() or tarinfo.isblk():
print("%10s" % ("%d,%d" \
% (tarinfo.devmajor, tarinfo.devminor)), end=' ')
else:
print("%10d" % tarinfo.size, end=' ')
print("%d-%02d-%02d %02d:%02d:%02d" \
% time.localtime(tarinfo.mtime)[:6], end=' ')
print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=' ')
if verbose:
if tarinfo.issym():
print("->", tarinfo.linkname, end=' ')
if tarinfo.islnk():
print("link to", tarinfo.linkname, end=' ')
print()
def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
"""Add the file `name' to the archive. `name' may be any type of file
(directory, fifo, symbolic link, etc.). If given, `arcname'
specifies an alternative name for the file in the archive.
Directories are added recursively by default. This can be avoided by
setting `recursive' to False. `exclude' is a function that should
return True for each filename to be excluded. `filter' is a function
that expects a TarInfo object argument and returns the changed
TarInfo object, if it returns None the TarInfo object will be
excluded from the archive.
"""
self._check("aw")
if arcname is None:
arcname = name
# Exclude pathnames.
if exclude is not None:
import warnings
warnings.warn("use the filter argument instead",
DeprecationWarning, 2)
if exclude(name):
self._dbg(2, "tarfile: Excluded %r" % name)
return
# Skip if somebody tries to archive the archive...
if self.name is not None and os.path.abspath(name) == self.name:
self._dbg(2, "tarfile: Skipped %r" % name)
return
self._dbg(1, name)
# Create a TarInfo object from the file.
tarinfo = self.gettarinfo(name, arcname)
if tarinfo is None:
self._dbg(1, "tarfile: Unsupported type %r" % name)
return
# Change or exclude the TarInfo object.
if filter is not None:
tarinfo = filter(tarinfo)
if tarinfo is None:
self._dbg(2, "tarfile: Excluded %r" % name)
return
# Append the tar header and data to the archive.
if tarinfo.isreg():
f = bltn_open(name, "rb")
self.addfile(tarinfo, f)
f.close()
elif tarinfo.isdir():
self.addfile(tarinfo)
if recursive:
for f in os.listdir(name):
self.add(os.path.join(name, f), os.path.join(arcname, f),
recursive, exclude, filter=filter)
else:
self.addfile(tarinfo)
def addfile(self, tarinfo, fileobj=None):
"""Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
given, tarinfo.size bytes are read from it and added to the archive.
You can create TarInfo objects using gettarinfo().
On Windows platforms, `fileobj' should always be opened with mode
'rb' to avoid irritation about the file size.
"""
self._check("aw")
tarinfo = copy.copy(tarinfo)
buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
self.fileobj.write(buf)
self.offset += len(buf)
# If there's data to follow, append it.
if fileobj is not None:
copyfileobj(fileobj, self.fileobj, tarinfo.size)
blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
if remainder > 0:
self.fileobj.write(NUL * (BLOCKSIZE - remainder))
blocks += 1
self.offset += blocks * BLOCKSIZE
self.members.append(tarinfo)
def extractall(self, path=".", members=None):
"""Extract all members from the archive to the current working
directory and set owner, modification time and permissions on
directories afterwards. `path' specifies a different directory
to extract to. `members' is optional and must be a subset of the
list returned by getmembers().
"""
directories = []
if members is None:
members = self
for tarinfo in members:
if tarinfo.isdir():
# Extract directories with a safe mode.
directories.append(tarinfo)
tarinfo = copy.copy(tarinfo)
tarinfo.mode = 0o700
# Do not set_attrs directories, as we will do that further down
self.extract(tarinfo, path, set_attrs=not tarinfo.isdir())
# Reverse sort directories.
directories.sort(key=lambda a: a.name)
directories.reverse()
# Set correct owner, mtime and filemode on directories.
for tarinfo in directories:
dirpath = os.path.join(path, tarinfo.name)
try:
self.chown(tarinfo, dirpath)
self.utime(tarinfo, dirpath)
self.chmod(tarinfo, dirpath)
except ExtractError as e:
if self.errorlevel > 1:
raise
else:
self._dbg(1, "tarfile: %s" % e)
def extract(self, member, path="", set_attrs=True):
"""Extract a member from the archive to the current working directory,
using its full name. Its file information is extracted as accurately
as possible. `member' may be a filename or a TarInfo object. You can
specify a different directory using `path'. File attributes (owner,
mtime, mode) are set unless `set_attrs' is False.
"""
self._check("r")
if isinstance(member, str):
tarinfo = self.getmember(member)
else:
tarinfo = member
# Prepare the link target for makelink().
if tarinfo.islnk():
tarinfo._link_target = os.path.join(path, tarinfo.linkname)
try:
self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
set_attrs=set_attrs)
except EnvironmentError as e:
if self.errorlevel > 0:
raise
else:
if e.filename is None:
self._dbg(1, "tarfile: %s" % e.strerror)
else:
self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
except ExtractError as e:
if self.errorlevel > 1:
raise
else:
self._dbg(1, "tarfile: %s" % e)
def extractfile(self, member):
"""Extract a member from the archive as a file object. `member' may be
a filename or a TarInfo object. If `member' is a regular file, a
file-like object is returned. If `member' is a link, a file-like
object is constructed from the link's target. If `member' is none of
the above, None is returned.
The file-like object is read-only and provides the following
methods: read(), readline(), readlines(), seek() and tell()
"""
self._check("r")
if isinstance(member, str):
tarinfo = self.getmember(member)
else:
tarinfo = member
if tarinfo.isreg():
return self.fileobject(self, tarinfo)
elif tarinfo.type not in SUPPORTED_TYPES:
# If a member's type is unknown, it is treated as a
# regular file.
return self.fileobject(self, tarinfo)
elif tarinfo.islnk() or tarinfo.issym():
if isinstance(self.fileobj, _Stream):
# A small but ugly workaround for the case that someone tries
# to extract a (sym)link as a file-object from a non-seekable
# stream of tar blocks.
raise StreamError("cannot extract (sym)link as file object")
else:
# A (sym)link's file object is its target's file object.
return self.extractfile(self._find_link_target(tarinfo))
else:
# If there's no data associated with the member (directory, chrdev,
# blkdev, etc.), return None instead of a file object.
return None
def _extract_member(self, tarinfo, targetpath, set_attrs=True):
"""Extract the TarInfo object tarinfo to a physical
file called targetpath.
"""
# Fetch the TarInfo object for the given name
# and build the destination pathname, replacing
# forward slashes to platform specific separators.
targetpath = targetpath.rstrip("/")
targetpath = targetpath.replace("/", os.sep)
# Create all upper directories.
upperdirs = os.path.dirname(targetpath)
if upperdirs and not os.path.exists(upperdirs):
# Create directories that are not part of the archive with
# default permissions.
os.makedirs(upperdirs)
if tarinfo.islnk() or tarinfo.issym():
self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
else:
self._dbg(1, tarinfo.name)
if tarinfo.isreg():
self.makefile(tarinfo, targetpath)
elif tarinfo.isdir():
self.makedir(tarinfo, targetpath)
elif tarinfo.isfifo():
self.makefifo(tarinfo, targetpath)
elif tarinfo.ischr() or tarinfo.isblk():
self.makedev(tarinfo, targetpath)
elif tarinfo.islnk() or tarinfo.issym():
self.makelink(tarinfo, targetpath)
elif tarinfo.type not in SUPPORTED_TYPES:
self.makeunknown(tarinfo, targetpath)
else:
self.makefile(tarinfo, targetpath)
if set_attrs:
self.chown(tarinfo, targetpath)
if not tarinfo.issym():
self.chmod(tarinfo, targetpath)
self.utime(tarinfo, targetpath)
#--------------------------------------------------------------------------
# Below are the different file methods. They are called via
# _extract_member() when extract() is called. They can be replaced in a
# subclass to implement other functionality.
def makedir(self, tarinfo, targetpath):
"""Make a directory called targetpath.
"""
try:
# Use a safe mode for the directory, the real mode is set
# later in _extract_member().
os.mkdir(targetpath, 0o700)
except EnvironmentError as e:
if e.errno != errno.EEXIST:
raise
def makefile(self, tarinfo, targetpath):
"""Make a file called targetpath.
"""
source = self.fileobj
source.seek(tarinfo.offset_data)
target = bltn_open(targetpath, "wb")
if tarinfo.sparse is not None:
for offset, size in tarinfo.sparse:
target.seek(offset)
copyfileobj(source, target, size)
else:
copyfileobj(source, target, tarinfo.size)
target.seek(tarinfo.size)
target.truncate()
target.close()
def makeunknown(self, tarinfo, targetpath):
"""Make a file from a TarInfo object with an unknown type
at targetpath.
"""
self.makefile(tarinfo, targetpath)
self._dbg(1, "tarfile: Unknown file type %r, " \
"extracted as regular file." % tarinfo.type)
def makefifo(self, tarinfo, targetpath):
"""Make a fifo called targetpath.
"""
if hasattr(os, "mkfifo"):
os.mkfifo(targetpath)
else:
raise ExtractError("fifo not supported by system")
def makedev(self, tarinfo, targetpath):
"""Make a character or block device called targetpath.
"""
if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
raise ExtractError("special devices not supported by system")
mode = tarinfo.mode
if tarinfo.isblk():
mode |= stat.S_IFBLK
else:
mode |= stat.S_IFCHR
os.mknod(targetpath, mode,
os.makedev(tarinfo.devmajor, tarinfo.devminor))
def makelink(self, tarinfo, targetpath):
"""Make a (symbolic) link called targetpath. If it cannot be created
(platform limitation), we try to make a copy of the referenced file
instead of a link.
"""
try:
# For systems that support symbolic and hard links.
if tarinfo.issym():
os.symlink(tarinfo.linkname, targetpath)
else:
# See extract().
if os.path.exists(tarinfo._link_target):
os.link(tarinfo._link_target, targetpath)
else:
self._extract_member(self._find_link_target(tarinfo),
targetpath)
except symlink_exception:
if tarinfo.issym():
linkpath = os.path.join(os.path.dirname(tarinfo.name),
tarinfo.linkname)
else:
linkpath = tarinfo.linkname
else:
try:
self._extract_member(self._find_link_target(tarinfo),
targetpath)
except KeyError:
raise ExtractError("unable to resolve link inside archive")
def chown(self, tarinfo, targetpath):
"""Set owner of targetpath according to tarinfo.
"""
if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
# We have to be root to do so.
try:
g = grp.getgrnam(tarinfo.gname)[2]
except KeyError:
g = tarinfo.gid
try:
u = pwd.getpwnam(tarinfo.uname)[2]
except KeyError:
u = tarinfo.uid
try:
if tarinfo.issym() and hasattr(os, "lchown"):
os.lchown(targetpath, u, g)
else:
if sys.platform != "os2emx":
os.chown(targetpath, u, g)
except EnvironmentError as e:
raise ExtractError("could not change owner")
def chmod(self, tarinfo, targetpath):
"""Set file permissions of targetpath according to tarinfo.
"""
if hasattr(os, 'chmod'):
try:
os.chmod(targetpath, tarinfo.mode)
except EnvironmentError as e:
raise ExtractError("could not change mode")
def utime(self, tarinfo, targetpath):
"""Set modification time of targetpath according to tarinfo.
"""
if not hasattr(os, 'utime'):
return
try:
os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
except EnvironmentError as e:
raise ExtractError("could not change modification time")
#--------------------------------------------------------------------------
def next(self):
"""Return the next member of the archive as a TarInfo object, when
TarFile is opened for reading. Return None if there is no more
available.
"""
self._check("ra")
if self.firstmember is not None:
m = self.firstmember
self.firstmember = None
return m
# Read the next block.
self.fileobj.seek(self.offset)
tarinfo = None
while True:
try:
tarinfo = self.tarinfo.fromtarfile(self)
except EOFHeaderError as e:
if self.ignore_zeros:
self._dbg(2, "0x%X: %s" % (self.offset, e))
self.offset += BLOCKSIZE
continue
except InvalidHeaderError as e:
if self.ignore_zeros:
self._dbg(2, "0x%X: %s" % (self.offset, e))
self.offset += BLOCKSIZE
continue
elif self.offset == 0:
raise ReadError(str(e))
except EmptyHeaderError:
if self.offset == 0:
raise ReadError("empty file")
except TruncatedHeaderError as e:
if self.offset == 0:
raise ReadError(str(e))
except SubsequentHeaderError as e:
raise ReadError(str(e))
break
if tarinfo is not None:
self.members.append(tarinfo)
else:
self._loaded = True
return tarinfo
#--------------------------------------------------------------------------
# Little helper methods:
def _getmember(self, name, tarinfo=None, normalize=False):
"""Find an archive member by name from bottom to top.
If tarinfo is given, it is used as the starting point.
"""
# Ensure that all members have been loaded.
members = self.getmembers()
# Limit the member search list up to tarinfo.
if tarinfo is not None:
members = members[:members.index(tarinfo)]
if normalize:
name = os.path.normpath(name)
for member in reversed(members):
if normalize:
member_name = os.path.normpath(member.name)
else:
member_name = member.name
if name == member_name:
return member
def _load(self):
"""Read through the entire archive file and look for readable
members.
"""
while True:
tarinfo = self.next()
if tarinfo is None:
break
self._loaded = True
def _check(self, mode=None):
"""Check if TarFile is still open, and if the operation's mode
corresponds to TarFile's mode.
"""
if self.closed:
raise IOError("%s is closed" % self.__class__.__name__)
if mode is not None and self.mode not in mode:
raise IOError("bad operation for mode %r" % self.mode)
def _find_link_target(self, tarinfo):
"""Find the target member of a symlink or hardlink member in the
archive.
"""
if tarinfo.issym():
# Always search the entire archive.
linkname = os.path.dirname(tarinfo.name) + "/" + tarinfo.linkname
limit = None
else:
# Search the archive before the link, because a hard link is
# just a reference to an already archived file.
linkname = tarinfo.linkname
limit = tarinfo
member = self._getmember(linkname, tarinfo=limit, normalize=True)
if member is None:
raise KeyError("linkname %r not found" % linkname)
return member
def __iter__(self):
"""Provide an iterator object.
"""
if self._loaded:
return iter(self.members)
else:
return TarIter(self)
def _dbg(self, level, msg):
"""Write debugging output to sys.stderr.
"""
if level <= self.debug:
print(msg, file=sys.stderr)
def __enter__(self):
self._check()
return self
def __exit__(self, type, value, traceback):
if type is None:
self.close()
else:
# An exception occurred. We must not call close() because
# it would try to write end-of-archive blocks and padding.
if not self._extfileobj:
self.fileobj.close()
self.closed = True
# class TarFile
class TarIter(object):
"""Iterator Class.
for tarinfo in TarFile(...):
suite...
"""
def __init__(self, tarfile):
"""Construct a TarIter object.
"""
self.tarfile = tarfile
self.index = 0
def __iter__(self):
"""Return iterator object.
"""
return self
def __next__(self):
"""Return the next item using TarFile's next() method.
When all members have been read, set TarFile as _loaded.
"""
# Fix for SF #1100429: Under rare circumstances it can
# happen that getmembers() is called during iteration,
# which will cause TarIter to stop prematurely.
if not self.tarfile._loaded:
tarinfo = self.tarfile.next()
if not tarinfo:
self.tarfile._loaded = True
raise StopIteration
else:
try:
tarinfo = self.tarfile.members[self.index]
except IndexError:
raise StopIteration
self.index += 1
return tarinfo
next = __next__ # for Python 2.x
#--------------------
# exported functions
#--------------------
def is_tarfile(name):
"""Return True if name points to a tar archive that we
are able to handle, else return False.
"""
try:
t = open(name)
t.close()
return True
except TarError:
return False
bltn_open = open
open = TarFile.open
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013-2016 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
from __future__ import absolute_import
import os
import re
import sys
try:
import ssl
except ImportError:
ssl = None
if sys.version_info[0] < 3: # pragma: no cover
from StringIO import StringIO
string_types = basestring,
text_type = unicode
from types import FileType as file_type
import __builtin__ as builtins
import ConfigParser as configparser
from ._backport import shutil
from urlparse import urlparse, urlunparse, urljoin, urlsplit, urlunsplit
from urllib import (urlretrieve, quote as _quote, unquote, url2pathname,
pathname2url, ContentTooShortError, splittype)
def quote(s):
if isinstance(s, unicode):
s = s.encode('utf-8')
return _quote(s)
import urllib2
from urllib2 import (Request, urlopen, URLError, HTTPError,
HTTPBasicAuthHandler, HTTPPasswordMgr,
HTTPHandler, HTTPRedirectHandler,
build_opener)
if ssl:
from urllib2 import HTTPSHandler
import httplib
import xmlrpclib
import Queue as queue
from HTMLParser import HTMLParser
import htmlentitydefs
raw_input = raw_input
from itertools import ifilter as filter
from itertools import ifilterfalse as filterfalse
_userprog = None
def splituser(host):
"""splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
global _userprog
if _userprog is None:
import re
_userprog = re.compile('^(.*)@(.*)$')
match = _userprog.match(host)
if match: return match.group(1, 2)
return None, host
else: # pragma: no cover
from io import StringIO
string_types = str,
text_type = str
from io import TextIOWrapper as file_type
import builtins
import configparser
import shutil
from urllib.parse import (urlparse, urlunparse, urljoin, splituser, quote,
unquote, urlsplit, urlunsplit, splittype)
from urllib.request import (urlopen, urlretrieve, Request, url2pathname,
pathname2url,
HTTPBasicAuthHandler, HTTPPasswordMgr,
HTTPHandler, HTTPRedirectHandler,
build_opener)
if ssl:
from urllib.request import HTTPSHandler
from urllib.error import HTTPError, URLError, ContentTooShortError
import http.client as httplib
import urllib.request as urllib2
import xmlrpc.client as xmlrpclib
import queue
from html.parser import HTMLParser
import html.entities as htmlentitydefs
raw_input = input
from itertools import filterfalse
filter = filter
try:
from ssl import match_hostname, CertificateError
except ImportError: # pragma: no cover
class CertificateError(ValueError):
pass
def _dnsname_match(dn, hostname, max_wildcards=1):
"""Matching according to RFC 6125, section 6.4.3
http://tools.ietf.org/html/rfc6125#section-6.4.3
"""
pats = []
if not dn:
return False
parts = dn.split('.')
leftmost, remainder = parts[0], parts[1:]
wildcards = leftmost.count('*')
if wildcards > max_wildcards:
# Issue #17980: avoid denials of service by refusing more
# than one wildcard per fragment. A survey of established
# policy among SSL implementations showed it to be a
# reasonable choice.
raise CertificateError(
"too many wildcards in certificate DNS name: " + repr(dn))
# speed up common case w/o wildcards
if not wildcards:
return dn.lower() == hostname.lower()
# RFC 6125, section 6.4.3, subitem 1.
# The client SHOULD NOT attempt to match a presented identifier in which
# the wildcard character comprises a label other than the left-most label.
if leftmost == '*':
# When '*' is a fragment by itself, it matches a non-empty dotless
# fragment.
pats.append('[^.]+')
elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
# RFC 6125, section 6.4.3, subitem 3.
# The client SHOULD NOT attempt to match a presented identifier
# where the wildcard character is embedded within an A-label or
# U-label of an internationalized domain name.
pats.append(re.escape(leftmost))
else:
# Otherwise, '*' matches any dotless string, e.g. www*
pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
# add the remaining fragments, ignore any wildcards
for frag in remainder:
pats.append(re.escape(frag))
pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
return pat.match(hostname)
def match_hostname(cert, hostname):
"""Verify that *cert* (in decoded format as returned by
SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
rules are followed, but IP addresses are not accepted for *hostname*.
CertificateError is raised on failure. On success, the function
returns nothing.
"""
if not cert:
raise ValueError("empty or no certificate, match_hostname needs a "
"SSL socket or SSL context with either "
"CERT_OPTIONAL or CERT_REQUIRED")
dnsnames = []
san = cert.get('subjectAltName', ())
for key, value in san:
if key == 'DNS':
if _dnsname_match(value, hostname):
return
dnsnames.append(value)
if not dnsnames:
# The subject is only checked when there is no dNSName entry
# in subjectAltName
for sub in cert.get('subject', ()):
for key, value in sub:
# XXX according to RFC 2818, the most specific Common Name
# must be used.
if key == 'commonName':
if _dnsname_match(value, hostname):
return
dnsnames.append(value)
if len(dnsnames) > 1:
raise CertificateError("hostname %r "
"doesn't match either of %s"
% (hostname, ', '.join(map(repr, dnsnames))))
elif len(dnsnames) == 1:
raise CertificateError("hostname %r "
"doesn't match %r"
% (hostname, dnsnames[0]))
else:
raise CertificateError("no appropriate commonName or "
"subjectAltName fields were found")
try:
from types import SimpleNamespace as Container
except ImportError: # pragma: no cover
class Container(object):
"""
A generic container for when multiple values need to be returned
"""
def __init__(self, **kwargs):
self.__dict__.update(kwargs)
try:
from shutil import which
except ImportError: # pragma: no cover
# Implementation from Python 3.3
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
"""Given a command, mode, and a PATH string, return the path which
conforms to the given mode on the PATH, or None if there is no such
file.
`mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
of os.environ.get("PATH"), or can be overridden with a custom search
path.
"""
# Check that a given file can be accessed with the correct mode.
# Additionally check that `file` is not a directory, as on Windows
# directories pass the os.access check.
def _access_check(fn, mode):
return (os.path.exists(fn) and os.access(fn, mode)
and not os.path.isdir(fn))
# If we're given a path with a directory part, look it up directly rather
# than referring to PATH directories. This includes checking relative to the
# current directory, e.g. ./script
if os.path.dirname(cmd):
if _access_check(cmd, mode):
return cmd
return None
if path is None:
path = os.environ.get("PATH", os.defpath)
if not path:
return None
path = path.split(os.pathsep)
if sys.platform == "win32":
# The current directory takes precedence on Windows.
if not os.curdir in path:
path.insert(0, os.curdir)
# PATHEXT is necessary to check on Windows.
pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
# See if the given file matches any of the expected path extensions.
# This will allow us to short circuit when given "python.exe".
# If it does match, only test that one, otherwise we have to try
# others.
if any(cmd.lower().endswith(ext.lower()) for ext in pathext):
files = [cmd]
else:
files = [cmd + ext for ext in pathext]
else:
# On other platforms you don't have things like PATHEXT to tell you
# what file suffixes are executable, so just pass on cmd as-is.
files = [cmd]
seen = set()
for dir in path:
normdir = os.path.normcase(dir)
if not normdir in seen:
seen.add(normdir)
for thefile in files:
name = os.path.join(dir, thefile)
if _access_check(name, mode):
return name
return None
# ZipFile is a context manager in 2.7, but not in 2.6
from zipfile import ZipFile as BaseZipFile
if hasattr(BaseZipFile, '__enter__'): # pragma: no cover
ZipFile = BaseZipFile
else:
from zipfile import ZipExtFile as BaseZipExtFile
class ZipExtFile(BaseZipExtFile):
def __init__(self, base):
self.__dict__.update(base.__dict__)
def __enter__(self):
return self
def __exit__(self, *exc_info):
self.close()
# return None, so if an exception occurred, it will propagate
class ZipFile(BaseZipFile):
def __enter__(self):
return self
def __exit__(self, *exc_info):
self.close()
# return None, so if an exception occurred, it will propagate
def open(self, *args, **kwargs):
base = BaseZipFile.open(self, *args, **kwargs)
return ZipExtFile(base)
try:
from platform import python_implementation
except ImportError: # pragma: no cover
def python_implementation():
"""Return a string identifying the Python implementation."""
if 'PyPy' in sys.version:
return 'PyPy'
if os.name == 'java':
return 'Jython'
if sys.version.startswith('IronPython'):
return 'IronPython'
return 'CPython'
try:
import sysconfig
except ImportError: # pragma: no cover
from ._backport import sysconfig
try:
callable = callable
except NameError: # pragma: no cover
from collections import Callable
def callable(obj):
return isinstance(obj, Callable)
try:
fsencode = os.fsencode
fsdecode = os.fsdecode
except AttributeError: # pragma: no cover
_fsencoding = sys.getfilesystemencoding()
if _fsencoding == 'mbcs':
_fserrors = 'strict'
else:
_fserrors = 'surrogateescape'
def fsencode(filename):
if isinstance(filename, bytes):
return filename
elif isinstance(filename, text_type):
return filename.encode(_fsencoding, _fserrors)
else:
raise TypeError("expect bytes or str, not %s" %
type(filename).__name__)
def fsdecode(filename):
if isinstance(filename, text_type):
return filename
elif isinstance(filename, bytes):
return filename.decode(_fsencoding, _fserrors)
else:
raise TypeError("expect bytes or str, not %s" %
type(filename).__name__)
try:
from tokenize import detect_encoding
except ImportError: # pragma: no cover
from codecs import BOM_UTF8, lookup
import re
cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
def _get_normal_name(orig_enc):
"""Imitates get_normal_name in tokenizer.c."""
# Only care about the first 12 characters.
enc = orig_enc[:12].lower().replace("_", "-")
if enc == "utf-8" or enc.startswith("utf-8-"):
return "utf-8"
if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
return "iso-8859-1"
return orig_enc
def detect_encoding(readline):
"""
The detect_encoding() function is used to detect the encoding that should
be used to decode a Python source file. It requires one argument, readline,
in the same way as the tokenize() generator.
It will call readline a maximum of twice, and return the encoding used
(as a string) and a list of any lines (left as bytes) it has read in.
It detects the encoding from the presence of a utf-8 bom or an encoding
cookie as specified in pep-0263. If both a bom and a cookie are present,
but disagree, a SyntaxError will be raised. If the encoding cookie is an
invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
'utf-8-sig' is returned.
If no encoding is specified, then the default of 'utf-8' will be returned.
"""
try:
filename = readline.__self__.name
except AttributeError:
filename = None
bom_found = False
encoding = None
default = 'utf-8'
def read_or_stop():
try:
return readline()
except StopIteration:
return b''
def find_cookie(line):
try:
# Decode as UTF-8. Either the line is an encoding declaration,
# in which case it should be pure ASCII, or it must be UTF-8
# per default encoding.
line_string = line.decode('utf-8')
except UnicodeDecodeError:
msg = "invalid or missing encoding declaration"
if filename is not None:
msg = '{} for {!r}'.format(msg, filename)
raise SyntaxError(msg)
matches = cookie_re.findall(line_string)
if not matches:
return None
encoding = _get_normal_name(matches[0])
try:
codec = lookup(encoding)
except LookupError:
# This behaviour mimics the Python interpreter
if filename is None:
msg = "unknown encoding: " + encoding
else:
msg = "unknown encoding for {!r}: {}".format(filename,
encoding)
raise SyntaxError(msg)
if bom_found:
if codec.name != 'utf-8':
# This behaviour mimics the Python interpreter
if filename is None:
msg = 'encoding problem: utf-8'
else:
msg = 'encoding problem for {!r}: utf-8'.format(filename)
raise SyntaxError(msg)
encoding += '-sig'
return encoding
first = read_or_stop()
if first.startswith(BOM_UTF8):
bom_found = True
first = first[3:]
default = 'utf-8-sig'
if not first:
return default, []
encoding = find_cookie(first)
if encoding:
return encoding, [first]
second = read_or_stop()
if not second:
return default, [first]
encoding = find_cookie(second)
if encoding:
return encoding, [first, second]
return default, [first, second]
# For converting & <-> &amp; etc.
try:
from html import escape
except ImportError:
from cgi import escape
if sys.version_info[:2] < (3, 4):
unescape = HTMLParser().unescape
else:
from html import unescape
try:
from collections import ChainMap
except ImportError: # pragma: no cover
from collections import MutableMapping
try:
from reprlib import recursive_repr as _recursive_repr
except ImportError:
def _recursive_repr(fillvalue='...'):
'''
Decorator to make a repr function return fillvalue for a recursive
call
'''
def decorating_function(user_function):
repr_running = set()
def wrapper(self):
key = id(self), get_ident()
if key in repr_running:
return fillvalue
repr_running.add(key)
try:
result = user_function(self)
finally:
repr_running.discard(key)
return result
# Can't use functools.wraps() here because of bootstrap issues
wrapper.__module__ = getattr(user_function, '__module__')
wrapper.__doc__ = getattr(user_function, '__doc__')
wrapper.__name__ = getattr(user_function, '__name__')
wrapper.__annotations__ = getattr(user_function, '__annotations__', {})
return wrapper
return decorating_function
class ChainMap(MutableMapping):
''' A ChainMap groups multiple dicts (or other mappings) together
to create a single, updateable view.
The underlying mappings are stored in a list. That list is public and can
accessed or updated using the *maps* attribute. There is no other state.
Lookups search the underlying mappings successively until a key is found.
In contrast, writes, updates, and deletions only operate on the first
mapping.
'''
def __init__(self, *maps):
'''Initialize a ChainMap by setting *maps* to the given mappings.
If no mappings are provided, a single empty dictionary is used.
'''
self.maps = list(maps) or [{}] # always at least one map
def __missing__(self, key):
raise KeyError(key)
def __getitem__(self, key):
for mapping in self.maps:
try:
return mapping[key] # can't use 'key in mapping' with defaultdict
except KeyError:
pass
return self.__missing__(key) # support subclasses that define __missing__
def get(self, key, default=None):
return self[key] if key in self else default
def __len__(self):
return len(set().union(*self.maps)) # reuses stored hash values if possible
def __iter__(self):
return iter(set().union(*self.maps))
def __contains__(self, key):
return any(key in m for m in self.maps)
def __bool__(self):
return any(self.maps)
@_recursive_repr()
def __repr__(self):
return '{0.__class__.__name__}({1})'.format(
self, ', '.join(map(repr, self.maps)))
@classmethod
def fromkeys(cls, iterable, *args):
'Create a ChainMap with a single dict created from the iterable.'
return cls(dict.fromkeys(iterable, *args))
def copy(self):
'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]'
return self.__class__(self.maps[0].copy(), *self.maps[1:])
__copy__ = copy
def new_child(self): # like Django's Context.push()
'New ChainMap with a new dict followed by all previous maps.'
return self.__class__({}, *self.maps)
@property
def parents(self): # like Django's Context.pop()
'New ChainMap from maps[1:].'
return self.__class__(*self.maps[1:])
def __setitem__(self, key, value):
self.maps[0][key] = value
def __delitem__(self, key):
try:
del self.maps[0][key]
except KeyError:
raise KeyError('Key not found in the first mapping: {!r}'.format(key))
def popitem(self):
'Remove and return an item pair from maps[0]. Raise KeyError is maps[0] is empty.'
try:
return self.maps[0].popitem()
except KeyError:
raise KeyError('No keys found in the first mapping.')
def pop(self, key, *args):
'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].'
try:
return self.maps[0].pop(key, *args)
except KeyError:
raise KeyError('Key not found in the first mapping: {!r}'.format(key))
def clear(self):
'Clear maps[0], leaving maps[1:] intact.'
self.maps[0].clear()
try:
from imp import cache_from_source
except ImportError: # pragma: no cover
def cache_from_source(path, debug_override=None):
assert path.endswith('.py')
if debug_override is None:
debug_override = __debug__
if debug_override:
suffix = 'c'
else:
suffix = 'o'
return path + suffix
try:
from collections import OrderedDict
except ImportError: # pragma: no cover
## {{{ http://code.activestate.com/recipes/576693/ (r9)
# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
# Passes Python2.7's test suite and incorporates all the latest updates.
try:
from thread import get_ident as _get_ident
except ImportError:
from dummy_thread import get_ident as _get_ident
try:
from _abcoll import KeysView, ValuesView, ItemsView
except ImportError:
pass
class OrderedDict(dict):
'Dictionary that remembers insertion order'
# An inherited dict maps keys to values.
# The inherited dict provides __getitem__, __len__, __contains__, and get.
# The remaining methods are order-aware.
# Big-O running times for all methods are the same as for regular dictionaries.
# The internal self.__map dictionary maps keys to links in a doubly linked list.
# The circular doubly linked list starts and ends with a sentinel element.
# The sentinel element never gets deleted (this simplifies the algorithm).
# Each link is stored as a list of length three: [PREV, NEXT, KEY].
def __init__(self, *args, **kwds):
'''Initialize an ordered dictionary. Signature is the same as for
regular dictionaries, but keyword arguments are not recommended
because their insertion order is arbitrary.
'''
if len(args) > 1:
raise TypeError('expected at most 1 arguments, got %d' % len(args))
try:
self.__root
except AttributeError:
self.__root = root = [] # sentinel node
root[:] = [root, root, None]
self.__map = {}
self.__update(*args, **kwds)
def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
'od.__setitem__(i, y) <==> od[i]=y'
# Setting a new item creates a new link which goes at the end of the linked
# list, and the inherited dictionary is updated with the new key/value pair.
if key not in self:
root = self.__root
last = root[0]
last[1] = root[0] = self.__map[key] = [last, root, key]
dict_setitem(self, key, value)
def __delitem__(self, key, dict_delitem=dict.__delitem__):
'od.__delitem__(y) <==> del od[y]'
# Deleting an existing item uses self.__map to find the link which is
# then removed by updating the links in the predecessor and successor nodes.
dict_delitem(self, key)
link_prev, link_next, key = self.__map.pop(key)
link_prev[1] = link_next
link_next[0] = link_prev
def __iter__(self):
'od.__iter__() <==> iter(od)'
root = self.__root
curr = root[1]
while curr is not root:
yield curr[2]
curr = curr[1]
def __reversed__(self):
'od.__reversed__() <==> reversed(od)'
root = self.__root
curr = root[0]
while curr is not root:
yield curr[2]
curr = curr[0]
def clear(self):
'od.clear() -> None. Remove all items from od.'
try:
for node in self.__map.itervalues():
del node[:]
root = self.__root
root[:] = [root, root, None]
self.__map.clear()
except AttributeError:
pass
dict.clear(self)
def popitem(self, last=True):
'''od.popitem() -> (k, v), return and remove a (key, value) pair.
Pairs are returned in LIFO order if last is true or FIFO order if false.
'''
if not self:
raise KeyError('dictionary is empty')
root = self.__root
if last:
link = root[0]
link_prev = link[0]
link_prev[1] = root
root[0] = link_prev
else:
link = root[1]
link_next = link[1]
root[1] = link_next
link_next[0] = root
key = link[2]
del self.__map[key]
value = dict.pop(self, key)
return key, value
# -- the following methods do not depend on the internal structure --
def keys(self):
'od.keys() -> list of keys in od'
return list(self)
def values(self):
'od.values() -> list of values in od'
return [self[key] for key in self]
def items(self):
'od.items() -> list of (key, value) pairs in od'
return [(key, self[key]) for key in self]
def iterkeys(self):
'od.iterkeys() -> an iterator over the keys in od'
return iter(self)
def itervalues(self):
'od.itervalues -> an iterator over the values in od'
for k in self:
yield self[k]
def iteritems(self):
'od.iteritems -> an iterator over the (key, value) items in od'
for k in self:
yield (k, self[k])
def update(*args, **kwds):
'''od.update(E, **F) -> None. Update od from dict/iterable E and F.
If E is a dict instance, does: for k in E: od[k] = E[k]
If E has a .keys() method, does: for k in E.keys(): od[k] = E[k]
Or if E is an iterable of items, does: for k, v in E: od[k] = v
In either case, this is followed by: for k, v in F.items(): od[k] = v
'''
if len(args) > 2:
raise TypeError('update() takes at most 2 positional '
'arguments (%d given)' % (len(args),))
elif not args:
raise TypeError('update() takes at least 1 argument (0 given)')
self = args[0]
# Make progressively weaker assumptions about "other"
other = ()
if len(args) == 2:
other = args[1]
if isinstance(other, dict):
for key in other:
self[key] = other[key]
elif hasattr(other, 'keys'):
for key in other.keys():
self[key] = other[key]
else:
for key, value in other:
self[key] = value
for key, value in kwds.items():
self[key] = value
__update = update # let subclasses override update without breaking __init__
__marker = object()
def pop(self, key, default=__marker):
'''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
If key is not found, d is returned if given, otherwise KeyError is raised.
'''
if key in self:
result = self[key]
del self[key]
return result
if default is self.__marker:
raise KeyError(key)
return default
def setdefault(self, key, default=None):
'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
if key in self:
return self[key]
self[key] = default
return default
def __repr__(self, _repr_running=None):
'od.__repr__() <==> repr(od)'
if not _repr_running: _repr_running = {}
call_key = id(self), _get_ident()
if call_key in _repr_running:
return '...'
_repr_running[call_key] = 1
try:
if not self:
return '%s()' % (self.__class__.__name__,)
return '%s(%r)' % (self.__class__.__name__, self.items())
finally:
del _repr_running[call_key]
def __reduce__(self):
'Return state information for pickling'
items = [[k, self[k]] for k in self]
inst_dict = vars(self).copy()
for k in vars(OrderedDict()):
inst_dict.pop(k, None)
if inst_dict:
return (self.__class__, (items,), inst_dict)
return self.__class__, (items,)
def copy(self):
'od.copy() -> a shallow copy of od'
return self.__class__(self)
@classmethod
def fromkeys(cls, iterable, value=None):
'''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
and values equal to v (which defaults to None).
'''
d = cls()
for key in iterable:
d[key] = value
return d
def __eq__(self, other):
'''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
while comparison to a regular mapping is order-insensitive.
'''
if isinstance(other, OrderedDict):
return len(self)==len(other) and self.items() == other.items()
return dict.__eq__(self, other)
def __ne__(self, other):
return not self == other
# -- the following methods are only used in Python 2.7 --
def viewkeys(self):
"od.viewkeys() -> a set-like object providing a view on od's keys"
return KeysView(self)
def viewvalues(self):
"od.viewvalues() -> an object providing a view on od's values"
return ValuesView(self)
def viewitems(self):
"od.viewitems() -> a set-like object providing a view on od's items"
return ItemsView(self)
try:
from logging.config import BaseConfigurator, valid_ident
except ImportError: # pragma: no cover
IDENTIFIER = re.compile('^[a-z_][a-z0-9_]*$', re.I)
def valid_ident(s):
m = IDENTIFIER.match(s)
if not m:
raise ValueError('Not a valid Python identifier: %r' % s)
return True
# The ConvertingXXX classes are wrappers around standard Python containers,
# and they serve to convert any suitable values in the container. The
# conversion converts base dicts, lists and tuples to their wrapped
# equivalents, whereas strings which match a conversion format are converted
# appropriately.
#
# Each wrapper should have a configurator attribute holding the actual
# configurator to use for conversion.
class ConvertingDict(dict):
"""A converting dictionary wrapper."""
def __getitem__(self, key):
value = dict.__getitem__(self, key)
result = self.configurator.convert(value)
#If the converted value is different, save for next time
if value is not result:
self[key] = result
if type(result) in (ConvertingDict, ConvertingList,
ConvertingTuple):
result.parent = self
result.key = key
return result
def get(self, key, default=None):
value = dict.get(self, key, default)
result = self.configurator.convert(value)
#If the converted value is different, save for next time
if value is not result:
self[key] = result
if type(result) in (ConvertingDict, ConvertingList,
ConvertingTuple):
result.parent = self
result.key = key
return result
def pop(self, key, default=None):
value = dict.pop(self, key, default)
result = self.configurator.convert(value)
if value is not result:
if type(result) in (ConvertingDict, ConvertingList,
ConvertingTuple):
result.parent = self
result.key = key
return result
class ConvertingList(list):
"""A converting list wrapper."""
def __getitem__(self, key):
value = list.__getitem__(self, key)
result = self.configurator.convert(value)
#If the converted value is different, save for next time
if value is not result:
self[key] = result
if type(result) in (ConvertingDict, ConvertingList,
ConvertingTuple):
result.parent = self
result.key = key
return result
def pop(self, idx=-1):
value = list.pop(self, idx)
result = self.configurator.convert(value)
if value is not result:
if type(result) in (ConvertingDict, ConvertingList,
ConvertingTuple):
result.parent = self
return result
class ConvertingTuple(tuple):
"""A converting tuple wrapper."""
def __getitem__(self, key):
value = tuple.__getitem__(self, key)
result = self.configurator.convert(value)
if value is not result:
if type(result) in (ConvertingDict, ConvertingList,
ConvertingTuple):
result.parent = self
result.key = key
return result
class BaseConfigurator(object):
"""
The configurator base class which defines some useful defaults.
"""
CONVERT_PATTERN = re.compile(r'^(?P<prefix>[a-z]+)://(?P<suffix>.*)$')
WORD_PATTERN = re.compile(r'^\s*(\w+)\s*')
DOT_PATTERN = re.compile(r'^\.\s*(\w+)\s*')
INDEX_PATTERN = re.compile(r'^\[\s*(\w+)\s*\]\s*')
DIGIT_PATTERN = re.compile(r'^\d+$')
value_converters = {
'ext' : 'ext_convert',
'cfg' : 'cfg_convert',
}
# We might want to use a different one, e.g. importlib
importer = staticmethod(__import__)
def __init__(self, config):
self.config = ConvertingDict(config)
self.config.configurator = self
def resolve(self, s):
"""
Resolve strings to objects using standard import and attribute
syntax.
"""
name = s.split('.')
used = name.pop(0)
try:
found = self.importer(used)
for frag in name:
used += '.' + frag
try:
found = getattr(found, frag)
except AttributeError:
self.importer(used)
found = getattr(found, frag)
return found
except ImportError:
e, tb = sys.exc_info()[1:]
v = ValueError('Cannot resolve %r: %s' % (s, e))
v.__cause__, v.__traceback__ = e, tb
raise v
def ext_convert(self, value):
"""Default converter for the ext:// protocol."""
return self.resolve(value)
def cfg_convert(self, value):
"""Default converter for the cfg:// protocol."""
rest = value
m = self.WORD_PATTERN.match(rest)
if m is None:
raise ValueError("Unable to convert %r" % value)
else:
rest = rest[m.end():]
d = self.config[m.groups()[0]]
#print d, rest
while rest:
m = self.DOT_PATTERN.match(rest)
if m:
d = d[m.groups()[0]]
else:
m = self.INDEX_PATTERN.match(rest)
if m:
idx = m.groups()[0]
if not self.DIGIT_PATTERN.match(idx):
d = d[idx]
else:
try:
n = int(idx) # try as number first (most likely)
d = d[n]
except TypeError:
d = d[idx]
if m:
rest = rest[m.end():]
else:
raise ValueError('Unable to convert '
'%r at %r' % (value, rest))
#rest should be empty
return d
def convert(self, value):
"""
Convert values to an appropriate type. dicts, lists and tuples are
replaced by their converting alternatives. Strings are checked to
see if they have a conversion format and are converted if they do.
"""
if not isinstance(value, ConvertingDict) and isinstance(value, dict):
value = ConvertingDict(value)
value.configurator = self
elif not isinstance(value, ConvertingList) and isinstance(value, list):
value = ConvertingList(value)
value.configurator = self
elif not isinstance(value, ConvertingTuple) and\
isinstance(value, tuple):
value = ConvertingTuple(value)
value.configurator = self
elif isinstance(value, string_types):
m = self.CONVERT_PATTERN.match(value)
if m:
d = m.groupdict()
prefix = d['prefix']
converter = self.value_converters.get(prefix, None)
if converter:
suffix = d['suffix']
converter = getattr(self, converter)
value = converter(suffix)
return value
def configure_custom(self, config):
"""Configure an object with a user-supplied factory."""
c = config.pop('()')
if not callable(c):
c = self.resolve(c)
props = config.pop('.', None)
# Check for valid identifiers
kwargs = dict([(k, config[k]) for k in config if valid_ident(k)])
result = c(**kwargs)
if props:
for name, value in props.items():
setattr(result, name, value)
return result
def as_tuple(self, value):
"""Utility function which converts lists to tuples."""
if isinstance(value, list):
value = tuple(value)
return value
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2016 The Python Software Foundation.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
"""PEP 376 implementation."""
from __future__ import unicode_literals
import base64
import codecs
import contextlib
import hashlib
import logging
import os
import posixpath
import sys
import zipimport
from . import DistlibException, resources
from .compat import StringIO
from .version import get_scheme, UnsupportedVersionError
from .metadata import Metadata, METADATA_FILENAME, WHEEL_METADATA_FILENAME
from .util import (parse_requirement, cached_property, parse_name_and_version,
read_exports, write_exports, CSVReader, CSVWriter)
__all__ = ['Distribution', 'BaseInstalledDistribution',
'InstalledDistribution', 'EggInfoDistribution',
'DistributionPath']
logger = logging.getLogger(__name__)
EXPORTS_FILENAME = 'pydist-exports.json'
COMMANDS_FILENAME = 'pydist-commands.json'
DIST_FILES = ('INSTALLER', METADATA_FILENAME, 'RECORD', 'REQUESTED',
'RESOURCES', EXPORTS_FILENAME, 'SHARED')
DISTINFO_EXT = '.dist-info'
class _Cache(object):
"""
A simple cache mapping names and .dist-info paths to distributions
"""
def __init__(self):
"""
Initialise an instance. There is normally one for each DistributionPath.
"""
self.name = {}
self.path = {}
self.generated = False
def clear(self):
"""
Clear the cache, setting it to its initial state.
"""
self.name.clear()
self.path.clear()
self.generated = False
def add(self, dist):
"""
Add a distribution to the cache.
:param dist: The distribution to add.
"""
if dist.path not in self.path:
self.path[dist.path] = dist
self.name.setdefault(dist.key, []).append(dist)
class DistributionPath(object):
"""
Represents a set of distributions installed on a path (typically sys.path).
"""
def __init__(self, path=None, include_egg=False):
"""
Create an instance from a path, optionally including legacy (distutils/
setuptools/distribute) distributions.
:param path: The path to use, as a list of directories. If not specified,
sys.path is used.
:param include_egg: If True, this instance will look for and return legacy
distributions as well as those based on PEP 376.
"""
if path is None:
path = sys.path
self.path = path
self._include_dist = True
self._include_egg = include_egg
self._cache = _Cache()
self._cache_egg = _Cache()
self._cache_enabled = True
self._scheme = get_scheme('default')
def _get_cache_enabled(self):
return self._cache_enabled
def _set_cache_enabled(self, value):
self._cache_enabled = value
cache_enabled = property(_get_cache_enabled, _set_cache_enabled)
def clear_cache(self):
"""
Clears the internal cache.
"""
self._cache.clear()
self._cache_egg.clear()
def _yield_distributions(self):
"""
Yield .dist-info and/or .egg(-info) distributions.
"""
# We need to check if we've seen some resources already, because on
# some Linux systems (e.g. some Debian/Ubuntu variants) there are
# symlinks which alias other files in the environment.
seen = set()
for path in self.path:
finder = resources.finder_for_path(path)
if finder is None:
continue
r = finder.find('')
if not r or not r.is_container:
continue
rset = sorted(r.resources)
for entry in rset:
r = finder.find(entry)
if not r or r.path in seen:
continue
if self._include_dist and entry.endswith(DISTINFO_EXT):
possible_filenames = [METADATA_FILENAME, WHEEL_METADATA_FILENAME]
for metadata_filename in possible_filenames:
metadata_path = posixpath.join(entry, metadata_filename)
pydist = finder.find(metadata_path)
if pydist:
break
else:
continue
with contextlib.closing(pydist.as_stream()) as stream:
metadata = Metadata(fileobj=stream, scheme='legacy')
logger.debug('Found %s', r.path)
seen.add(r.path)
yield new_dist_class(r.path, metadata=metadata,
env=self)
elif self._include_egg and entry.endswith(('.egg-info',
'.egg')):
logger.debug('Found %s', r.path)
seen.add(r.path)
yield old_dist_class(r.path, self)
def _generate_cache(self):
"""
Scan the path for distributions and populate the cache with
those that are found.
"""
gen_dist = not self._cache.generated
gen_egg = self._include_egg and not self._cache_egg.generated
if gen_dist or gen_egg:
for dist in self._yield_distributions():
if isinstance(dist, InstalledDistribution):
self._cache.add(dist)
else:
self._cache_egg.add(dist)
if gen_dist:
self._cache.generated = True
if gen_egg:
self._cache_egg.generated = True
@classmethod
def distinfo_dirname(cls, name, version):
"""
The *name* and *version* parameters are converted into their
filename-escaped form, i.e. any ``'-'`` characters are replaced
with ``'_'`` other than the one in ``'dist-info'`` and the one
separating the name from the version number.
:parameter name: is converted to a standard distribution name by replacing
any runs of non- alphanumeric characters with a single
``'-'``.
:type name: string
:parameter version: is converted to a standard version string. Spaces
become dots, and all other non-alphanumeric characters
(except dots) become dashes, with runs of multiple
dashes condensed to a single dash.
:type version: string
:returns: directory name
:rtype: string"""
name = name.replace('-', '_')
return '-'.join([name, version]) + DISTINFO_EXT
def get_distributions(self):
"""
Provides an iterator that looks for distributions and returns
:class:`InstalledDistribution` or
:class:`EggInfoDistribution` instances for each one of them.
:rtype: iterator of :class:`InstalledDistribution` and
:class:`EggInfoDistribution` instances
"""
if not self._cache_enabled:
for dist in self._yield_distributions():
yield dist
else:
self._generate_cache()
for dist in self._cache.path.values():
yield dist
if self._include_egg:
for dist in self._cache_egg.path.values():
yield dist
def get_distribution(self, name):
"""
Looks for a named distribution on the path.
This function only returns the first result found, as no more than one
value is expected. If nothing is found, ``None`` is returned.
:rtype: :class:`InstalledDistribution`, :class:`EggInfoDistribution`
or ``None``
"""
result = None
name = name.lower()
if not self._cache_enabled:
for dist in self._yield_distributions():
if dist.key == name:
result = dist
break
else:
self._generate_cache()
if name in self._cache.name:
result = self._cache.name[name][0]
elif self._include_egg and name in self._cache_egg.name:
result = self._cache_egg.name[name][0]
return result
def provides_distribution(self, name, version=None):
"""
Iterates over all distributions to find which distributions provide *name*.
If a *version* is provided, it will be used to filter the results.
This function only returns the first result found, since no more than
one values are expected. If the directory is not found, returns ``None``.
:parameter version: a version specifier that indicates the version
required, conforming to the format in ``PEP-345``
:type name: string
:type version: string
"""
matcher = None
if not version is None:
try:
matcher = self._scheme.matcher('%s (%s)' % (name, version))
except ValueError:
raise DistlibException('invalid name or version: %r, %r' %
(name, version))
for dist in self.get_distributions():
provided = dist.provides
for p in provided:
p_name, p_ver = parse_name_and_version(p)
if matcher is None:
if p_name == name:
yield dist
break
else:
if p_name == name and matcher.match(p_ver):
yield dist
break
def get_file_path(self, name, relative_path):
"""
Return the path to a resource file.
"""
dist = self.get_distribution(name)
if dist is None:
raise LookupError('no distribution named %r found' % name)
return dist.get_resource_path(relative_path)
def get_exported_entries(self, category, name=None):
"""
Return all of the exported entries in a particular category.
:param category: The category to search for entries.
:param name: If specified, only entries with that name are returned.
"""
for dist in self.get_distributions():
r = dist.exports
if category in r:
d = r[category]
if name is not None:
if name in d:
yield d[name]
else:
for v in d.values():
yield v
class Distribution(object):
"""
A base class for distributions, whether installed or from indexes.
Either way, it must have some metadata, so that's all that's needed
for construction.
"""
build_time_dependency = False
"""
Set to True if it's known to be only a build-time dependency (i.e.
not needed after installation).
"""
requested = False
"""A boolean that indicates whether the ``REQUESTED`` metadata file is
present (in other words, whether the package was installed by user
request or it was installed as a dependency)."""
def __init__(self, metadata):
"""
Initialise an instance.
:param metadata: The instance of :class:`Metadata` describing this
distribution.
"""
self.metadata = metadata
self.name = metadata.name
self.key = self.name.lower() # for case-insensitive comparisons
self.version = metadata.version
self.locator = None
self.digest = None
self.extras = None # additional features requested
self.context = None # environment marker overrides
self.download_urls = set()
self.digests = {}
@property
def source_url(self):
"""
The source archive download URL for this distribution.
"""
return self.metadata.source_url
download_url = source_url # Backward compatibility
@property
def name_and_version(self):
"""
A utility property which displays the name and version in parentheses.
"""
return '%s (%s)' % (self.name, self.version)
@property
def provides(self):
"""
A set of distribution names and versions provided by this distribution.
:return: A set of "name (version)" strings.
"""
plist = self.metadata.provides
s = '%s (%s)' % (self.name, self.version)
if s not in plist:
plist.append(s)
return plist
def _get_requirements(self, req_attr):
md = self.metadata
logger.debug('Getting requirements from metadata %r', md.todict())
reqts = getattr(md, req_attr)
return set(md.get_requirements(reqts, extras=self.extras,
env=self.context))
@property
def run_requires(self):
return self._get_requirements('run_requires')
@property
def meta_requires(self):
return self._get_requirements('meta_requires')
@property
def build_requires(self):
return self._get_requirements('build_requires')
@property
def test_requires(self):
return self._get_requirements('test_requires')
@property
def dev_requires(self):
return self._get_requirements('dev_requires')
def matches_requirement(self, req):
"""
Say if this instance matches (fulfills) a requirement.
:param req: The requirement to match.
:rtype req: str
:return: True if it matches, else False.
"""
# Requirement may contain extras - parse to lose those
# from what's passed to the matcher
r = parse_requirement(req)
scheme = get_scheme(self.metadata.scheme)
try:
matcher = scheme.matcher(r.requirement)
except UnsupportedVersionError:
# XXX compat-mode if cannot read the version
logger.warning('could not read version %r - using name only',
req)
name = req.split()[0]
matcher = scheme.matcher(name)
name = matcher.key # case-insensitive
result = False
for p in self.provides:
p_name, p_ver = parse_name_and_version(p)
if p_name != name:
continue
try:
result = matcher.match(p_ver)
break
except UnsupportedVersionError:
pass
return result
def __repr__(self):
"""
Return a textual representation of this instance,
"""
if self.source_url:
suffix = ' [%s]' % self.source_url
else:
suffix = ''
return '<Distribution %s (%s)%s>' % (self.name, self.version, suffix)
def __eq__(self, other):
"""
See if this distribution is the same as another.
:param other: The distribution to compare with. To be equal to one
another. distributions must have the same type, name,
version and source_url.
:return: True if it is the same, else False.
"""
if type(other) is not type(self):
result = False
else:
result = (self.name == other.name and
self.version == other.version and
self.source_url == other.source_url)
return result
def __hash__(self):
"""
Compute hash in a way which matches the equality test.
"""
return hash(self.name) + hash(self.version) + hash(self.source_url)
class BaseInstalledDistribution(Distribution):
"""
This is the base class for installed distributions (whether PEP 376 or
legacy).
"""
hasher = None
def __init__(self, metadata, path, env=None):
"""
Initialise an instance.
:param metadata: An instance of :class:`Metadata` which describes the
distribution. This will normally have been initialised
from a metadata file in the ``path``.
:param path: The path of the ``.dist-info`` or ``.egg-info``
directory for the distribution.
:param env: This is normally the :class:`DistributionPath`
instance where this distribution was found.
"""
super(BaseInstalledDistribution, self).__init__(metadata)
self.path = path
self.dist_path = env
def get_hash(self, data, hasher=None):
"""
Get the hash of some data, using a particular hash algorithm, if
specified.
:param data: The data to be hashed.
:type data: bytes
:param hasher: The name of a hash implementation, supported by hashlib,
or ``None``. Examples of valid values are ``'sha1'``,
``'sha224'``, ``'sha384'``, '``sha256'``, ``'md5'`` and
``'sha512'``. If no hasher is specified, the ``hasher``
attribute of the :class:`InstalledDistribution` instance
is used. If the hasher is determined to be ``None``, MD5
is used as the hashing algorithm.
:returns: The hash of the data. If a hasher was explicitly specified,
the returned hash will be prefixed with the specified hasher
followed by '='.
:rtype: str
"""
if hasher is None:
hasher = self.hasher
if hasher is None:
hasher = hashlib.md5
prefix = ''
else:
hasher = getattr(hashlib, hasher)
prefix = '%s=' % self.hasher
digest = hasher(data).digest()
digest = base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii')
return '%s%s' % (prefix, digest)
class InstalledDistribution(BaseInstalledDistribution):
"""
Created with the *path* of the ``.dist-info`` directory provided to the
constructor. It reads the metadata contained in ``pydist.json`` when it is
instantiated., or uses a passed in Metadata instance (useful for when
dry-run mode is being used).
"""
hasher = 'sha256'
def __init__(self, path, metadata=None, env=None):
self.finder = finder = resources.finder_for_path(path)
if finder is None:
import pdb; pdb.set_trace ()
if env and env._cache_enabled and path in env._cache.path:
metadata = env._cache.path[path].metadata
elif metadata is None:
r = finder.find(METADATA_FILENAME)
# Temporary - for Wheel 0.23 support
if r is None:
r = finder.find(WHEEL_METADATA_FILENAME)
# Temporary - for legacy support
if r is None:
r = finder.find('METADATA')
if r is None:
raise ValueError('no %s found in %s' % (METADATA_FILENAME,
path))
with contextlib.closing(r.as_stream()) as stream:
metadata = Metadata(fileobj=stream, scheme='legacy')
super(InstalledDistribution, self).__init__(metadata, path, env)
if env and env._cache_enabled:
env._cache.add(self)
try:
r = finder.find('REQUESTED')
except AttributeError:
import pdb; pdb.set_trace ()
self.requested = r is not None
def __repr__(self):
return '<InstalledDistribution %r %s at %r>' % (
self.name, self.version, self.path)
def __str__(self):
return "%s %s" % (self.name, self.version)
def _get_records(self):
"""
Get the list of installed files for the distribution
:return: A list of tuples of path, hash and size. Note that hash and
size might be ``None`` for some entries. The path is exactly
as stored in the file (which is as in PEP 376).
"""
results = []
r = self.get_distinfo_resource('RECORD')
with contextlib.closing(r.as_stream()) as stream:
with CSVReader(stream=stream) as record_reader:
# Base location is parent dir of .dist-info dir
#base_location = os.path.dirname(self.path)
#base_location = os.path.abspath(base_location)
for row in record_reader:
missing = [None for i in range(len(row), 3)]
path, checksum, size = row + missing
#if not os.path.isabs(path):
# path = path.replace('/', os.sep)
# path = os.path.join(base_location, path)
results.append((path, checksum, size))
return results
@cached_property
def exports(self):
"""
Return the information exported by this distribution.
:return: A dictionary of exports, mapping an export category to a dict
of :class:`ExportEntry` instances describing the individual
export entries, and keyed by name.
"""
result = {}
r = self.get_distinfo_resource(EXPORTS_FILENAME)
if r:
result = self.read_exports()
return result
def read_exports(self):
"""
Read exports data from a file in .ini format.
:return: A dictionary of exports, mapping an export category to a list
of :class:`ExportEntry` instances describing the individual
export entries.
"""
result = {}
r = self.get_distinfo_resource(EXPORTS_FILENAME)
if r:
with contextlib.closing(r.as_stream()) as stream:
result = read_exports(stream)
return result
def write_exports(self, exports):
"""
Write a dictionary of exports to a file in .ini format.
:param exports: A dictionary of exports, mapping an export category to
a list of :class:`ExportEntry` instances describing the
individual export entries.
"""
rf = self.get_distinfo_file(EXPORTS_FILENAME)
with open(rf, 'w') as f:
write_exports(exports, f)
def get_resource_path(self, relative_path):
"""
NOTE: This API may change in the future.
Return the absolute path to a resource file with the given relative
path.
:param relative_path: The path, relative to .dist-info, of the resource
of interest.
:return: The absolute path where the resource is to be found.
"""
r = self.get_distinfo_resource('RESOURCES')
with contextlib.closing(r.as_stream()) as stream:
with CSVReader(stream=stream) as resources_reader:
for relative, destination in resources_reader:
if relative == relative_path:
return destination
raise KeyError('no resource file with relative path %r '
'is installed' % relative_path)
def list_installed_files(self):
"""
Iterates over the ``RECORD`` entries and returns a tuple
``(path, hash, size)`` for each line.
:returns: iterator of (path, hash, size)
"""
for result in self._get_records():
yield result
def write_installed_files(self, paths, prefix, dry_run=False):
"""
Writes the ``RECORD`` file, using the ``paths`` iterable passed in. Any
existing ``RECORD`` file is silently overwritten.
prefix is used to determine when to write absolute paths.
"""
prefix = os.path.join(prefix, '')
base = os.path.dirname(self.path)
base_under_prefix = base.startswith(prefix)
base = os.path.join(base, '')
record_path = self.get_distinfo_file('RECORD')
logger.info('creating %s', record_path)
if dry_run:
return None
with CSVWriter(record_path) as writer:
for path in paths:
if os.path.isdir(path) or path.endswith(('.pyc', '.pyo')):
# do not put size and hash, as in PEP-376
hash_value = size = ''
else:
size = '%d' % os.path.getsize(path)
with open(path, 'rb') as fp:
hash_value = self.get_hash(fp.read())
if path.startswith(base) or (base_under_prefix and
path.startswith(prefix)):
path = os.path.relpath(path, base)
writer.writerow((path, hash_value, size))
# add the RECORD file itself
if record_path.startswith(base):
record_path = os.path.relpath(record_path, base)
writer.writerow((record_path, '', ''))
return record_path
def check_installed_files(self):
"""
Checks that the hashes and sizes of the files in ``RECORD`` are
matched by the files themselves. Returns a (possibly empty) list of
mismatches. Each entry in the mismatch list will be a tuple consisting
of the path, 'exists', 'size' or 'hash' according to what didn't match
(existence is checked first, then size, then hash), the expected
value and the actual value.
"""
mismatches = []
base = os.path.dirname(self.path)
record_path = self.get_distinfo_file('RECORD')
for path, hash_value, size in self.list_installed_files():
if not os.path.isabs(path):
path = os.path.join(base, path)
if path == record_path:
continue
if not os.path.exists(path):
mismatches.append((path, 'exists', True, False))
elif os.path.isfile(path):
actual_size = str(os.path.getsize(path))
if size and actual_size != size:
mismatches.append((path, 'size', size, actual_size))
elif hash_value:
if '=' in hash_value:
hasher = hash_value.split('=', 1)[0]
else:
hasher = None
with open(path, 'rb') as f:
actual_hash = self.get_hash(f.read(), hasher)
if actual_hash != hash_value:
mismatches.append((path, 'hash', hash_value, actual_hash))
return mismatches
@cached_property
def shared_locations(self):
"""
A dictionary of shared locations whose keys are in the set 'prefix',
'purelib', 'platlib', 'scripts', 'headers', 'data' and 'namespace'.
The corresponding value is the absolute path of that category for
this distribution, and takes into account any paths selected by the
user at installation time (e.g. via command-line arguments). In the
case of the 'namespace' key, this would be a list of absolute paths
for the roots of namespace packages in this distribution.
The first time this property is accessed, the relevant information is
read from the SHARED file in the .dist-info directory.
"""
result = {}
shared_path = os.path.join(self.path, 'SHARED')
if os.path.isfile(shared_path):
with codecs.open(shared_path, 'r', encoding='utf-8') as f:
lines = f.read().splitlines()
for line in lines:
key, value = line.split('=', 1)
if key == 'namespace':
result.setdefault(key, []).append(value)
else:
result[key] = value
return result
def write_shared_locations(self, paths, dry_run=False):
"""
Write shared location information to the SHARED file in .dist-info.
:param paths: A dictionary as described in the documentation for
:meth:`shared_locations`.
:param dry_run: If True, the action is logged but no file is actually
written.
:return: The path of the file written to.
"""
shared_path = os.path.join(self.path, 'SHARED')
logger.info('creating %s', shared_path)
if dry_run:
return None
lines = []
for key in ('prefix', 'lib', 'headers', 'scripts', 'data'):
path = paths[key]
if os.path.isdir(paths[key]):
lines.append('%s=%s' % (key, path))
for ns in paths.get('namespace', ()):
lines.append('namespace=%s' % ns)
with codecs.open(shared_path, 'w', encoding='utf-8') as f:
f.write('\n'.join(lines))
return shared_path
def get_distinfo_resource(self, path):
if path not in DIST_FILES:
raise DistlibException('invalid path for a dist-info file: '
'%r at %r' % (path, self.path))
finder = resources.finder_for_path(self.path)
if finder is None:
raise DistlibException('Unable to get a finder for %s' % self.path)
return finder.find(path)
def get_distinfo_file(self, path):
"""
Returns a path located under the ``.dist-info`` directory. Returns a
string representing the path.
:parameter path: a ``'/'``-separated path relative to the
``.dist-info`` directory or an absolute path;
If *path* is an absolute path and doesn't start
with the ``.dist-info`` directory path,
a :class:`DistlibException` is raised
:type path: str
:rtype: str
"""
# Check if it is an absolute path # XXX use relpath, add tests
if path.find(os.sep) >= 0:
# it's an absolute path?
distinfo_dirname, path = path.split(os.sep)[-2:]
if distinfo_dirname != self.path.split(os.sep)[-1]:
raise DistlibException(
'dist-info file %r does not belong to the %r %s '
'distribution' % (path, self.name, self.version))
# The file must be relative
if path not in DIST_FILES:
raise DistlibException('invalid path for a dist-info file: '
'%r at %r' % (path, self.path))
return os.path.join(self.path, path)
def list_distinfo_files(self):
"""
Iterates over the ``RECORD`` entries and returns paths for each line if
the path is pointing to a file located in the ``.dist-info`` directory
or one of its subdirectories.
:returns: iterator of paths
"""
base = os.path.dirname(self.path)
for path, checksum, size in self._get_records():
# XXX add separator or use real relpath algo
if not os.path.isabs(path):
path = os.path.join(base, path)
if path.startswith(self.path):
yield path
def __eq__(self, other):
return (isinstance(other, InstalledDistribution) and
self.path == other.path)
# See http://docs.python.org/reference/datamodel#object.__hash__
__hash__ = object.__hash__
class EggInfoDistribution(BaseInstalledDistribution):
"""Created with the *path* of the ``.egg-info`` directory or file provided
to the constructor. It reads the metadata contained in the file itself, or
if the given path happens to be a directory, the metadata is read from the
file ``PKG-INFO`` under that directory."""
requested = True # as we have no way of knowing, assume it was
shared_locations = {}
def __init__(self, path, env=None):
def set_name_and_version(s, n, v):
s.name = n
s.key = n.lower() # for case-insensitive comparisons
s.version = v
self.path = path
self.dist_path = env
if env and env._cache_enabled and path in env._cache_egg.path:
metadata = env._cache_egg.path[path].metadata
set_name_and_version(self, metadata.name, metadata.version)
else:
metadata = self._get_metadata(path)
# Need to be set before caching
set_name_and_version(self, metadata.name, metadata.version)
if env and env._cache_enabled:
env._cache_egg.add(self)
super(EggInfoDistribution, self).__init__(metadata, path, env)
def _get_metadata(self, path):
requires = None
def parse_requires_data(data):
"""Create a list of dependencies from a requires.txt file.
*data*: the contents of a setuptools-produced requires.txt file.
"""
reqs = []
lines = data.splitlines()
for line in lines:
line = line.strip()
if line.startswith('['):
logger.warning('Unexpected line: quitting requirement scan: %r',
line)
break
r = parse_requirement(line)
if not r:
logger.warning('Not recognised as a requirement: %r', line)
continue
if r.extras:
logger.warning('extra requirements in requires.txt are '
'not supported')
if not r.constraints:
reqs.append(r.name)
else:
cons = ', '.join('%s%s' % c for c in r.constraints)
reqs.append('%s (%s)' % (r.name, cons))
return reqs
def parse_requires_path(req_path):
"""Create a list of dependencies from a requires.txt file.
*req_path*: the path to a setuptools-produced requires.txt file.
"""
reqs = []
try:
with codecs.open(req_path, 'r', 'utf-8') as fp:
reqs = parse_requires_data(fp.read())
except IOError:
pass
return reqs
if path.endswith('.egg'):
if os.path.isdir(path):
meta_path = os.path.join(path, 'EGG-INFO', 'PKG-INFO')
metadata = Metadata(path=meta_path, scheme='legacy')
req_path = os.path.join(path, 'EGG-INFO', 'requires.txt')
requires = parse_requires_path(req_path)
else:
# FIXME handle the case where zipfile is not available
zipf = zipimport.zipimporter(path)
fileobj = StringIO(
zipf.get_data('EGG-INFO/PKG-INFO').decode('utf8'))
metadata = Metadata(fileobj=fileobj, scheme='legacy')
try:
data = zipf.get_data('EGG-INFO/requires.txt')
requires = parse_requires_data(data.decode('utf-8'))
except IOError:
requires = None
elif path.endswith('.egg-info'):
if os.path.isdir(path):
req_path = os.path.join(path, 'requires.txt')
requires = parse_requires_path(req_path)
path = os.path.join(path, 'PKG-INFO')
metadata = Metadata(path=path, scheme='legacy')
else:
raise DistlibException('path must end with .egg-info or .egg, '
'got %r' % path)
if requires:
metadata.add_requirements(requires)
return metadata
def __repr__(self):
return '<EggInfoDistribution %r %s at %r>' % (
self.name, self.version, self.path)
def __str__(self):
return "%s %s" % (self.name, self.version)
def check_installed_files(self):
"""
Checks that the hashes and sizes of the files in ``RECORD`` are
matched by the files themselves. Returns a (possibly empty) list of
mismatches. Each entry in the mismatch list will be a tuple consisting
of the path, 'exists', 'size' or 'hash' according to what didn't match
(existence is checked first, then size, then hash), the expected
value and the actual value.
"""
mismatches = []
record_path = os.path.join(self.path, 'installed-files.txt')
if os.path.exists(record_path):
for path, _, _ in self.list_installed_files():
if path == record_path:
continue
if not os.path.exists(path):
mismatches.append((path, 'exists', True, False))
return mismatches
def list_installed_files(self):
"""
Iterates over the ``installed-files.txt`` entries and returns a tuple
``(path, hash, size)`` for each line.
:returns: a list of (path, hash, size)
"""
def _md5(path):
f = open(path, 'rb')
try:
content = f.read()
finally:
f.close()
return hashlib.md5(content).hexdigest()
def _size(path):
return os.stat(path).st_size
record_path = os.path.join(self.path, 'installed-files.txt')
result = []
if os.path.exists(record_path):
with codecs.open(record_path, 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
p = os.path.normpath(os.path.join(self.path, line))
# "./" is present as a marker between installed files
# and installation metadata files
if not os.path.exists(p):
logger.warning('Non-existent file: %s', p)
if p.endswith(('.pyc', '.pyo')):
continue
#otherwise fall through and fail
if not os.path.isdir(p):
result.append((p, _md5(p), _size(p)))
result.append((record_path, None, None))
return result
def list_distinfo_files(self, absolute=False):
"""
Iterates over the ``installed-files.txt`` entries and returns paths for
each line if the path is pointing to a file located in the
``.egg-info`` directory or one of its subdirectories.
:parameter absolute: If *absolute* is ``True``, each returned path is
transformed into a local absolute path. Otherwise the
raw value from ``installed-files.txt`` is returned.
:type absolute: boolean
:returns: iterator of paths
"""
record_path = os.path.join(self.path, 'installed-files.txt')
skip = True
with codecs.open(record_path, 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
if line == './':
skip = False
continue
if not skip:
p = os.path.normpath(os.path.join(self.path, line))
if p.startswith(self.path):
if absolute:
yield p
else:
yield line
def __eq__(self, other):
return (isinstance(other, EggInfoDistribution) and
self.path == other.path)
# See http://docs.python.org/reference/datamodel#object.__hash__
__hash__ = object.__hash__
new_dist_class = InstalledDistribution
old_dist_class = EggInfoDistribution
class DependencyGraph(object):
"""
Represents a dependency graph between distributions.
The dependency relationships are stored in an ``adjacency_list`` that maps
distributions to a list of ``(other, label)`` tuples where ``other``
is a distribution and the edge is labeled with ``label`` (i.e. the version
specifier, if such was provided). Also, for more efficient traversal, for
every distribution ``x``, a list of predecessors is kept in
``reverse_list[x]``. An edge from distribution ``a`` to
distribution ``b`` means that ``a`` depends on ``b``. If any missing
dependencies are found, they are stored in ``missing``, which is a
dictionary that maps distributions to a list of requirements that were not
provided by any other distributions.
"""
def __init__(self):
self.adjacency_list = {}
self.reverse_list = {}
self.missing = {}
def add_distribution(self, distribution):
"""Add the *distribution* to the graph.
:type distribution: :class:`distutils2.database.InstalledDistribution`
or :class:`distutils2.database.EggInfoDistribution`
"""
self.adjacency_list[distribution] = []
self.reverse_list[distribution] = []
#self.missing[distribution] = []
def add_edge(self, x, y, label=None):
"""Add an edge from distribution *x* to distribution *y* with the given
*label*.
:type x: :class:`distutils2.database.InstalledDistribution` or
:class:`distutils2.database.EggInfoDistribution`
:type y: :class:`distutils2.database.InstalledDistribution` or
:class:`distutils2.database.EggInfoDistribution`
:type label: ``str`` or ``None``
"""
self.adjacency_list[x].append((y, label))
# multiple edges are allowed, so be careful
if x not in self.reverse_list[y]:
self.reverse_list[y].append(x)
def add_missing(self, distribution, requirement):
"""
Add a missing *requirement* for the given *distribution*.
:type distribution: :class:`distutils2.database.InstalledDistribution`
or :class:`distutils2.database.EggInfoDistribution`
:type requirement: ``str``
"""
logger.debug('%s missing %r', distribution, requirement)
self.missing.setdefault(distribution, []).append(requirement)
def _repr_dist(self, dist):
return '%s %s' % (dist.name, dist.version)
def repr_node(self, dist, level=1):
"""Prints only a subgraph"""
output = [self._repr_dist(dist)]
for other, label in self.adjacency_list[dist]:
dist = self._repr_dist(other)
if label is not None:
dist = '%s [%s]' % (dist, label)
output.append(' ' * level + str(dist))
suboutput = self.repr_node(other, level + 1)
subs = suboutput.split('\n')
output.extend(subs[1:])
return '\n'.join(output)
def to_dot(self, f, skip_disconnected=True):
"""Writes a DOT output for the graph to the provided file *f*.
If *skip_disconnected* is set to ``True``, then all distributions
that are not dependent on any other distribution are skipped.
:type f: has to support ``file``-like operations
:type skip_disconnected: ``bool``
"""
disconnected = []
f.write("digraph dependencies {\n")
for dist, adjs in self.adjacency_list.items():
if len(adjs) == 0 and not skip_disconnected:
disconnected.append(dist)
for other, label in adjs:
if not label is None:
f.write('"%s" -> "%s" [label="%s"]\n' %
(dist.name, other.name, label))
else:
f.write('"%s" -> "%s"\n' % (dist.name, other.name))
if not skip_disconnected and len(disconnected) > 0:
f.write('subgraph disconnected {\n')
f.write('label = "Disconnected"\n')
f.write('bgcolor = red\n')
for dist in disconnected:
f.write('"%s"' % dist.name)
f.write('\n')
f.write('}\n')
f.write('}\n')
def topological_sort(self):
"""
Perform a topological sort of the graph.
:return: A tuple, the first element of which is a topologically sorted
list of distributions, and the second element of which is a
list of distributions that cannot be sorted because they have
circular dependencies and so form a cycle.
"""
result = []
# Make a shallow copy of the adjacency list
alist = {}
for k, v in self.adjacency_list.items():
alist[k] = v[:]
while True:
# See what we can remove in this run
to_remove = []
for k, v in list(alist.items())[:]:
if not v:
to_remove.append(k)
del alist[k]
if not to_remove:
# What's left in alist (if anything) is a cycle.
break
# Remove from the adjacency list of others
for k, v in alist.items():
alist[k] = [(d, r) for d, r in v if d not in to_remove]
logger.debug('Moving to result: %s',
['%s (%s)' % (d.name, d.version) for d in to_remove])
result.extend(to_remove)
return result, list(alist.keys())
def __repr__(self):
"""Representation of the graph"""
output = []
for dist, adjs in self.adjacency_list.items():
output.append(self.repr_node(dist))
return '\n'.join(output)
def make_graph(dists, scheme='default'):
"""Makes a dependency graph from the given distributions.
:parameter dists: a list of distributions
:type dists: list of :class:`distutils2.database.InstalledDistribution` and
:class:`distutils2.database.EggInfoDistribution` instances
:rtype: a :class:`DependencyGraph` instance
"""
scheme = get_scheme(scheme)
graph = DependencyGraph()
provided = {} # maps names to lists of (version, dist) tuples
# first, build the graph and find out what's provided
for dist in dists:
graph.add_distribution(dist)
for p in dist.provides:
name, version = parse_name_and_version(p)
logger.debug('Add to provided: %s, %s, %s', name, version, dist)
provided.setdefault(name, []).append((version, dist))
# now make the edges
for dist in dists:
requires = (dist.run_requires | dist.meta_requires |
dist.build_requires | dist.dev_requires)
for req in requires:
try:
matcher = scheme.matcher(req)
except UnsupportedVersionError:
# XXX compat-mode if cannot read the version
logger.warning('could not read version %r - using name only',
req)
name = req.split()[0]
matcher = scheme.matcher(name)
name = matcher.key # case-insensitive
matched = False
if name in provided:
for version, provider in provided[name]:
try:
match = matcher.match(version)
except UnsupportedVersionError:
match = False
if match:
graph.add_edge(dist, provider, req)
matched = True
break
if not matched:
graph.add_missing(dist, req)
return graph
def get_dependent_dists(dists, dist):
"""Recursively generate a list of distributions from *dists* that are
dependent on *dist*.
:param dists: a list of distributions
:param dist: a distribution, member of *dists* for which we are interested
"""
if dist not in dists:
raise DistlibException('given distribution %r is not a member '
'of the list' % dist.name)
graph = make_graph(dists)
dep = [dist] # dependent distributions
todo = graph.reverse_list[dist] # list of nodes we should inspect
while todo:
d = todo.pop()
dep.append(d)
for succ in graph.reverse_list[d]:
if succ not in dep:
todo.append(succ)
dep.pop(0) # remove dist from dep, was there to prevent infinite loops
return dep
def get_required_dists(dists, dist):
"""Recursively generate a list of distributions from *dists* that are
required by *dist*.
:param dists: a list of distributions
:param dist: a distribution, member of *dists* for which we are interested
"""
if dist not in dists:
raise DistlibException('given distribution %r is not a member '
'of the list' % dist.name)
graph = make_graph(dists)
req = [] # required distributions
todo = graph.adjacency_list[dist] # list of nodes we should inspect
while todo:
d = todo.pop()[0]
req.append(d)
for pred in graph.adjacency_list[d]:
if pred not in req:
todo.append(pred)
return req
def make_dist(name, version, **kwargs):
"""
A convenience method for making a dist given just a name and version.
"""
summary = kwargs.pop('summary', 'Placeholder for summary')
md = Metadata(**kwargs)
md.name = name
md.version = version
md.summary = summary or 'Placeholder for summary'
return Distribution(md)
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
import hashlib
import logging
import os
import shutil
import subprocess
import tempfile
try:
from threading import Thread
except ImportError:
from dummy_threading import Thread
from . import DistlibException
from .compat import (HTTPBasicAuthHandler, Request, HTTPPasswordMgr,
urlparse, build_opener, string_types)
from .util import cached_property, zip_dir, ServerProxy
logger = logging.getLogger(__name__)
DEFAULT_INDEX = 'https://pypi.python.org/pypi'
DEFAULT_REALM = 'pypi'
class PackageIndex(object):
"""
This class represents a package index compatible with PyPI, the Python
Package Index.
"""
boundary = b'----------ThIs_Is_tHe_distlib_index_bouNdaRY_$'
def __init__(self, url=None):
"""
Initialise an instance.
:param url: The URL of the index. If not specified, the URL for PyPI is
used.
"""
self.url = url or DEFAULT_INDEX
self.read_configuration()
scheme, netloc, path, params, query, frag = urlparse(self.url)
if params or query or frag or scheme not in ('http', 'https'):
raise DistlibException('invalid repository: %s' % self.url)
self.password_handler = None
self.ssl_verifier = None
self.gpg = None
self.gpg_home = None
self.rpc_proxy = None
with open(os.devnull, 'w') as sink:
# Use gpg by default rather than gpg2, as gpg2 insists on
# prompting for passwords
for s in ('gpg', 'gpg2'):
try:
rc = subprocess.check_call([s, '--version'], stdout=sink,
stderr=sink)
if rc == 0:
self.gpg = s
break
except OSError:
pass
def _get_pypirc_command(self):
"""
Get the distutils command for interacting with PyPI configurations.
:return: the command.
"""
from distutils.core import Distribution
from distutils.config import PyPIRCCommand
d = Distribution()
return PyPIRCCommand(d)
def read_configuration(self):
"""
Read the PyPI access configuration as supported by distutils, getting
PyPI to do the actual work. This populates ``username``, ``password``,
``realm`` and ``url`` attributes from the configuration.
"""
# get distutils to do the work
c = self._get_pypirc_command()
c.repository = self.url
cfg = c._read_pypirc()
self.username = cfg.get('username')
self.password = cfg.get('password')
self.realm = cfg.get('realm', 'pypi')
self.url = cfg.get('repository', self.url)
def save_configuration(self):
"""
Save the PyPI access configuration. You must have set ``username`` and
``password`` attributes before calling this method.
Again, distutils is used to do the actual work.
"""
self.check_credentials()
# get distutils to do the work
c = self._get_pypirc_command()
c._store_pypirc(self.username, self.password)
def check_credentials(self):
"""
Check that ``username`` and ``password`` have been set, and raise an
exception if not.
"""
if self.username is None or self.password is None:
raise DistlibException('username and password must be set')
pm = HTTPPasswordMgr()
_, netloc, _, _, _, _ = urlparse(self.url)
pm.add_password(self.realm, netloc, self.username, self.password)
self.password_handler = HTTPBasicAuthHandler(pm)
def register(self, metadata):
"""
Register a distribution on PyPI, using the provided metadata.
:param metadata: A :class:`Metadata` instance defining at least a name
and version number for the distribution to be
registered.
:return: The HTTP response received from PyPI upon submission of the
request.
"""
self.check_credentials()
metadata.validate()
d = metadata.todict()
d[':action'] = 'verify'
request = self.encode_request(d.items(), [])
response = self.send_request(request)
d[':action'] = 'submit'
request = self.encode_request(d.items(), [])
return self.send_request(request)
def _reader(self, name, stream, outbuf):
"""
Thread runner for reading lines of from a subprocess into a buffer.
:param name: The logical name of the stream (used for logging only).
:param stream: The stream to read from. This will typically a pipe
connected to the output stream of a subprocess.
:param outbuf: The list to append the read lines to.
"""
while True:
s = stream.readline()
if not s:
break
s = s.decode('utf-8').rstrip()
outbuf.append(s)
logger.debug('%s: %s' % (name, s))
stream.close()
def get_sign_command(self, filename, signer, sign_password,
keystore=None):
"""
Return a suitable command for signing a file.
:param filename: The pathname to the file to be signed.
:param signer: The identifier of the signer of the file.
:param sign_password: The passphrase for the signer's
private key used for signing.
:param keystore: The path to a directory which contains the keys
used in verification. If not specified, the
instance's ``gpg_home`` attribute is used instead.
:return: The signing command as a list suitable to be
passed to :class:`subprocess.Popen`.
"""
cmd = [self.gpg, '--status-fd', '2', '--no-tty']
if keystore is None:
keystore = self.gpg_home
if keystore:
cmd.extend(['--homedir', keystore])
if sign_password is not None:
cmd.extend(['--batch', '--passphrase-fd', '0'])
td = tempfile.mkdtemp()
sf = os.path.join(td, os.path.basename(filename) + '.asc')
cmd.extend(['--detach-sign', '--armor', '--local-user',
signer, '--output', sf, filename])
logger.debug('invoking: %s', ' '.join(cmd))
return cmd, sf
def run_command(self, cmd, input_data=None):
"""
Run a command in a child process , passing it any input data specified.
:param cmd: The command to run.
:param input_data: If specified, this must be a byte string containing
data to be sent to the child process.
:return: A tuple consisting of the subprocess' exit code, a list of
lines read from the subprocess' ``stdout``, and a list of
lines read from the subprocess' ``stderr``.
"""
kwargs = {
'stdout': subprocess.PIPE,
'stderr': subprocess.PIPE,
}
if input_data is not None:
kwargs['stdin'] = subprocess.PIPE
stdout = []
stderr = []
p = subprocess.Popen(cmd, **kwargs)
# We don't use communicate() here because we may need to
# get clever with interacting with the command
t1 = Thread(target=self._reader, args=('stdout', p.stdout, stdout))
t1.start()
t2 = Thread(target=self._reader, args=('stderr', p.stderr, stderr))
t2.start()
if input_data is not None:
p.stdin.write(input_data)
p.stdin.close()
p.wait()
t1.join()
t2.join()
return p.returncode, stdout, stderr
def sign_file(self, filename, signer, sign_password, keystore=None):
"""
Sign a file.
:param filename: The pathname to the file to be signed.
:param signer: The identifier of the signer of the file.
:param sign_password: The passphrase for the signer's
private key used for signing.
:param keystore: The path to a directory which contains the keys
used in signing. If not specified, the instance's
``gpg_home`` attribute is used instead.
:return: The absolute pathname of the file where the signature is
stored.
"""
cmd, sig_file = self.get_sign_command(filename, signer, sign_password,
keystore)
rc, stdout, stderr = self.run_command(cmd,
sign_password.encode('utf-8'))
if rc != 0:
raise DistlibException('sign command failed with error '
'code %s' % rc)
return sig_file
def upload_file(self, metadata, filename, signer=None, sign_password=None,
filetype='sdist', pyversion='source', keystore=None):
"""
Upload a release file to the index.
:param metadata: A :class:`Metadata` instance defining at least a name
and version number for the file to be uploaded.
:param filename: The pathname of the file to be uploaded.
:param signer: The identifier of the signer of the file.
:param sign_password: The passphrase for the signer's
private key used for signing.
:param filetype: The type of the file being uploaded. This is the
distutils command which produced that file, e.g.
``sdist`` or ``bdist_wheel``.
:param pyversion: The version of Python which the release relates
to. For code compatible with any Python, this would
be ``source``, otherwise it would be e.g. ``3.2``.
:param keystore: The path to a directory which contains the keys
used in signing. If not specified, the instance's
``gpg_home`` attribute is used instead.
:return: The HTTP response received from PyPI upon submission of the
request.
"""
self.check_credentials()
if not os.path.exists(filename):
raise DistlibException('not found: %s' % filename)
metadata.validate()
d = metadata.todict()
sig_file = None
if signer:
if not self.gpg:
logger.warning('no signing program available - not signed')
else:
sig_file = self.sign_file(filename, signer, sign_password,
keystore)
with open(filename, 'rb') as f:
file_data = f.read()
md5_digest = hashlib.md5(file_data).hexdigest()
sha256_digest = hashlib.sha256(file_data).hexdigest()
d.update({
':action': 'file_upload',
'protocol_version': '1',
'filetype': filetype,
'pyversion': pyversion,
'md5_digest': md5_digest,
'sha256_digest': sha256_digest,
})
files = [('content', os.path.basename(filename), file_data)]
if sig_file:
with open(sig_file, 'rb') as f:
sig_data = f.read()
files.append(('gpg_signature', os.path.basename(sig_file),
sig_data))
shutil.rmtree(os.path.dirname(sig_file))
request = self.encode_request(d.items(), files)
return self.send_request(request)
def upload_documentation(self, metadata, doc_dir):
"""
Upload documentation to the index.
:param metadata: A :class:`Metadata` instance defining at least a name
and version number for the documentation to be
uploaded.
:param doc_dir: The pathname of the directory which contains the
documentation. This should be the directory that
contains the ``index.html`` for the documentation.
:return: The HTTP response received from PyPI upon submission of the
request.
"""
self.check_credentials()
if not os.path.isdir(doc_dir):
raise DistlibException('not a directory: %r' % doc_dir)
fn = os.path.join(doc_dir, 'index.html')
if not os.path.exists(fn):
raise DistlibException('not found: %r' % fn)
metadata.validate()
name, version = metadata.name, metadata.version
zip_data = zip_dir(doc_dir).getvalue()
fields = [(':action', 'doc_upload'),
('name', name), ('version', version)]
files = [('content', name, zip_data)]
request = self.encode_request(fields, files)
return self.send_request(request)
def get_verify_command(self, signature_filename, data_filename,
keystore=None):
"""
Return a suitable command for verifying a file.
:param signature_filename: The pathname to the file containing the
signature.
:param data_filename: The pathname to the file containing the
signed data.
:param keystore: The path to a directory which contains the keys
used in verification. If not specified, the
instance's ``gpg_home`` attribute is used instead.
:return: The verifying command as a list suitable to be
passed to :class:`subprocess.Popen`.
"""
cmd = [self.gpg, '--status-fd', '2', '--no-tty']
if keystore is None:
keystore = self.gpg_home
if keystore:
cmd.extend(['--homedir', keystore])
cmd.extend(['--verify', signature_filename, data_filename])
logger.debug('invoking: %s', ' '.join(cmd))
return cmd
def verify_signature(self, signature_filename, data_filename,
keystore=None):
"""
Verify a signature for a file.
:param signature_filename: The pathname to the file containing the
signature.
:param data_filename: The pathname to the file containing the
signed data.
:param keystore: The path to a directory which contains the keys
used in verification. If not specified, the
instance's ``gpg_home`` attribute is used instead.
:return: True if the signature was verified, else False.
"""
if not self.gpg:
raise DistlibException('verification unavailable because gpg '
'unavailable')
cmd = self.get_verify_command(signature_filename, data_filename,
keystore)
rc, stdout, stderr = self.run_command(cmd)
if rc not in (0, 1):
raise DistlibException('verify command failed with error '
'code %s' % rc)
return rc == 0
def download_file(self, url, destfile, digest=None, reporthook=None):
"""
This is a convenience method for downloading a file from an URL.
Normally, this will be a file from the index, though currently
no check is made for this (i.e. a file can be downloaded from
anywhere).
The method is just like the :func:`urlretrieve` function in the
standard library, except that it allows digest computation to be
done during download and checking that the downloaded data
matched any expected value.
:param url: The URL of the file to be downloaded (assumed to be
available via an HTTP GET request).
:param destfile: The pathname where the downloaded file is to be
saved.
:param digest: If specified, this must be a (hasher, value)
tuple, where hasher is the algorithm used (e.g.
``'md5'``) and ``value`` is the expected value.
:param reporthook: The same as for :func:`urlretrieve` in the
standard library.
"""
if digest is None:
digester = None
logger.debug('No digest specified')
else:
if isinstance(digest, (list, tuple)):
hasher, digest = digest
else:
hasher = 'md5'
digester = getattr(hashlib, hasher)()
logger.debug('Digest specified: %s' % digest)
# The following code is equivalent to urlretrieve.
# We need to do it this way so that we can compute the
# digest of the file as we go.
with open(destfile, 'wb') as dfp:
# addinfourl is not a context manager on 2.x
# so we have to use try/finally
sfp = self.send_request(Request(url))
try:
headers = sfp.info()
blocksize = 8192
size = -1
read = 0
blocknum = 0
if "content-length" in headers:
size = int(headers["Content-Length"])
if reporthook:
reporthook(blocknum, blocksize, size)
while True:
block = sfp.read(blocksize)
if not block:
break
read += len(block)
dfp.write(block)
if digester:
digester.update(block)
blocknum += 1
if reporthook:
reporthook(blocknum, blocksize, size)
finally:
sfp.close()
# check that we got the whole file, if we can
if size >= 0 and read < size:
raise DistlibException(
'retrieval incomplete: got only %d out of %d bytes'
% (read, size))
# if we have a digest, it must match.
if digester:
actual = digester.hexdigest()
if digest != actual:
raise DistlibException('%s digest mismatch for %s: expected '
'%s, got %s' % (hasher, destfile,
digest, actual))
logger.debug('Digest verified: %s', digest)
def send_request(self, req):
"""
Send a standard library :class:`Request` to PyPI and return its
response.
:param req: The request to send.
:return: The HTTP response from PyPI (a standard library HTTPResponse).
"""
handlers = []
if self.password_handler:
handlers.append(self.password_handler)
if self.ssl_verifier:
handlers.append(self.ssl_verifier)
opener = build_opener(*handlers)
return opener.open(req)
def encode_request(self, fields, files):
"""
Encode fields and files for posting to an HTTP server.
:param fields: The fields to send as a list of (fieldname, value)
tuples.
:param files: The files to send as a list of (fieldname, filename,
file_bytes) tuple.
"""
# Adapted from packaging, which in turn was adapted from
# http://code.activestate.com/recipes/146306
parts = []
boundary = self.boundary
for k, values in fields:
if not isinstance(values, (list, tuple)):
values = [values]
for v in values:
parts.extend((
b'--' + boundary,
('Content-Disposition: form-data; name="%s"' %
k).encode('utf-8'),
b'',
v.encode('utf-8')))
for key, filename, value in files:
parts.extend((
b'--' + boundary,
('Content-Disposition: form-data; name="%s"; filename="%s"' %
(key, filename)).encode('utf-8'),
b'',
value))
parts.extend((b'--' + boundary + b'--', b''))
body = b'\r\n'.join(parts)
ct = b'multipart/form-data; boundary=' + boundary
headers = {
'Content-type': ct,
'Content-length': str(len(body))
}
return Request(self.url, body, headers)
def search(self, terms, operator=None):
if isinstance(terms, string_types):
terms = {'name': terms}
if self.rpc_proxy is None:
self.rpc_proxy = ServerProxy(self.url, timeout=3.0)
return self.rpc_proxy.search(terms, operator or 'and')
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2015 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
import gzip
from io import BytesIO
import json
import logging
import os
import posixpath
import re
try:
import threading
except ImportError: # pragma: no cover
import dummy_threading as threading
import zlib
from . import DistlibException
from .compat import (urljoin, urlparse, urlunparse, url2pathname, pathname2url,
queue, quote, unescape, string_types, build_opener,
HTTPRedirectHandler as BaseRedirectHandler, text_type,
Request, HTTPError, URLError)
from .database import Distribution, DistributionPath, make_dist
from .metadata import Metadata
from .util import (cached_property, parse_credentials, ensure_slash,
split_filename, get_project_data, parse_requirement,
parse_name_and_version, ServerProxy, normalize_name)
from .version import get_scheme, UnsupportedVersionError
from .wheel import Wheel, is_compatible
logger = logging.getLogger(__name__)
HASHER_HASH = re.compile('^(\w+)=([a-f0-9]+)')
CHARSET = re.compile(r';\s*charset\s*=\s*(.*)\s*$', re.I)
HTML_CONTENT_TYPE = re.compile('text/html|application/x(ht)?ml')
DEFAULT_INDEX = 'https://pypi.python.org/pypi'
def get_all_distribution_names(url=None):
"""
Return all distribution names known by an index.
:param url: The URL of the index.
:return: A list of all known distribution names.
"""
if url is None:
url = DEFAULT_INDEX
client = ServerProxy(url, timeout=3.0)
return client.list_packages()
class RedirectHandler(BaseRedirectHandler):
"""
A class to work around a bug in some Python 3.2.x releases.
"""
# There's a bug in the base version for some 3.2.x
# (e.g. 3.2.2 on Ubuntu Oneiric). If a Location header
# returns e.g. /abc, it bails because it says the scheme ''
# is bogus, when actually it should use the request's
# URL for the scheme. See Python issue #13696.
def http_error_302(self, req, fp, code, msg, headers):
# Some servers (incorrectly) return multiple Location headers
# (so probably same goes for URI). Use first header.
newurl = None
for key in ('location', 'uri'):
if key in headers:
newurl = headers[key]
break
if newurl is None:
return
urlparts = urlparse(newurl)
if urlparts.scheme == '':
newurl = urljoin(req.get_full_url(), newurl)
if hasattr(headers, 'replace_header'):
headers.replace_header(key, newurl)
else:
headers[key] = newurl
return BaseRedirectHandler.http_error_302(self, req, fp, code, msg,
headers)
http_error_301 = http_error_303 = http_error_307 = http_error_302
class Locator(object):
"""
A base class for locators - things that locate distributions.
"""
source_extensions = ('.tar.gz', '.tar.bz2', '.tar', '.zip', '.tgz', '.tbz')
binary_extensions = ('.egg', '.exe', '.whl')
excluded_extensions = ('.pdf',)
# A list of tags indicating which wheels you want to match. The default
# value of None matches against the tags compatible with the running
# Python. If you want to match other values, set wheel_tags on a locator
# instance to a list of tuples (pyver, abi, arch) which you want to match.
wheel_tags = None
downloadable_extensions = source_extensions + ('.whl',)
def __init__(self, scheme='default'):
"""
Initialise an instance.
:param scheme: Because locators look for most recent versions, they
need to know the version scheme to use. This specifies
the current PEP-recommended scheme - use ``'legacy'``
if you need to support existing distributions on PyPI.
"""
self._cache = {}
self.scheme = scheme
# Because of bugs in some of the handlers on some of the platforms,
# we use our own opener rather than just using urlopen.
self.opener = build_opener(RedirectHandler())
# If get_project() is called from locate(), the matcher instance
# is set from the requirement passed to locate(). See issue #18 for
# why this can be useful to know.
self.matcher = None
self.errors = queue.Queue()
def get_errors(self):
"""
Return any errors which have occurred.
"""
result = []
while not self.errors.empty(): # pragma: no cover
try:
e = self.errors.get(False)
result.append(e)
except self.errors.Empty:
continue
self.errors.task_done()
return result
def clear_errors(self):
"""
Clear any errors which may have been logged.
"""
# Just get the errors and throw them away
self.get_errors()
def clear_cache(self):
self._cache.clear()
def _get_scheme(self):
return self._scheme
def _set_scheme(self, value):
self._scheme = value
scheme = property(_get_scheme, _set_scheme)
def _get_project(self, name):
"""
For a given project, get a dictionary mapping available versions to Distribution
instances.
This should be implemented in subclasses.
If called from a locate() request, self.matcher will be set to a
matcher for the requirement to satisfy, otherwise it will be None.
"""
raise NotImplementedError('Please implement in the subclass')
def get_distribution_names(self):
"""
Return all the distribution names known to this locator.
"""
raise NotImplementedError('Please implement in the subclass')
def get_project(self, name):
"""
For a given project, get a dictionary mapping available versions to Distribution
instances.
This calls _get_project to do all the work, and just implements a caching layer on top.
"""
if self._cache is None:
result = self._get_project(name)
elif name in self._cache:
result = self._cache[name]
else:
self.clear_errors()
result = self._get_project(name)
self._cache[name] = result
return result
def score_url(self, url):
"""
Give an url a score which can be used to choose preferred URLs
for a given project release.
"""
t = urlparse(url)
basename = posixpath.basename(t.path)
compatible = True
is_wheel = basename.endswith('.whl')
if is_wheel:
compatible = is_compatible(Wheel(basename), self.wheel_tags)
return (t.scheme != 'https', 'pypi.python.org' in t.netloc,
is_wheel, compatible, basename)
def prefer_url(self, url1, url2):
"""
Choose one of two URLs where both are candidates for distribution
archives for the same version of a distribution (for example,
.tar.gz vs. zip).
The current implementation favours https:// URLs over http://, archives
from PyPI over those from other locations, wheel compatibility (if a
wheel) and then the archive name.
"""
result = url2
if url1:
s1 = self.score_url(url1)
s2 = self.score_url(url2)
if s1 > s2:
result = url1
if result != url2:
logger.debug('Not replacing %r with %r', url1, url2)
else:
logger.debug('Replacing %r with %r', url1, url2)
return result
def split_filename(self, filename, project_name):
"""
Attempt to split a filename in project name, version and Python version.
"""
return split_filename(filename, project_name)
def convert_url_to_download_info(self, url, project_name):
"""
See if a URL is a candidate for a download URL for a project (the URL
has typically been scraped from an HTML page).
If it is, a dictionary is returned with keys "name", "version",
"filename" and "url"; otherwise, None is returned.
"""
def same_project(name1, name2):
return normalize_name(name1) == normalize_name(name2)
result = None
scheme, netloc, path, params, query, frag = urlparse(url)
if frag.lower().startswith('egg='):
logger.debug('%s: version hint in fragment: %r',
project_name, frag)
m = HASHER_HASH.match(frag)
if m:
algo, digest = m.groups()
else:
algo, digest = None, None
origpath = path
if path and path[-1] == '/':
path = path[:-1]
if path.endswith('.whl'):
try:
wheel = Wheel(path)
if is_compatible(wheel, self.wheel_tags):
if project_name is None:
include = True
else:
include = same_project(wheel.name, project_name)
if include:
result = {
'name': wheel.name,
'version': wheel.version,
'filename': wheel.filename,
'url': urlunparse((scheme, netloc, origpath,
params, query, '')),
'python-version': ', '.join(
['.'.join(list(v[2:])) for v in wheel.pyver]),
}
except Exception as e: # pragma: no cover
logger.warning('invalid path for wheel: %s', path)
elif path.endswith(self.downloadable_extensions):
path = filename = posixpath.basename(path)
for ext in self.downloadable_extensions:
if path.endswith(ext):
path = path[:-len(ext)]
t = self.split_filename(path, project_name)
if not t:
logger.debug('No match for project/version: %s', path)
else:
name, version, pyver = t
if not project_name or same_project(project_name, name):
result = {
'name': name,
'version': version,
'filename': filename,
'url': urlunparse((scheme, netloc, origpath,
params, query, '')),
#'packagetype': 'sdist',
}
if pyver:
result['python-version'] = pyver
break
if result and algo:
result['%s_digest' % algo] = digest
return result
def _get_digest(self, info):
"""
Get a digest from a dictionary by looking at keys of the form
'algo_digest'.
Returns a 2-tuple (algo, digest) if found, else None. Currently
looks only for SHA256, then MD5.
"""
result = None
for algo in ('sha256', 'md5'):
key = '%s_digest' % algo
if key in info:
result = (algo, info[key])
break
return result
def _update_version_data(self, result, info):
"""
Update a result dictionary (the final result from _get_project) with a
dictionary for a specific version, which typically holds information
gleaned from a filename or URL for an archive for the distribution.
"""
name = info.pop('name')
version = info.pop('version')
if version in result:
dist = result[version]
md = dist.metadata
else:
dist = make_dist(name, version, scheme=self.scheme)
md = dist.metadata
dist.digest = digest = self._get_digest(info)
url = info['url']
result['digests'][url] = digest
if md.source_url != info['url']:
md.source_url = self.prefer_url(md.source_url, url)
result['urls'].setdefault(version, set()).add(url)
dist.locator = self
result[version] = dist
def locate(self, requirement, prereleases=False):
"""
Find the most recent distribution which matches the given
requirement.
:param requirement: A requirement of the form 'foo (1.0)' or perhaps
'foo (>= 1.0, < 2.0, != 1.3)'
:param prereleases: If ``True``, allow pre-release versions
to be located. Otherwise, pre-release versions
are not returned.
:return: A :class:`Distribution` instance, or ``None`` if no such
distribution could be located.
"""
result = None
r = parse_requirement(requirement)
if r is None:
raise DistlibException('Not a valid requirement: %r' % requirement)
scheme = get_scheme(self.scheme)
self.matcher = matcher = scheme.matcher(r.requirement)
logger.debug('matcher: %s (%s)', matcher, type(matcher).__name__)
versions = self.get_project(r.name)
if len(versions) > 2: # urls and digests keys are present
# sometimes, versions are invalid
slist = []
vcls = matcher.version_class
for k in versions:
if k in ('urls', 'digests'):
continue
try:
if not matcher.match(k):
logger.debug('%s did not match %r', matcher, k)
else:
if prereleases or not vcls(k).is_prerelease:
slist.append(k)
else:
logger.debug('skipping pre-release '
'version %s of %s', k, matcher.name)
except Exception: # pragma: no cover
logger.warning('error matching %s with %r', matcher, k)
pass # slist.append(k)
if len(slist) > 1:
slist = sorted(slist, key=scheme.key)
if slist:
logger.debug('sorted list: %s', slist)
version = slist[-1]
result = versions[version]
if result:
if r.extras:
result.extras = r.extras
result.download_urls = versions.get('urls', {}).get(version, set())
d = {}
sd = versions.get('digests', {})
for url in result.download_urls:
if url in sd:
d[url] = sd[url]
result.digests = d
self.matcher = None
return result
class PyPIRPCLocator(Locator):
"""
This locator uses XML-RPC to locate distributions. It therefore
cannot be used with simple mirrors (that only mirror file content).
"""
def __init__(self, url, **kwargs):
"""
Initialise an instance.
:param url: The URL to use for XML-RPC.
:param kwargs: Passed to the superclass constructor.
"""
super(PyPIRPCLocator, self).__init__(**kwargs)
self.base_url = url
self.client = ServerProxy(url, timeout=3.0)
def get_distribution_names(self):
"""
Return all the distribution names known to this locator.
"""
return set(self.client.list_packages())
def _get_project(self, name):
result = {'urls': {}, 'digests': {}}
versions = self.client.package_releases(name, True)
for v in versions:
urls = self.client.release_urls(name, v)
data = self.client.release_data(name, v)
metadata = Metadata(scheme=self.scheme)
metadata.name = data['name']
metadata.version = data['version']
metadata.license = data.get('license')
metadata.keywords = data.get('keywords', [])
metadata.summary = data.get('summary')
dist = Distribution(metadata)
if urls:
info = urls[0]
metadata.source_url = info['url']
dist.digest = self._get_digest(info)
dist.locator = self
result[v] = dist
for info in urls:
url = info['url']
digest = self._get_digest(info)
result['urls'].setdefault(v, set()).add(url)
result['digests'][url] = digest
return result
class PyPIJSONLocator(Locator):
"""
This locator uses PyPI's JSON interface. It's very limited in functionality
and probably not worth using.
"""
def __init__(self, url, **kwargs):
super(PyPIJSONLocator, self).__init__(**kwargs)
self.base_url = ensure_slash(url)
def get_distribution_names(self):
"""
Return all the distribution names known to this locator.
"""
raise NotImplementedError('Not available from this locator')
def _get_project(self, name):
result = {'urls': {}, 'digests': {}}
url = urljoin(self.base_url, '%s/json' % quote(name))
try:
resp = self.opener.open(url)
data = resp.read().decode() # for now
d = json.loads(data)
md = Metadata(scheme=self.scheme)
data = d['info']
md.name = data['name']
md.version = data['version']
md.license = data.get('license')
md.keywords = data.get('keywords', [])
md.summary = data.get('summary')
dist = Distribution(md)
dist.locator = self
urls = d['urls']
result[md.version] = dist
for info in d['urls']:
url = info['url']
dist.download_urls.add(url)
dist.digests[url] = self._get_digest(info)
result['urls'].setdefault(md.version, set()).add(url)
result['digests'][url] = self._get_digest(info)
# Now get other releases
for version, infos in d['releases'].items():
if version == md.version:
continue # already done
omd = Metadata(scheme=self.scheme)
omd.name = md.name
omd.version = version
odist = Distribution(omd)
odist.locator = self
result[version] = odist
for info in infos:
url = info['url']
odist.download_urls.add(url)
odist.digests[url] = self._get_digest(info)
result['urls'].setdefault(version, set()).add(url)
result['digests'][url] = self._get_digest(info)
# for info in urls:
# md.source_url = info['url']
# dist.digest = self._get_digest(info)
# dist.locator = self
# for info in urls:
# url = info['url']
# result['urls'].setdefault(md.version, set()).add(url)
# result['digests'][url] = self._get_digest(info)
except Exception as e:
self.errors.put(text_type(e))
logger.exception('JSON fetch failed: %s', e)
return result
class Page(object):
"""
This class represents a scraped HTML page.
"""
# The following slightly hairy-looking regex just looks for the contents of
# an anchor link, which has an attribute "href" either immediately preceded
# or immediately followed by a "rel" attribute. The attribute values can be
# declared with double quotes, single quotes or no quotes - which leads to
# the length of the expression.
_href = re.compile("""
(rel\s*=\s*(?:"(?P<rel1>[^"]*)"|'(?P<rel2>[^']*)'|(?P<rel3>[^>\s\n]*))\s+)?
href\s*=\s*(?:"(?P<url1>[^"]*)"|'(?P<url2>[^']*)'|(?P<url3>[^>\s\n]*))
(\s+rel\s*=\s*(?:"(?P<rel4>[^"]*)"|'(?P<rel5>[^']*)'|(?P<rel6>[^>\s\n]*)))?
""", re.I | re.S | re.X)
_base = re.compile(r"""<base\s+href\s*=\s*['"]?([^'">]+)""", re.I | re.S)
def __init__(self, data, url):
"""
Initialise an instance with the Unicode page contents and the URL they
came from.
"""
self.data = data
self.base_url = self.url = url
m = self._base.search(self.data)
if m:
self.base_url = m.group(1)
_clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I)
@cached_property
def links(self):
"""
Return the URLs of all the links on a page together with information
about their "rel" attribute, for determining which ones to treat as
downloads and which ones to queue for further scraping.
"""
def clean(url):
"Tidy up an URL."
scheme, netloc, path, params, query, frag = urlparse(url)
return urlunparse((scheme, netloc, quote(path),
params, query, frag))
result = set()
for match in self._href.finditer(self.data):
d = match.groupdict('')
rel = (d['rel1'] or d['rel2'] or d['rel3'] or
d['rel4'] or d['rel5'] or d['rel6'])
url = d['url1'] or d['url2'] or d['url3']
url = urljoin(self.base_url, url)
url = unescape(url)
url = self._clean_re.sub(lambda m: '%%%2x' % ord(m.group(0)), url)
result.add((url, rel))
# We sort the result, hoping to bring the most recent versions
# to the front
result = sorted(result, key=lambda t: t[0], reverse=True)
return result
class SimpleScrapingLocator(Locator):
"""
A locator which scrapes HTML pages to locate downloads for a distribution.
This runs multiple threads to do the I/O; performance is at least as good
as pip's PackageFinder, which works in an analogous fashion.
"""
# These are used to deal with various Content-Encoding schemes.
decoders = {
'deflate': zlib.decompress,
'gzip': lambda b: gzip.GzipFile(fileobj=BytesIO(d)).read(),
'none': lambda b: b,
}
def __init__(self, url, timeout=None, num_workers=10, **kwargs):
"""
Initialise an instance.
:param url: The root URL to use for scraping.
:param timeout: The timeout, in seconds, to be applied to requests.
This defaults to ``None`` (no timeout specified).
:param num_workers: The number of worker threads you want to do I/O,
This defaults to 10.
:param kwargs: Passed to the superclass.
"""
super(SimpleScrapingLocator, self).__init__(**kwargs)
self.base_url = ensure_slash(url)
self.timeout = timeout
self._page_cache = {}
self._seen = set()
self._to_fetch = queue.Queue()
self._bad_hosts = set()
self.skip_externals = False
self.num_workers = num_workers
self._lock = threading.RLock()
# See issue #45: we need to be resilient when the locator is used
# in a thread, e.g. with concurrent.futures. We can't use self._lock
# as it is for coordinating our internal threads - the ones created
# in _prepare_threads.
self._gplock = threading.RLock()
def _prepare_threads(self):
"""
Threads are created only when get_project is called, and terminate
before it returns. They are there primarily to parallelise I/O (i.e.
fetching web pages).
"""
self._threads = []
for i in range(self.num_workers):
t = threading.Thread(target=self._fetch)
t.setDaemon(True)
t.start()
self._threads.append(t)
def _wait_threads(self):
"""
Tell all the threads to terminate (by sending a sentinel value) and
wait for them to do so.
"""
# Note that you need two loops, since you can't say which
# thread will get each sentinel
for t in self._threads:
self._to_fetch.put(None) # sentinel
for t in self._threads:
t.join()
self._threads = []
def _get_project(self, name):
result = {'urls': {}, 'digests': {}}
with self._gplock:
self.result = result
self.project_name = name
url = urljoin(self.base_url, '%s/' % quote(name))
self._seen.clear()
self._page_cache.clear()
self._prepare_threads()
try:
logger.debug('Queueing %s', url)
self._to_fetch.put(url)
self._to_fetch.join()
finally:
self._wait_threads()
del self.result
return result
platform_dependent = re.compile(r'\b(linux-(i\d86|x86_64|arm\w+)|'
r'win(32|-amd64)|macosx-?\d+)\b', re.I)
def _is_platform_dependent(self, url):
"""
Does an URL refer to a platform-specific download?
"""
return self.platform_dependent.search(url)
def _process_download(self, url):
"""
See if an URL is a suitable download for a project.
If it is, register information in the result dictionary (for
_get_project) about the specific version it's for.
Note that the return value isn't actually used other than as a boolean
value.
"""
if self._is_platform_dependent(url):
info = None
else:
info = self.convert_url_to_download_info(url, self.project_name)
logger.debug('process_download: %s -> %s', url, info)
if info:
with self._lock: # needed because self.result is shared
self._update_version_data(self.result, info)
return info
def _should_queue(self, link, referrer, rel):
"""
Determine whether a link URL from a referring page and with a
particular "rel" attribute should be queued for scraping.
"""
scheme, netloc, path, _, _, _ = urlparse(link)
if path.endswith(self.source_extensions + self.binary_extensions +
self.excluded_extensions):
result = False
elif self.skip_externals and not link.startswith(self.base_url):
result = False
elif not referrer.startswith(self.base_url):
result = False
elif rel not in ('homepage', 'download'):
result = False
elif scheme not in ('http', 'https', 'ftp'):
result = False
elif self._is_platform_dependent(link):
result = False
else:
host = netloc.split(':', 1)[0]
if host.lower() == 'localhost':
result = False
else:
result = True
logger.debug('should_queue: %s (%s) from %s -> %s', link, rel,
referrer, result)
return result
def _fetch(self):
"""
Get a URL to fetch from the work queue, get the HTML page, examine its
links for download candidates and candidates for further scraping.
This is a handy method to run in a thread.
"""
while True:
url = self._to_fetch.get()
try:
if url:
page = self.get_page(url)
if page is None: # e.g. after an error
continue
for link, rel in page.links:
if link not in self._seen:
self._seen.add(link)
if (not self._process_download(link) and
self._should_queue(link, url, rel)):
logger.debug('Queueing %s from %s', link, url)
self._to_fetch.put(link)
except Exception as e: # pragma: no cover
self.errors.put(text_type(e))
finally:
# always do this, to avoid hangs :-)
self._to_fetch.task_done()
if not url:
#logger.debug('Sentinel seen, quitting.')
break
def get_page(self, url):
"""
Get the HTML for an URL, possibly from an in-memory cache.
XXX TODO Note: this cache is never actually cleared. It's assumed that
the data won't get stale over the lifetime of a locator instance (not
necessarily true for the default_locator).
"""
# http://peak.telecommunity.com/DevCenter/EasyInstall#package-index-api
scheme, netloc, path, _, _, _ = urlparse(url)
if scheme == 'file' and os.path.isdir(url2pathname(path)):
url = urljoin(ensure_slash(url), 'index.html')
if url in self._page_cache:
result = self._page_cache[url]
logger.debug('Returning %s from cache: %s', url, result)
else:
host = netloc.split(':', 1)[0]
result = None
if host in self._bad_hosts:
logger.debug('Skipping %s due to bad host %s', url, host)
else:
req = Request(url, headers={'Accept-encoding': 'identity'})
try:
logger.debug('Fetching %s', url)
resp = self.opener.open(req, timeout=self.timeout)
logger.debug('Fetched %s', url)
headers = resp.info()
content_type = headers.get('Content-Type', '')
if HTML_CONTENT_TYPE.match(content_type):
final_url = resp.geturl()
data = resp.read()
encoding = headers.get('Content-Encoding')
if encoding:
decoder = self.decoders[encoding] # fail if not found
data = decoder(data)
encoding = 'utf-8'
m = CHARSET.search(content_type)
if m:
encoding = m.group(1)
try:
data = data.decode(encoding)
except UnicodeError: # pragma: no cover
data = data.decode('latin-1') # fallback
result = Page(data, final_url)
self._page_cache[final_url] = result
except HTTPError as e:
if e.code != 404:
logger.exception('Fetch failed: %s: %s', url, e)
except URLError as e: # pragma: no cover
logger.exception('Fetch failed: %s: %s', url, e)
with self._lock:
self._bad_hosts.add(host)
except Exception as e: # pragma: no cover
logger.exception('Fetch failed: %s: %s', url, e)
finally:
self._page_cache[url] = result # even if None (failure)
return result
_distname_re = re.compile('<a href=[^>]*>([^<]+)<')
def get_distribution_names(self):
"""
Return all the distribution names known to this locator.
"""
result = set()
page = self.get_page(self.base_url)
if not page:
raise DistlibException('Unable to get %s' % self.base_url)
for match in self._distname_re.finditer(page.data):
result.add(match.group(1))
return result
class DirectoryLocator(Locator):
"""
This class locates distributions in a directory tree.
"""
def __init__(self, path, **kwargs):
"""
Initialise an instance.
:param path: The root of the directory tree to search.
:param kwargs: Passed to the superclass constructor,
except for:
* recursive - if True (the default), subdirectories are
recursed into. If False, only the top-level directory
is searched,
"""
self.recursive = kwargs.pop('recursive', True)
super(DirectoryLocator, self).__init__(**kwargs)
path = os.path.abspath(path)
if not os.path.isdir(path): # pragma: no cover
raise DistlibException('Not a directory: %r' % path)
self.base_dir = path
def should_include(self, filename, parent):
"""
Should a filename be considered as a candidate for a distribution
archive? As well as the filename, the directory which contains it
is provided, though not used by the current implementation.
"""
return filename.endswith(self.downloadable_extensions)
def _get_project(self, name):
result = {'urls': {}, 'digests': {}}
for root, dirs, files in os.walk(self.base_dir):
for fn in files:
if self.should_include(fn, root):
fn = os.path.join(root, fn)
url = urlunparse(('file', '',
pathname2url(os.path.abspath(fn)),
'', '', ''))
info = self.convert_url_to_download_info(url, name)
if info:
self._update_version_data(result, info)
if not self.recursive:
break
return result
def get_distribution_names(self):
"""
Return all the distribution names known to this locator.
"""
result = set()
for root, dirs, files in os.walk(self.base_dir):
for fn in files:
if self.should_include(fn, root):
fn = os.path.join(root, fn)
url = urlunparse(('file', '',
pathname2url(os.path.abspath(fn)),
'', '', ''))
info = self.convert_url_to_download_info(url, None)
if info:
result.add(info['name'])
if not self.recursive:
break
return result
class JSONLocator(Locator):
"""
This locator uses special extended metadata (not available on PyPI) and is
the basis of performant dependency resolution in distlib. Other locators
require archive downloads before dependencies can be determined! As you
might imagine, that can be slow.
"""
def get_distribution_names(self):
"""
Return all the distribution names known to this locator.
"""
raise NotImplementedError('Not available from this locator')
def _get_project(self, name):
result = {'urls': {}, 'digests': {}}
data = get_project_data(name)
if data:
for info in data.get('files', []):
if info['ptype'] != 'sdist' or info['pyversion'] != 'source':
continue
# We don't store summary in project metadata as it makes
# the data bigger for no benefit during dependency
# resolution
dist = make_dist(data['name'], info['version'],
summary=data.get('summary',
'Placeholder for summary'),
scheme=self.scheme)
md = dist.metadata
md.source_url = info['url']
# TODO SHA256 digest
if 'digest' in info and info['digest']:
dist.digest = ('md5', info['digest'])
md.dependencies = info.get('requirements', {})
dist.exports = info.get('exports', {})
result[dist.version] = dist
result['urls'].setdefault(dist.version, set()).add(info['url'])
return result
class DistPathLocator(Locator):
"""
This locator finds installed distributions in a path. It can be useful for
adding to an :class:`AggregatingLocator`.
"""
def __init__(self, distpath, **kwargs):
"""
Initialise an instance.
:param distpath: A :class:`DistributionPath` instance to search.
"""
super(DistPathLocator, self).__init__(**kwargs)
assert isinstance(distpath, DistributionPath)
self.distpath = distpath
def _get_project(self, name):
dist = self.distpath.get_distribution(name)
if dist is None:
result = {'urls': {}, 'digests': {}}
else:
result = {
dist.version: dist,
'urls': {dist.version: set([dist.source_url])},
'digests': {dist.version: set([None])}
}
return result
class AggregatingLocator(Locator):
"""
This class allows you to chain and/or merge a list of locators.
"""
def __init__(self, *locators, **kwargs):
"""
Initialise an instance.
:param locators: The list of locators to search.
:param kwargs: Passed to the superclass constructor,
except for:
* merge - if False (the default), the first successful
search from any of the locators is returned. If True,
the results from all locators are merged (this can be
slow).
"""
self.merge = kwargs.pop('merge', False)
self.locators = locators
super(AggregatingLocator, self).__init__(**kwargs)
def clear_cache(self):
super(AggregatingLocator, self).clear_cache()
for locator in self.locators:
locator.clear_cache()
def _set_scheme(self, value):
self._scheme = value
for locator in self.locators:
locator.scheme = value
scheme = property(Locator.scheme.fget, _set_scheme)
def _get_project(self, name):
result = {}
for locator in self.locators:
d = locator.get_project(name)
if d:
if self.merge:
files = result.get('urls', {})
digests = result.get('digests', {})
# next line could overwrite result['urls'], result['digests']
result.update(d)
df = result.get('urls')
if files and df:
for k, v in files.items():
if k in df:
df[k] |= v
else:
df[k] = v
dd = result.get('digests')
if digests and dd:
dd.update(digests)
else:
# See issue #18. If any dists are found and we're looking
# for specific constraints, we only return something if
# a match is found. For example, if a DirectoryLocator
# returns just foo (1.0) while we're looking for
# foo (>= 2.0), we'll pretend there was nothing there so
# that subsequent locators can be queried. Otherwise we
# would just return foo (1.0) which would then lead to a
# failure to find foo (>= 2.0), because other locators
# weren't searched. Note that this only matters when
# merge=False.
if self.matcher is None:
found = True
else:
found = False
for k in d:
if self.matcher.match(k):
found = True
break
if found:
result = d
break
return result
def get_distribution_names(self):
"""
Return all the distribution names known to this locator.
"""
result = set()
for locator in self.locators:
try:
result |= locator.get_distribution_names()
except NotImplementedError:
pass
return result
# We use a legacy scheme simply because most of the dists on PyPI use legacy
# versions which don't conform to PEP 426 / PEP 440.
default_locator = AggregatingLocator(
JSONLocator(),
SimpleScrapingLocator('https://pypi.python.org/simple/',
timeout=3.0),
scheme='legacy')
locate = default_locator.locate
NAME_VERSION_RE = re.compile(r'(?P<name>[\w-]+)\s*'
r'\(\s*(==\s*)?(?P<ver>[^)]+)\)$')
class DependencyFinder(object):
"""
Locate dependencies for distributions.
"""
def __init__(self, locator=None):
"""
Initialise an instance, using the specified locator
to locate distributions.
"""
self.locator = locator or default_locator
self.scheme = get_scheme(self.locator.scheme)
def add_distribution(self, dist):
"""
Add a distribution to the finder. This will update internal information
about who provides what.
:param dist: The distribution to add.
"""
logger.debug('adding distribution %s', dist)
name = dist.key
self.dists_by_name[name] = dist
self.dists[(name, dist.version)] = dist
for p in dist.provides:
name, version = parse_name_and_version(p)
logger.debug('Add to provided: %s, %s, %s', name, version, dist)
self.provided.setdefault(name, set()).add((version, dist))
def remove_distribution(self, dist):
"""
Remove a distribution from the finder. This will update internal
information about who provides what.
:param dist: The distribution to remove.
"""
logger.debug('removing distribution %s', dist)
name = dist.key
del self.dists_by_name[name]
del self.dists[(name, dist.version)]
for p in dist.provides:
name, version = parse_name_and_version(p)
logger.debug('Remove from provided: %s, %s, %s', name, version, dist)
s = self.provided[name]
s.remove((version, dist))
if not s:
del self.provided[name]
def get_matcher(self, reqt):
"""
Get a version matcher for a requirement.
:param reqt: The requirement
:type reqt: str
:return: A version matcher (an instance of
:class:`distlib.version.Matcher`).
"""
try:
matcher = self.scheme.matcher(reqt)
except UnsupportedVersionError: # pragma: no cover
# XXX compat-mode if cannot read the version
name = reqt.split()[0]
matcher = self.scheme.matcher(name)
return matcher
def find_providers(self, reqt):
"""
Find the distributions which can fulfill a requirement.
:param reqt: The requirement.
:type reqt: str
:return: A set of distribution which can fulfill the requirement.
"""
matcher = self.get_matcher(reqt)
name = matcher.key # case-insensitive
result = set()
provided = self.provided
if name in provided:
for version, provider in provided[name]:
try:
match = matcher.match(version)
except UnsupportedVersionError:
match = False
if match:
result.add(provider)
break
return result
def try_to_replace(self, provider, other, problems):
"""
Attempt to replace one provider with another. This is typically used
when resolving dependencies from multiple sources, e.g. A requires
(B >= 1.0) while C requires (B >= 1.1).
For successful replacement, ``provider`` must meet all the requirements
which ``other`` fulfills.
:param provider: The provider we are trying to replace with.
:param other: The provider we're trying to replace.
:param problems: If False is returned, this will contain what
problems prevented replacement. This is currently
a tuple of the literal string 'cantreplace',
``provider``, ``other`` and the set of requirements
that ``provider`` couldn't fulfill.
:return: True if we can replace ``other`` with ``provider``, else
False.
"""
rlist = self.reqts[other]
unmatched = set()
for s in rlist:
matcher = self.get_matcher(s)
if not matcher.match(provider.version):
unmatched.add(s)
if unmatched:
# can't replace other with provider
problems.add(('cantreplace', provider, other,
frozenset(unmatched)))
result = False
else:
# can replace other with provider
self.remove_distribution(other)
del self.reqts[other]
for s in rlist:
self.reqts.setdefault(provider, set()).add(s)
self.add_distribution(provider)
result = True
return result
def find(self, requirement, meta_extras=None, prereleases=False):
"""
Find a distribution and all distributions it depends on.
:param requirement: The requirement specifying the distribution to
find, or a Distribution instance.
:param meta_extras: A list of meta extras such as :test:, :build: and
so on.
:param prereleases: If ``True``, allow pre-release versions to be
returned - otherwise, don't return prereleases
unless they're all that's available.
Return a set of :class:`Distribution` instances and a set of
problems.
The distributions returned should be such that they have the
:attr:`required` attribute set to ``True`` if they were
from the ``requirement`` passed to ``find()``, and they have the
:attr:`build_time_dependency` attribute set to ``True`` unless they
are post-installation dependencies of the ``requirement``.
The problems should be a tuple consisting of the string
``'unsatisfied'`` and the requirement which couldn't be satisfied
by any distribution known to the locator.
"""
self.provided = {}
self.dists = {}
self.dists_by_name = {}
self.reqts = {}
meta_extras = set(meta_extras or [])
if ':*:' in meta_extras:
meta_extras.remove(':*:')
# :meta: and :run: are implicitly included
meta_extras |= set([':test:', ':build:', ':dev:'])
if isinstance(requirement, Distribution):
dist = odist = requirement
logger.debug('passed %s as requirement', odist)
else:
dist = odist = self.locator.locate(requirement,
prereleases=prereleases)
if dist is None:
raise DistlibException('Unable to locate %r' % requirement)
logger.debug('located %s', odist)
dist.requested = True
problems = set()
todo = set([dist])
install_dists = set([odist])
while todo:
dist = todo.pop()
name = dist.key # case-insensitive
if name not in self.dists_by_name:
self.add_distribution(dist)
else:
#import pdb; pdb.set_trace()
other = self.dists_by_name[name]
if other != dist:
self.try_to_replace(dist, other, problems)
ireqts = dist.run_requires | dist.meta_requires
sreqts = dist.build_requires
ereqts = set()
if dist in install_dists:
for key in ('test', 'build', 'dev'):
e = ':%s:' % key
if e in meta_extras:
ereqts |= getattr(dist, '%s_requires' % key)
all_reqts = ireqts | sreqts | ereqts
for r in all_reqts:
providers = self.find_providers(r)
if not providers:
logger.debug('No providers found for %r', r)
provider = self.locator.locate(r, prereleases=prereleases)
# If no provider is found and we didn't consider
# prereleases, consider them now.
if provider is None and not prereleases:
provider = self.locator.locate(r, prereleases=True)
if provider is None:
logger.debug('Cannot satisfy %r', r)
problems.add(('unsatisfied', r))
else:
n, v = provider.key, provider.version
if (n, v) not in self.dists:
todo.add(provider)
providers.add(provider)
if r in ireqts and dist in install_dists:
install_dists.add(provider)
logger.debug('Adding %s to install_dists',
provider.name_and_version)
for p in providers:
name = p.key
if name not in self.dists_by_name:
self.reqts.setdefault(p, set()).add(r)
else:
other = self.dists_by_name[name]
if other != p:
# see if other can be replaced by p
self.try_to_replace(p, other, problems)
dists = set(self.dists.values())
for dist in dists:
dist.build_time_dependency = dist not in install_dists
if dist.build_time_dependency:
logger.debug('%s is a build-time dependency only.',
dist.name_and_version)
logger.debug('find done for %s', odist)
return dists, problems
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Python Software Foundation.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
"""
Class representing the list of files in a distribution.
Equivalent to distutils.filelist, but fixes some problems.
"""
import fnmatch
import logging
import os
import re
import sys
from . import DistlibException
from .compat import fsdecode
from .util import convert_path
__all__ = ['Manifest']
logger = logging.getLogger(__name__)
# a \ followed by some spaces + EOL
_COLLAPSE_PATTERN = re.compile('\\\w*\n', re.M)
_COMMENTED_LINE = re.compile('#.*?(?=\n)|\n(?=$)', re.M | re.S)
#
# Due to the different results returned by fnmatch.translate, we need
# to do slightly different processing for Python 2.7 and 3.2 ... this needed
# to be brought in for Python 3.6 onwards.
#
_PYTHON_VERSION = sys.version_info[:2]
class Manifest(object):
"""A list of files built by on exploring the filesystem and filtered by
applying various patterns to what we find there.
"""
def __init__(self, base=None):
"""
Initialise an instance.
:param base: The base directory to explore under.
"""
self.base = os.path.abspath(os.path.normpath(base or os.getcwd()))
self.prefix = self.base + os.sep
self.allfiles = None
self.files = set()
#
# Public API
#
def findall(self):
"""Find all files under the base and set ``allfiles`` to the absolute
pathnames of files found.
"""
from stat import S_ISREG, S_ISDIR, S_ISLNK
self.allfiles = allfiles = []
root = self.base
stack = [root]
pop = stack.pop
push = stack.append
while stack:
root = pop()
names = os.listdir(root)
for name in names:
fullname = os.path.join(root, name)
# Avoid excess stat calls -- just one will do, thank you!
stat = os.stat(fullname)
mode = stat.st_mode
if S_ISREG(mode):
allfiles.append(fsdecode(fullname))
elif S_ISDIR(mode) and not S_ISLNK(mode):
push(fullname)
def add(self, item):
"""
Add a file to the manifest.
:param item: The pathname to add. This can be relative to the base.
"""
if not item.startswith(self.prefix):
item = os.path.join(self.base, item)
self.files.add(os.path.normpath(item))
def add_many(self, items):
"""
Add a list of files to the manifest.
:param items: The pathnames to add. These can be relative to the base.
"""
for item in items:
self.add(item)
def sorted(self, wantdirs=False):
"""
Return sorted files in directory order
"""
def add_dir(dirs, d):
dirs.add(d)
logger.debug('add_dir added %s', d)
if d != self.base:
parent, _ = os.path.split(d)
assert parent not in ('', '/')
add_dir(dirs, parent)
result = set(self.files) # make a copy!
if wantdirs:
dirs = set()
for f in result:
add_dir(dirs, os.path.dirname(f))
result |= dirs
return [os.path.join(*path_tuple) for path_tuple in
sorted(os.path.split(path) for path in result)]
def clear(self):
"""Clear all collected files."""
self.files = set()
self.allfiles = []
def process_directive(self, directive):
"""
Process a directive which either adds some files from ``allfiles`` to
``files``, or removes some files from ``files``.
:param directive: The directive to process. This should be in a format
compatible with distutils ``MANIFEST.in`` files:
http://docs.python.org/distutils/sourcedist.html#commands
"""
# Parse the line: split it up, make sure the right number of words
# is there, and return the relevant words. 'action' is always
# defined: it's the first word of the line. Which of the other
# three are defined depends on the action; it'll be either
# patterns, (dir and patterns), or (dirpattern).
action, patterns, thedir, dirpattern = self._parse_directive(directive)
# OK, now we know that the action is valid and we have the
# right number of words on the line for that action -- so we
# can proceed with minimal error-checking.
if action == 'include':
for pattern in patterns:
if not self._include_pattern(pattern, anchor=True):
logger.warning('no files found matching %r', pattern)
elif action == 'exclude':
for pattern in patterns:
found = self._exclude_pattern(pattern, anchor=True)
#if not found:
# logger.warning('no previously-included files '
# 'found matching %r', pattern)
elif action == 'global-include':
for pattern in patterns:
if not self._include_pattern(pattern, anchor=False):
logger.warning('no files found matching %r '
'anywhere in distribution', pattern)
elif action == 'global-exclude':
for pattern in patterns:
found = self._exclude_pattern(pattern, anchor=False)
#if not found:
# logger.warning('no previously-included files '
# 'matching %r found anywhere in '
# 'distribution', pattern)
elif action == 'recursive-include':
for pattern in patterns:
if not self._include_pattern(pattern, prefix=thedir):
logger.warning('no files found matching %r '
'under directory %r', pattern, thedir)
elif action == 'recursive-exclude':
for pattern in patterns:
found = self._exclude_pattern(pattern, prefix=thedir)
#if not found:
# logger.warning('no previously-included files '
# 'matching %r found under directory %r',
# pattern, thedir)
elif action == 'graft':
if not self._include_pattern(None, prefix=dirpattern):
logger.warning('no directories found matching %r',
dirpattern)
elif action == 'prune':
if not self._exclude_pattern(None, prefix=dirpattern):
logger.warning('no previously-included directories found '
'matching %r', dirpattern)
else: # pragma: no cover
# This should never happen, as it should be caught in
# _parse_template_line
raise DistlibException(
'invalid action %r' % action)
#
# Private API
#
def _parse_directive(self, directive):
"""
Validate a directive.
:param directive: The directive to validate.
:return: A tuple of action, patterns, thedir, dir_patterns
"""
words = directive.split()
if len(words) == 1 and words[0] not in ('include', 'exclude',
'global-include',
'global-exclude',
'recursive-include',
'recursive-exclude',
'graft', 'prune'):
# no action given, let's use the default 'include'
words.insert(0, 'include')
action = words[0]
patterns = thedir = dir_pattern = None
if action in ('include', 'exclude',
'global-include', 'global-exclude'):
if len(words) < 2:
raise DistlibException(
'%r expects <pattern1> <pattern2> ...' % action)
patterns = [convert_path(word) for word in words[1:]]
elif action in ('recursive-include', 'recursive-exclude'):
if len(words) < 3:
raise DistlibException(
'%r expects <dir> <pattern1> <pattern2> ...' % action)
thedir = convert_path(words[1])
patterns = [convert_path(word) for word in words[2:]]
elif action in ('graft', 'prune'):
if len(words) != 2:
raise DistlibException(
'%r expects a single <dir_pattern>' % action)
dir_pattern = convert_path(words[1])
else:
raise DistlibException('unknown action %r' % action)
return action, patterns, thedir, dir_pattern
def _include_pattern(self, pattern, anchor=True, prefix=None,
is_regex=False):
"""Select strings (presumably filenames) from 'self.files' that
match 'pattern', a Unix-style wildcard (glob) pattern.
Patterns are not quite the same as implemented by the 'fnmatch'
module: '*' and '?' match non-special characters, where "special"
is platform-dependent: slash on Unix; colon, slash, and backslash on
DOS/Windows; and colon on Mac OS.
If 'anchor' is true (the default), then the pattern match is more
stringent: "*.py" will match "foo.py" but not "foo/bar.py". If
'anchor' is false, both of these will match.
If 'prefix' is supplied, then only filenames starting with 'prefix'
(itself a pattern) and ending with 'pattern', with anything in between
them, will match. 'anchor' is ignored in this case.
If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
'pattern' is assumed to be either a string containing a regex or a
regex object -- no translation is done, the regex is just compiled
and used as-is.
Selected strings will be added to self.files.
Return True if files are found.
"""
# XXX docstring lying about what the special chars are?
found = False
pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)
# delayed loading of allfiles list
if self.allfiles is None:
self.findall()
for name in self.allfiles:
if pattern_re.search(name):
self.files.add(name)
found = True
return found
def _exclude_pattern(self, pattern, anchor=True, prefix=None,
is_regex=False):
"""Remove strings (presumably filenames) from 'files' that match
'pattern'.
Other parameters are the same as for 'include_pattern()', above.
The list 'self.files' is modified in place. Return True if files are
found.
This API is public to allow e.g. exclusion of SCM subdirs, e.g. when
packaging source distributions
"""
found = False
pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)
for f in list(self.files):
if pattern_re.search(f):
self.files.remove(f)
found = True
return found
def _translate_pattern(self, pattern, anchor=True, prefix=None,
is_regex=False):
"""Translate a shell-like wildcard pattern to a compiled regular
expression.
Return the compiled regex. If 'is_regex' true,
then 'pattern' is directly compiled to a regex (if it's a string)
or just returned as-is (assumes it's a regex object).
"""
if is_regex:
if isinstance(pattern, str):
return re.compile(pattern)
else:
return pattern
if _PYTHON_VERSION > (3, 2):
# ditch start and end characters
start, _, end = self._glob_to_re('_').partition('_')
if pattern:
pattern_re = self._glob_to_re(pattern)
if _PYTHON_VERSION > (3, 2):
assert pattern_re.startswith(start) and pattern_re.endswith(end)
else:
pattern_re = ''
base = re.escape(os.path.join(self.base, ''))
if prefix is not None:
# ditch end of pattern character
if _PYTHON_VERSION <= (3, 2):
empty_pattern = self._glob_to_re('')
prefix_re = self._glob_to_re(prefix)[:-len(empty_pattern)]
else:
prefix_re = self._glob_to_re(prefix)
assert prefix_re.startswith(start) and prefix_re.endswith(end)
prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
sep = os.sep
if os.sep == '\\':
sep = r'\\'
if _PYTHON_VERSION <= (3, 2):
pattern_re = '^' + base + sep.join((prefix_re,
'.*' + pattern_re))
else:
pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
pattern_re = r'%s%s%s%s.*%s%s' % (start, base, prefix_re, sep,
pattern_re, end)
else: # no prefix -- respect anchor flag
if anchor:
if _PYTHON_VERSION <= (3, 2):
pattern_re = '^' + base + pattern_re
else:
pattern_re = r'%s%s%s' % (start, base, pattern_re[len(start):])
return re.compile(pattern_re)
def _glob_to_re(self, pattern):
"""Translate a shell-like glob pattern to a regular expression.
Return a string containing the regex. Differs from
'fnmatch.translate()' in that '*' does not match "special characters"
(which are platform-specific).
"""
pattern_re = fnmatch.translate(pattern)
# '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
# IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
# and by extension they shouldn't match such "special characters" under
# any OS. So change all non-escaped dots in the RE to match any
# character except the special characters (currently: just os.sep).
sep = os.sep
if os.sep == '\\':
# we're using a regex to manipulate a regex, so we need
# to escape the backslash twice
sep = r'\\\\'
escaped = r'\1[^%s]' % sep
pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
return pattern_re
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
"""Parser for the environment markers micro-language defined in PEP 345."""
import ast
import os
import sys
import platform
from .compat import python_implementation, string_types
from .util import in_venv
__all__ = ['interpret']
class Evaluator(object):
"""
A limited evaluator for Python expressions.
"""
operators = {
'eq': lambda x, y: x == y,
'gt': lambda x, y: x > y,
'gte': lambda x, y: x >= y,
'in': lambda x, y: x in y,
'lt': lambda x, y: x < y,
'lte': lambda x, y: x <= y,
'not': lambda x: not x,
'noteq': lambda x, y: x != y,
'notin': lambda x, y: x not in y,
}
allowed_values = {
'sys_platform': sys.platform,
'python_version': '%s.%s' % sys.version_info[:2],
# parsing sys.platform is not reliable, but there is no other
# way to get e.g. 2.7.2+, and the PEP is defined with sys.version
'python_full_version': sys.version.split(' ', 1)[0],
'os_name': os.name,
'platform_in_venv': str(in_venv()),
'platform_release': platform.release(),
'platform_version': platform.version(),
'platform_machine': platform.machine(),
'platform_python_implementation': python_implementation(),
}
def __init__(self, context=None):
"""
Initialise an instance.
:param context: If specified, names are looked up in this mapping.
"""
self.context = context or {}
self.source = None
def get_fragment(self, offset):
"""
Get the part of the source which is causing a problem.
"""
fragment_len = 10
s = '%r' % (self.source[offset:offset + fragment_len])
if offset + fragment_len < len(self.source):
s += '...'
return s
def get_handler(self, node_type):
"""
Get a handler for the specified AST node type.
"""
return getattr(self, 'do_%s' % node_type, None)
def evaluate(self, node, filename=None):
"""
Evaluate a source string or node, using ``filename`` when
displaying errors.
"""
if isinstance(node, string_types):
self.source = node
kwargs = {'mode': 'eval'}
if filename:
kwargs['filename'] = filename
try:
node = ast.parse(node, **kwargs)
except SyntaxError as e:
s = self.get_fragment(e.offset)
raise SyntaxError('syntax error %s' % s)
node_type = node.__class__.__name__.lower()
handler = self.get_handler(node_type)
if handler is None:
if self.source is None:
s = '(source not available)'
else:
s = self.get_fragment(node.col_offset)
raise SyntaxError("don't know how to evaluate %r %s" % (
node_type, s))
return handler(node)
def get_attr_key(self, node):
assert isinstance(node, ast.Attribute), 'attribute node expected'
return '%s.%s' % (node.value.id, node.attr)
def do_attribute(self, node):
if not isinstance(node.value, ast.Name):
valid = False
else:
key = self.get_attr_key(node)
valid = key in self.context or key in self.allowed_values
if not valid:
raise SyntaxError('invalid expression: %s' % key)
if key in self.context:
result = self.context[key]
else:
result = self.allowed_values[key]
return result
def do_boolop(self, node):
result = self.evaluate(node.values[0])
is_or = node.op.__class__ is ast.Or
is_and = node.op.__class__ is ast.And
assert is_or or is_and
if (is_and and result) or (is_or and not result):
for n in node.values[1:]:
result = self.evaluate(n)
if (is_or and result) or (is_and and not result):
break
return result
def do_compare(self, node):
def sanity_check(lhsnode, rhsnode):
valid = True
if isinstance(lhsnode, ast.Str) and isinstance(rhsnode, ast.Str):
valid = False
#elif (isinstance(lhsnode, ast.Attribute)
# and isinstance(rhsnode, ast.Attribute)):
# klhs = self.get_attr_key(lhsnode)
# krhs = self.get_attr_key(rhsnode)
# valid = klhs != krhs
if not valid:
s = self.get_fragment(node.col_offset)
raise SyntaxError('Invalid comparison: %s' % s)
lhsnode = node.left
lhs = self.evaluate(lhsnode)
result = True
for op, rhsnode in zip(node.ops, node.comparators):
sanity_check(lhsnode, rhsnode)
op = op.__class__.__name__.lower()
if op not in self.operators:
raise SyntaxError('unsupported operation: %r' % op)
rhs = self.evaluate(rhsnode)
result = self.operators[op](lhs, rhs)
if not result:
break
lhs = rhs
lhsnode = rhsnode
return result
def do_expression(self, node):
return self.evaluate(node.body)
def do_name(self, node):
valid = False
if node.id in self.context:
valid = True
result = self.context[node.id]
elif node.id in self.allowed_values:
valid = True
result = self.allowed_values[node.id]
if not valid:
raise SyntaxError('invalid expression: %s' % node.id)
return result
def do_str(self, node):
return node.s
def interpret(marker, execution_context=None):
"""
Interpret a marker and return a result depending on environment.
:param marker: The marker to interpret.
:type marker: str
:param execution_context: The context used for name lookup.
:type execution_context: mapping
"""
return Evaluator(execution_context).evaluate(marker.strip())
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 The Python Software Foundation.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
"""Implementation of the Metadata for Python packages PEPs.
Supports all metadata formats (1.0, 1.1, 1.2, and 2.0 experimental).
"""
from __future__ import unicode_literals
import codecs
from email import message_from_file
import json
import logging
import re
from . import DistlibException, __version__
from .compat import StringIO, string_types, text_type
from .markers import interpret
from .util import extract_by_key, get_extras
from .version import get_scheme, PEP440_VERSION_RE
logger = logging.getLogger(__name__)
class MetadataMissingError(DistlibException):
"""A required metadata is missing"""
class MetadataConflictError(DistlibException):
"""Attempt to read or write metadata fields that are conflictual."""
class MetadataUnrecognizedVersionError(DistlibException):
"""Unknown metadata version number."""
class MetadataInvalidError(DistlibException):
"""A metadata value is invalid"""
# public API of this module
__all__ = ['Metadata', 'PKG_INFO_ENCODING', 'PKG_INFO_PREFERRED_VERSION']
# Encoding used for the PKG-INFO files
PKG_INFO_ENCODING = 'utf-8'
# preferred version. Hopefully will be changed
# to 1.2 once PEP 345 is supported everywhere
PKG_INFO_PREFERRED_VERSION = '1.1'
_LINE_PREFIX_1_2 = re.compile('\n \|')
_LINE_PREFIX_PRE_1_2 = re.compile('\n ')
_241_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform',
'Summary', 'Description',
'Keywords', 'Home-page', 'Author', 'Author-email',
'License')
_314_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform',
'Supported-Platform', 'Summary', 'Description',
'Keywords', 'Home-page', 'Author', 'Author-email',
'License', 'Classifier', 'Download-URL', 'Obsoletes',
'Provides', 'Requires')
_314_MARKERS = ('Obsoletes', 'Provides', 'Requires', 'Classifier',
'Download-URL')
_345_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform',
'Supported-Platform', 'Summary', 'Description',
'Keywords', 'Home-page', 'Author', 'Author-email',
'Maintainer', 'Maintainer-email', 'License',
'Classifier', 'Download-URL', 'Obsoletes-Dist',
'Project-URL', 'Provides-Dist', 'Requires-Dist',
'Requires-Python', 'Requires-External')
_345_MARKERS = ('Provides-Dist', 'Requires-Dist', 'Requires-Python',
'Obsoletes-Dist', 'Requires-External', 'Maintainer',
'Maintainer-email', 'Project-URL')
_426_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform',
'Supported-Platform', 'Summary', 'Description',
'Keywords', 'Home-page', 'Author', 'Author-email',
'Maintainer', 'Maintainer-email', 'License',
'Classifier', 'Download-URL', 'Obsoletes-Dist',
'Project-URL', 'Provides-Dist', 'Requires-Dist',
'Requires-Python', 'Requires-External', 'Private-Version',
'Obsoleted-By', 'Setup-Requires-Dist', 'Extension',
'Provides-Extra')
_426_MARKERS = ('Private-Version', 'Provides-Extra', 'Obsoleted-By',
'Setup-Requires-Dist', 'Extension')
_ALL_FIELDS = set()
_ALL_FIELDS.update(_241_FIELDS)
_ALL_FIELDS.update(_314_FIELDS)
_ALL_FIELDS.update(_345_FIELDS)
_ALL_FIELDS.update(_426_FIELDS)
EXTRA_RE = re.compile(r'''extra\s*==\s*("([^"]+)"|'([^']+)')''')
def _version2fieldlist(version):
if version == '1.0':
return _241_FIELDS
elif version == '1.1':
return _314_FIELDS
elif version == '1.2':
return _345_FIELDS
elif version == '2.0':
return _426_FIELDS
raise MetadataUnrecognizedVersionError(version)
def _best_version(fields):
"""Detect the best version depending on the fields used."""
def _has_marker(keys, markers):
for marker in markers:
if marker in keys:
return True
return False
keys = []
for key, value in fields.items():
if value in ([], 'UNKNOWN', None):
continue
keys.append(key)
possible_versions = ['1.0', '1.1', '1.2', '2.0']
# first let's try to see if a field is not part of one of the version
for key in keys:
if key not in _241_FIELDS and '1.0' in possible_versions:
possible_versions.remove('1.0')
if key not in _314_FIELDS and '1.1' in possible_versions:
possible_versions.remove('1.1')
if key not in _345_FIELDS and '1.2' in possible_versions:
possible_versions.remove('1.2')
if key not in _426_FIELDS and '2.0' in possible_versions:
possible_versions.remove('2.0')
# possible_version contains qualified versions
if len(possible_versions) == 1:
return possible_versions[0] # found !
elif len(possible_versions) == 0:
raise MetadataConflictError('Unknown metadata set')
# let's see if one unique marker is found
is_1_1 = '1.1' in possible_versions and _has_marker(keys, _314_MARKERS)
is_1_2 = '1.2' in possible_versions and _has_marker(keys, _345_MARKERS)
is_2_0 = '2.0' in possible_versions and _has_marker(keys, _426_MARKERS)
if int(is_1_1) + int(is_1_2) + int(is_2_0) > 1:
raise MetadataConflictError('You used incompatible 1.1/1.2/2.0 fields')
# we have the choice, 1.0, or 1.2, or 2.0
# - 1.0 has a broken Summary field but works with all tools
# - 1.1 is to avoid
# - 1.2 fixes Summary but has little adoption
# - 2.0 adds more features and is very new
if not is_1_1 and not is_1_2 and not is_2_0:
# we couldn't find any specific marker
if PKG_INFO_PREFERRED_VERSION in possible_versions:
return PKG_INFO_PREFERRED_VERSION
if is_1_1:
return '1.1'
if is_1_2:
return '1.2'
return '2.0'
_ATTR2FIELD = {
'metadata_version': 'Metadata-Version',
'name': 'Name',
'version': 'Version',
'platform': 'Platform',
'supported_platform': 'Supported-Platform',
'summary': 'Summary',
'description': 'Description',
'keywords': 'Keywords',
'home_page': 'Home-page',
'author': 'Author',
'author_email': 'Author-email',
'maintainer': 'Maintainer',
'maintainer_email': 'Maintainer-email',
'license': 'License',
'classifier': 'Classifier',
'download_url': 'Download-URL',
'obsoletes_dist': 'Obsoletes-Dist',
'provides_dist': 'Provides-Dist',
'requires_dist': 'Requires-Dist',
'setup_requires_dist': 'Setup-Requires-Dist',
'requires_python': 'Requires-Python',
'requires_external': 'Requires-External',
'requires': 'Requires',
'provides': 'Provides',
'obsoletes': 'Obsoletes',
'project_url': 'Project-URL',
'private_version': 'Private-Version',
'obsoleted_by': 'Obsoleted-By',
'extension': 'Extension',
'provides_extra': 'Provides-Extra',
}
_PREDICATE_FIELDS = ('Requires-Dist', 'Obsoletes-Dist', 'Provides-Dist')
_VERSIONS_FIELDS = ('Requires-Python',)
_VERSION_FIELDS = ('Version',)
_LISTFIELDS = ('Platform', 'Classifier', 'Obsoletes',
'Requires', 'Provides', 'Obsoletes-Dist',
'Provides-Dist', 'Requires-Dist', 'Requires-External',
'Project-URL', 'Supported-Platform', 'Setup-Requires-Dist',
'Provides-Extra', 'Extension')
_LISTTUPLEFIELDS = ('Project-URL',)
_ELEMENTSFIELD = ('Keywords',)
_UNICODEFIELDS = ('Author', 'Maintainer', 'Summary', 'Description')
_MISSING = object()
_FILESAFE = re.compile('[^A-Za-z0-9.]+')
def _get_name_and_version(name, version, for_filename=False):
"""Return the distribution name with version.
If for_filename is true, return a filename-escaped form."""
if for_filename:
# For both name and version any runs of non-alphanumeric or '.'
# characters are replaced with a single '-'. Additionally any
# spaces in the version string become '.'
name = _FILESAFE.sub('-', name)
version = _FILESAFE.sub('-', version.replace(' ', '.'))
return '%s-%s' % (name, version)
class LegacyMetadata(object):
"""The legacy metadata of a release.
Supports versions 1.0, 1.1 and 1.2 (auto-detected). You can
instantiate the class with one of these arguments (or none):
- *path*, the path to a metadata file
- *fileobj* give a file-like object with metadata as content
- *mapping* is a dict-like object
- *scheme* is a version scheme name
"""
# TODO document the mapping API and UNKNOWN default key
def __init__(self, path=None, fileobj=None, mapping=None,
scheme='default'):
if [path, fileobj, mapping].count(None) < 2:
raise TypeError('path, fileobj and mapping are exclusive')
self._fields = {}
self.requires_files = []
self._dependencies = None
self.scheme = scheme
if path is not None:
self.read(path)
elif fileobj is not None:
self.read_file(fileobj)
elif mapping is not None:
self.update(mapping)
self.set_metadata_version()
def set_metadata_version(self):
self._fields['Metadata-Version'] = _best_version(self._fields)
def _write_field(self, fileobj, name, value):
fileobj.write('%s: %s\n' % (name, value))
def __getitem__(self, name):
return self.get(name)
def __setitem__(self, name, value):
return self.set(name, value)
def __delitem__(self, name):
field_name = self._convert_name(name)
try:
del self._fields[field_name]
except KeyError:
raise KeyError(name)
def __contains__(self, name):
return (name in self._fields or
self._convert_name(name) in self._fields)
def _convert_name(self, name):
if name in _ALL_FIELDS:
return name
name = name.replace('-', '_').lower()
return _ATTR2FIELD.get(name, name)
def _default_value(self, name):
if name in _LISTFIELDS or name in _ELEMENTSFIELD:
return []
return 'UNKNOWN'
def _remove_line_prefix(self, value):
if self.metadata_version in ('1.0', '1.1'):
return _LINE_PREFIX_PRE_1_2.sub('\n', value)
else:
return _LINE_PREFIX_1_2.sub('\n', value)
def __getattr__(self, name):
if name in _ATTR2FIELD:
return self[name]
raise AttributeError(name)
#
# Public API
#
# dependencies = property(_get_dependencies, _set_dependencies)
def get_fullname(self, filesafe=False):
"""Return the distribution name with version.
If filesafe is true, return a filename-escaped form."""
return _get_name_and_version(self['Name'], self['Version'], filesafe)
def is_field(self, name):
"""return True if name is a valid metadata key"""
name = self._convert_name(name)
return name in _ALL_FIELDS
def is_multi_field(self, name):
name = self._convert_name(name)
return name in _LISTFIELDS
def read(self, filepath):
"""Read the metadata values from a file path."""
fp = codecs.open(filepath, 'r', encoding='utf-8')
try:
self.read_file(fp)
finally:
fp.close()
def read_file(self, fileob):
"""Read the metadata values from a file object."""
msg = message_from_file(fileob)
self._fields['Metadata-Version'] = msg['metadata-version']
# When reading, get all the fields we can
for field in _ALL_FIELDS:
if field not in msg:
continue
if field in _LISTFIELDS:
# we can have multiple lines
values = msg.get_all(field)
if field in _LISTTUPLEFIELDS and values is not None:
values = [tuple(value.split(',')) for value in values]
self.set(field, values)
else:
# single line
value = msg[field]
if value is not None and value != 'UNKNOWN':
self.set(field, value)
self.set_metadata_version()
def write(self, filepath, skip_unknown=False):
"""Write the metadata fields to filepath."""
fp = codecs.open(filepath, 'w', encoding='utf-8')
try:
self.write_file(fp, skip_unknown)
finally:
fp.close()
def write_file(self, fileobject, skip_unknown=False):
"""Write the PKG-INFO format data to a file object."""
self.set_metadata_version()
for field in _version2fieldlist(self['Metadata-Version']):
values = self.get(field)
if skip_unknown and values in ('UNKNOWN', [], ['UNKNOWN']):
continue
if field in _ELEMENTSFIELD:
self._write_field(fileobject, field, ','.join(values))
continue
if field not in _LISTFIELDS:
if field == 'Description':
if self.metadata_version in ('1.0', '1.1'):
values = values.replace('\n', '\n ')
else:
values = values.replace('\n', '\n |')
values = [values]
if field in _LISTTUPLEFIELDS:
values = [','.join(value) for value in values]
for value in values:
self._write_field(fileobject, field, value)
def update(self, other=None, **kwargs):
"""Set metadata values from the given iterable `other` and kwargs.
Behavior is like `dict.update`: If `other` has a ``keys`` method,
they are looped over and ``self[key]`` is assigned ``other[key]``.
Else, ``other`` is an iterable of ``(key, value)`` iterables.
Keys that don't match a metadata field or that have an empty value are
dropped.
"""
def _set(key, value):
if key in _ATTR2FIELD and value:
self.set(self._convert_name(key), value)
if not other:
# other is None or empty container
pass
elif hasattr(other, 'keys'):
for k in other.keys():
_set(k, other[k])
else:
for k, v in other:
_set(k, v)
if kwargs:
for k, v in kwargs.items():
_set(k, v)
def set(self, name, value):
"""Control then set a metadata field."""
name = self._convert_name(name)
if ((name in _ELEMENTSFIELD or name == 'Platform') and
not isinstance(value, (list, tuple))):
if isinstance(value, string_types):
value = [v.strip() for v in value.split(',')]
else:
value = []
elif (name in _LISTFIELDS and
not isinstance(value, (list, tuple))):
if isinstance(value, string_types):
value = [value]
else:
value = []
if logger.isEnabledFor(logging.WARNING):
project_name = self['Name']
scheme = get_scheme(self.scheme)
if name in _PREDICATE_FIELDS and value is not None:
for v in value:
# check that the values are valid
if not scheme.is_valid_matcher(v.split(';')[0]):
logger.warning(
"'%s': '%s' is not valid (field '%s')",
project_name, v, name)
# FIXME this rejects UNKNOWN, is that right?
elif name in _VERSIONS_FIELDS and value is not None:
if not scheme.is_valid_constraint_list(value):
logger.warning("'%s': '%s' is not a valid version (field '%s')",
project_name, value, name)
elif name in _VERSION_FIELDS and value is not None:
if not scheme.is_valid_version(value):
logger.warning("'%s': '%s' is not a valid version (field '%s')",
project_name, value, name)
if name in _UNICODEFIELDS:
if name == 'Description':
value = self._remove_line_prefix(value)
self._fields[name] = value
def get(self, name, default=_MISSING):
"""Get a metadata field."""
name = self._convert_name(name)
if name not in self._fields:
if default is _MISSING:
default = self._default_value(name)
return default
if name in _UNICODEFIELDS:
value = self._fields[name]
return value
elif name in _LISTFIELDS:
value = self._fields[name]
if value is None:
return []
res = []
for val in value:
if name not in _LISTTUPLEFIELDS:
res.append(val)
else:
# That's for Project-URL
res.append((val[0], val[1]))
return res
elif name in _ELEMENTSFIELD:
value = self._fields[name]
if isinstance(value, string_types):
return value.split(',')
return self._fields[name]
def check(self, strict=False):
"""Check if the metadata is compliant. If strict is True then raise if
no Name or Version are provided"""
self.set_metadata_version()
# XXX should check the versions (if the file was loaded)
missing, warnings = [], []
for attr in ('Name', 'Version'): # required by PEP 345
if attr not in self:
missing.append(attr)
if strict and missing != []:
msg = 'missing required metadata: %s' % ', '.join(missing)
raise MetadataMissingError(msg)
for attr in ('Home-page', 'Author'):
if attr not in self:
missing.append(attr)
# checking metadata 1.2 (XXX needs to check 1.1, 1.0)
if self['Metadata-Version'] != '1.2':
return missing, warnings
scheme = get_scheme(self.scheme)
def are_valid_constraints(value):
for v in value:
if not scheme.is_valid_matcher(v.split(';')[0]):
return False
return True
for fields, controller in ((_PREDICATE_FIELDS, are_valid_constraints),
(_VERSIONS_FIELDS,
scheme.is_valid_constraint_list),
(_VERSION_FIELDS,
scheme.is_valid_version)):
for field in fields:
value = self.get(field, None)
if value is not None and not controller(value):
warnings.append("Wrong value for '%s': %s" % (field, value))
return missing, warnings
def todict(self, skip_missing=False):
"""Return fields as a dict.
Field names will be converted to use the underscore-lowercase style
instead of hyphen-mixed case (i.e. home_page instead of Home-page).
"""
self.set_metadata_version()
mapping_1_0 = (
('metadata_version', 'Metadata-Version'),
('name', 'Name'),
('version', 'Version'),
('summary', 'Summary'),
('home_page', 'Home-page'),
('author', 'Author'),
('author_email', 'Author-email'),
('license', 'License'),
('description', 'Description'),
('keywords', 'Keywords'),
('platform', 'Platform'),
('classifiers', 'Classifier'),
('download_url', 'Download-URL'),
)
data = {}
for key, field_name in mapping_1_0:
if not skip_missing or field_name in self._fields:
data[key] = self[field_name]
if self['Metadata-Version'] == '1.2':
mapping_1_2 = (
('requires_dist', 'Requires-Dist'),
('requires_python', 'Requires-Python'),
('requires_external', 'Requires-External'),
('provides_dist', 'Provides-Dist'),
('obsoletes_dist', 'Obsoletes-Dist'),
('project_url', 'Project-URL'),
('maintainer', 'Maintainer'),
('maintainer_email', 'Maintainer-email'),
)
for key, field_name in mapping_1_2:
if not skip_missing or field_name in self._fields:
if key != 'project_url':
data[key] = self[field_name]
else:
data[key] = [','.join(u) for u in self[field_name]]
elif self['Metadata-Version'] == '1.1':
mapping_1_1 = (
('provides', 'Provides'),
('requires', 'Requires'),
('obsoletes', 'Obsoletes'),
)
for key, field_name in mapping_1_1:
if not skip_missing or field_name in self._fields:
data[key] = self[field_name]
return data
def add_requirements(self, requirements):
if self['Metadata-Version'] == '1.1':
# we can't have 1.1 metadata *and* Setuptools requires
for field in ('Obsoletes', 'Requires', 'Provides'):
if field in self:
del self[field]
self['Requires-Dist'] += requirements
# Mapping API
# TODO could add iter* variants
def keys(self):
return list(_version2fieldlist(self['Metadata-Version']))
def __iter__(self):
for key in self.keys():
yield key
def values(self):
return [self[key] for key in self.keys()]
def items(self):
return [(key, self[key]) for key in self.keys()]
def __repr__(self):
return '<%s %s %s>' % (self.__class__.__name__, self.name,
self.version)
METADATA_FILENAME = 'pydist.json'
WHEEL_METADATA_FILENAME = 'metadata.json'
class Metadata(object):
"""
The metadata of a release. This implementation uses 2.0 (JSON)
metadata where possible. If not possible, it wraps a LegacyMetadata
instance which handles the key-value metadata format.
"""
METADATA_VERSION_MATCHER = re.compile('^\d+(\.\d+)*$')
NAME_MATCHER = re.compile('^[0-9A-Z]([0-9A-Z_.-]*[0-9A-Z])?$', re.I)
VERSION_MATCHER = PEP440_VERSION_RE
SUMMARY_MATCHER = re.compile('.{1,2047}')
METADATA_VERSION = '2.0'
GENERATOR = 'distlib (%s)' % __version__
MANDATORY_KEYS = {
'name': (),
'version': (),
'summary': ('legacy',),
}
INDEX_KEYS = ('name version license summary description author '
'author_email keywords platform home_page classifiers '
'download_url')
DEPENDENCY_KEYS = ('extras run_requires test_requires build_requires '
'dev_requires provides meta_requires obsoleted_by '
'supports_environments')
SYNTAX_VALIDATORS = {
'metadata_version': (METADATA_VERSION_MATCHER, ()),
'name': (NAME_MATCHER, ('legacy',)),
'version': (VERSION_MATCHER, ('legacy',)),
'summary': (SUMMARY_MATCHER, ('legacy',)),
}
__slots__ = ('_legacy', '_data', 'scheme')
def __init__(self, path=None, fileobj=None, mapping=None,
scheme='default'):
if [path, fileobj, mapping].count(None) < 2:
raise TypeError('path, fileobj and mapping are exclusive')
self._legacy = None
self._data = None
self.scheme = scheme
#import pdb; pdb.set_trace()
if mapping is not None:
try:
self._validate_mapping(mapping, scheme)
self._data = mapping
except MetadataUnrecognizedVersionError:
self._legacy = LegacyMetadata(mapping=mapping, scheme=scheme)
self.validate()
else:
data = None
if path:
with open(path, 'rb') as f:
data = f.read()
elif fileobj:
data = fileobj.read()
if data is None:
# Initialised with no args - to be added
self._data = {
'metadata_version': self.METADATA_VERSION,
'generator': self.GENERATOR,
}
else:
if not isinstance(data, text_type):
data = data.decode('utf-8')
try:
self._data = json.loads(data)
self._validate_mapping(self._data, scheme)
except ValueError:
# Note: MetadataUnrecognizedVersionError does not
# inherit from ValueError (it's a DistlibException,
# which should not inherit from ValueError).
# The ValueError comes from the json.load - if that
# succeeds and we get a validation error, we want
# that to propagate
self._legacy = LegacyMetadata(fileobj=StringIO(data),
scheme=scheme)
self.validate()
common_keys = set(('name', 'version', 'license', 'keywords', 'summary'))
none_list = (None, list)
none_dict = (None, dict)
mapped_keys = {
'run_requires': ('Requires-Dist', list),
'build_requires': ('Setup-Requires-Dist', list),
'dev_requires': none_list,
'test_requires': none_list,
'meta_requires': none_list,
'extras': ('Provides-Extra', list),
'modules': none_list,
'namespaces': none_list,
'exports': none_dict,
'commands': none_dict,
'classifiers': ('Classifier', list),
'source_url': ('Download-URL', None),
'metadata_version': ('Metadata-Version', None),
}
del none_list, none_dict
def __getattribute__(self, key):
common = object.__getattribute__(self, 'common_keys')
mapped = object.__getattribute__(self, 'mapped_keys')
if key in mapped:
lk, maker = mapped[key]
if self._legacy:
if lk is None:
result = None if maker is None else maker()
else:
result = self._legacy.get(lk)
else:
value = None if maker is None else maker()
if key not in ('commands', 'exports', 'modules', 'namespaces',
'classifiers'):
result = self._data.get(key, value)
else:
# special cases for PEP 459
sentinel = object()
result = sentinel
d = self._data.get('extensions')
if d:
if key == 'commands':
result = d.get('python.commands', value)
elif key == 'classifiers':
d = d.get('python.details')
if d:
result = d.get(key, value)
else:
d = d.get('python.exports')
if not d:
d = self._data.get('python.exports')
if d:
result = d.get(key, value)
if result is sentinel:
result = value
elif key not in common:
result = object.__getattribute__(self, key)
elif self._legacy:
result = self._legacy.get(key)
else:
result = self._data.get(key)
return result
def _validate_value(self, key, value, scheme=None):
if key in self.SYNTAX_VALIDATORS:
pattern, exclusions = self.SYNTAX_VALIDATORS[key]
if (scheme or self.scheme) not in exclusions:
m = pattern.match(value)
if not m:
raise MetadataInvalidError("'%s' is an invalid value for "
"the '%s' property" % (value,
key))
def __setattr__(self, key, value):
self._validate_value(key, value)
common = object.__getattribute__(self, 'common_keys')
mapped = object.__getattribute__(self, 'mapped_keys')
if key in mapped:
lk, _ = mapped[key]
if self._legacy:
if lk is None:
raise NotImplementedError
self._legacy[lk] = value
elif key not in ('commands', 'exports', 'modules', 'namespaces',
'classifiers'):
self._data[key] = value
else:
# special cases for PEP 459
d = self._data.setdefault('extensions', {})
if key == 'commands':
d['python.commands'] = value
elif key == 'classifiers':
d = d.setdefault('python.details', {})
d[key] = value
else:
d = d.setdefault('python.exports', {})
d[key] = value
elif key not in common:
object.__setattr__(self, key, value)
else:
if key == 'keywords':
if isinstance(value, string_types):
value = value.strip()
if value:
value = value.split()
else:
value = []
if self._legacy:
self._legacy[key] = value
else:
self._data[key] = value
@property
def name_and_version(self):
return _get_name_and_version(self.name, self.version, True)
@property
def provides(self):
if self._legacy:
result = self._legacy['Provides-Dist']
else:
result = self._data.setdefault('provides', [])
s = '%s (%s)' % (self.name, self.version)
if s not in result:
result.append(s)
return result
@provides.setter
def provides(self, value):
if self._legacy:
self._legacy['Provides-Dist'] = value
else:
self._data['provides'] = value
def get_requirements(self, reqts, extras=None, env=None):
"""
Base method to get dependencies, given a set of extras
to satisfy and an optional environment context.
:param reqts: A list of sometimes-wanted dependencies,
perhaps dependent on extras and environment.
:param extras: A list of optional components being requested.
:param env: An optional environment for marker evaluation.
"""
if self._legacy:
result = reqts
else:
result = []
extras = get_extras(extras or [], self.extras)
for d in reqts:
if 'extra' not in d and 'environment' not in d:
# unconditional
include = True
else:
if 'extra' not in d:
# Not extra-dependent - only environment-dependent
include = True
else:
include = d.get('extra') in extras
if include:
# Not excluded because of extras, check environment
marker = d.get('environment')
if marker:
include = interpret(marker, env)
if include:
result.extend(d['requires'])
for key in ('build', 'dev', 'test'):
e = ':%s:' % key
if e in extras:
extras.remove(e)
# A recursive call, but it should terminate since 'test'
# has been removed from the extras
reqts = self._data.get('%s_requires' % key, [])
result.extend(self.get_requirements(reqts, extras=extras,
env=env))
return result
@property
def dictionary(self):
if self._legacy:
return self._from_legacy()
return self._data
@property
def dependencies(self):
if self._legacy:
raise NotImplementedError
else:
return extract_by_key(self._data, self.DEPENDENCY_KEYS)
@dependencies.setter
def dependencies(self, value):
if self._legacy:
raise NotImplementedError
else:
self._data.update(value)
def _validate_mapping(self, mapping, scheme):
if mapping.get('metadata_version') != self.METADATA_VERSION:
raise MetadataUnrecognizedVersionError()
missing = []
for key, exclusions in self.MANDATORY_KEYS.items():
if key not in mapping:
if scheme not in exclusions:
missing.append(key)
if missing:
msg = 'Missing metadata items: %s' % ', '.join(missing)
raise MetadataMissingError(msg)
for k, v in mapping.items():
self._validate_value(k, v, scheme)
def validate(self):
if self._legacy:
missing, warnings = self._legacy.check(True)
if missing or warnings:
logger.warning('Metadata: missing: %s, warnings: %s',
missing, warnings)
else:
self._validate_mapping(self._data, self.scheme)
def todict(self):
if self._legacy:
return self._legacy.todict(True)
else:
result = extract_by_key(self._data, self.INDEX_KEYS)
return result
def _from_legacy(self):
assert self._legacy and not self._data
result = {
'metadata_version': self.METADATA_VERSION,
'generator': self.GENERATOR,
}
lmd = self._legacy.todict(True) # skip missing ones
for k in ('name', 'version', 'license', 'summary', 'description',
'classifier'):
if k in lmd:
if k == 'classifier':
nk = 'classifiers'
else:
nk = k
result[nk] = lmd[k]
kw = lmd.get('Keywords', [])
if kw == ['']:
kw = []
result['keywords'] = kw
keys = (('requires_dist', 'run_requires'),
('setup_requires_dist', 'build_requires'))
for ok, nk in keys:
if ok in lmd and lmd[ok]:
result[nk] = [{'requires': lmd[ok]}]
result['provides'] = self.provides
author = {}
maintainer = {}
return result
LEGACY_MAPPING = {
'name': 'Name',
'version': 'Version',
'license': 'License',
'summary': 'Summary',
'description': 'Description',
'classifiers': 'Classifier',
}
def _to_legacy(self):
def process_entries(entries):
reqts = set()
for e in entries:
extra = e.get('extra')
env = e.get('environment')
rlist = e['requires']
for r in rlist:
if not env and not extra:
reqts.add(r)
else:
marker = ''
if extra:
marker = 'extra == "%s"' % extra
if env:
if marker:
marker = '(%s) and %s' % (env, marker)
else:
marker = env
reqts.add(';'.join((r, marker)))
return reqts
assert self._data and not self._legacy
result = LegacyMetadata()
nmd = self._data
for nk, ok in self.LEGACY_MAPPING.items():
if nk in nmd:
result[ok] = nmd[nk]
r1 = process_entries(self.run_requires + self.meta_requires)
r2 = process_entries(self.build_requires + self.dev_requires)
if self.extras:
result['Provides-Extra'] = sorted(self.extras)
result['Requires-Dist'] = sorted(r1)
result['Setup-Requires-Dist'] = sorted(r2)
# TODO: other fields such as contacts
return result
def write(self, path=None, fileobj=None, legacy=False, skip_unknown=True):
if [path, fileobj].count(None) != 1:
raise ValueError('Exactly one of path and fileobj is needed')
self.validate()
if legacy:
if self._legacy:
legacy_md = self._legacy
else:
legacy_md = self._to_legacy()
if path:
legacy_md.write(path, skip_unknown=skip_unknown)
else:
legacy_md.write_file(fileobj, skip_unknown=skip_unknown)
else:
if self._legacy:
d = self._from_legacy()
else:
d = self._data
if fileobj:
json.dump(d, fileobj, ensure_ascii=True, indent=2,
sort_keys=True)
else:
with codecs.open(path, 'w', 'utf-8') as f:
json.dump(d, f, ensure_ascii=True, indent=2,
sort_keys=True)
def add_requirements(self, requirements):
if self._legacy:
self._legacy.add_requirements(requirements)
else:
run_requires = self._data.setdefault('run_requires', [])
always = None
for entry in run_requires:
if 'environment' not in entry and 'extra' not in entry:
always = entry
break
if always is None:
always = { 'requires': requirements }
run_requires.insert(0, always)
else:
rset = set(always['requires']) | set(requirements)
always['requires'] = sorted(rset)
def __repr__(self):
name = self.name or '(no name)'
version = self.version or 'no version'
return '<%s %s %s (%s)>' % (self.__class__.__name__,
self.metadata_version, name, version)
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013-2016 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
from __future__ import unicode_literals
import bisect
import io
import logging
import os
import pkgutil
import shutil
import sys
import types
import zipimport
from . import DistlibException
from .util import cached_property, get_cache_base, path_to_cache_dir, Cache
logger = logging.getLogger(__name__)
cache = None # created when needed
class ResourceCache(Cache):
def __init__(self, base=None):
if base is None:
# Use native string to avoid issues on 2.x: see Python #20140.
base = os.path.join(get_cache_base(), str('resource-cache'))
super(ResourceCache, self).__init__(base)
def is_stale(self, resource, path):
"""
Is the cache stale for the given resource?
:param resource: The :class:`Resource` being cached.
:param path: The path of the resource in the cache.
:return: True if the cache is stale.
"""
# Cache invalidation is a hard problem :-)
return True
def get(self, resource):
"""
Get a resource into the cache,
:param resource: A :class:`Resource` instance.
:return: The pathname of the resource in the cache.
"""
prefix, path = resource.finder.get_cache_info(resource)
if prefix is None:
result = path
else:
result = os.path.join(self.base, self.prefix_to_dir(prefix), path)
dirname = os.path.dirname(result)
if not os.path.isdir(dirname):
os.makedirs(dirname)
if not os.path.exists(result):
stale = True
else:
stale = self.is_stale(resource, path)
if stale:
# write the bytes of the resource to the cache location
with open(result, 'wb') as f:
f.write(resource.bytes)
return result
class ResourceBase(object):
def __init__(self, finder, name):
self.finder = finder
self.name = name
class Resource(ResourceBase):
"""
A class representing an in-package resource, such as a data file. This is
not normally instantiated by user code, but rather by a
:class:`ResourceFinder` which manages the resource.
"""
is_container = False # Backwards compatibility
def as_stream(self):
"""
Get the resource as a stream.
This is not a property to make it obvious that it returns a new stream
each time.
"""
return self.finder.get_stream(self)
@cached_property
def file_path(self):
global cache
if cache is None:
cache = ResourceCache()
return cache.get(self)
@cached_property
def bytes(self):
return self.finder.get_bytes(self)
@cached_property
def size(self):
return self.finder.get_size(self)
class ResourceContainer(ResourceBase):
is_container = True # Backwards compatibility
@cached_property
def resources(self):
return self.finder.get_resources(self)
class ResourceFinder(object):
"""
Resource finder for file system resources.
"""
if sys.platform.startswith('java'):
skipped_extensions = ('.pyc', '.pyo', '.class')
else:
skipped_extensions = ('.pyc', '.pyo')
def __init__(self, module):
self.module = module
self.loader = getattr(module, '__loader__', None)
self.base = os.path.dirname(getattr(module, '__file__', ''))
def _adjust_path(self, path):
return os.path.realpath(path)
def _make_path(self, resource_name):
# Issue #50: need to preserve type of path on Python 2.x
# like os.path._get_sep
if isinstance(resource_name, bytes): # should only happen on 2.x
sep = b'/'
else:
sep = '/'
parts = resource_name.split(sep)
parts.insert(0, self.base)
result = os.path.join(*parts)
return self._adjust_path(result)
def _find(self, path):
return os.path.exists(path)
def get_cache_info(self, resource):
return None, resource.path
def find(self, resource_name):
path = self._make_path(resource_name)
if not self._find(path):
result = None
else:
if self._is_directory(path):
result = ResourceContainer(self, resource_name)
else:
result = Resource(self, resource_name)
result.path = path
return result
def get_stream(self, resource):
return open(resource.path, 'rb')
def get_bytes(self, resource):
with open(resource.path, 'rb') as f:
return f.read()
def get_size(self, resource):
return os.path.getsize(resource.path)
def get_resources(self, resource):
def allowed(f):
return (f != '__pycache__' and not
f.endswith(self.skipped_extensions))
return set([f for f in os.listdir(resource.path) if allowed(f)])
def is_container(self, resource):
return self._is_directory(resource.path)
_is_directory = staticmethod(os.path.isdir)
def iterator(self, resource_name):
resource = self.find(resource_name)
if resource is not None:
todo = [resource]
while todo:
resource = todo.pop(0)
yield resource
if resource.is_container:
rname = resource.name
for name in resource.resources:
if not rname:
new_name = name
else:
new_name = '/'.join([rname, name])
child = self.find(new_name)
if child.is_container:
todo.append(child)
else:
yield child
class ZipResourceFinder(ResourceFinder):
"""
Resource finder for resources in .zip files.
"""
def __init__(self, module):
super(ZipResourceFinder, self).__init__(module)
archive = self.loader.archive
self.prefix_len = 1 + len(archive)
# PyPy doesn't have a _files attr on zipimporter, and you can't set one
if hasattr(self.loader, '_files'):
self._files = self.loader._files
else:
self._files = zipimport._zip_directory_cache[archive]
self.index = sorted(self._files)
def _adjust_path(self, path):
return path
def _find(self, path):
path = path[self.prefix_len:]
if path in self._files:
result = True
else:
if path and path[-1] != os.sep:
path = path + os.sep
i = bisect.bisect(self.index, path)
try:
result = self.index[i].startswith(path)
except IndexError:
result = False
if not result:
logger.debug('_find failed: %r %r', path, self.loader.prefix)
else:
logger.debug('_find worked: %r %r', path, self.loader.prefix)
return result
def get_cache_info(self, resource):
prefix = self.loader.archive
path = resource.path[1 + len(prefix):]
return prefix, path
def get_bytes(self, resource):
return self.loader.get_data(resource.path)
def get_stream(self, resource):
return io.BytesIO(self.get_bytes(resource))
def get_size(self, resource):
path = resource.path[self.prefix_len:]
return self._files[path][3]
def get_resources(self, resource):
path = resource.path[self.prefix_len:]
if path and path[-1] != os.sep:
path += os.sep
plen = len(path)
result = set()
i = bisect.bisect(self.index, path)
while i < len(self.index):
if not self.index[i].startswith(path):
break
s = self.index[i][plen:]
result.add(s.split(os.sep, 1)[0]) # only immediate children
i += 1
return result
def _is_directory(self, path):
path = path[self.prefix_len:]
if path and path[-1] != os.sep:
path += os.sep
i = bisect.bisect(self.index, path)
try:
result = self.index[i].startswith(path)
except IndexError:
result = False
return result
_finder_registry = {
type(None): ResourceFinder,
zipimport.zipimporter: ZipResourceFinder
}
try:
# In Python 3.6, _frozen_importlib -> _frozen_importlib_external
try:
import _frozen_importlib_external as _fi
except ImportError:
import _frozen_importlib as _fi
_finder_registry[_fi.SourceFileLoader] = ResourceFinder
_finder_registry[_fi.FileFinder] = ResourceFinder
del _fi
except (ImportError, AttributeError):
pass
def register_finder(loader, finder_maker):
_finder_registry[type(loader)] = finder_maker
_finder_cache = {}
def finder(package):
"""
Return a resource finder for a package.
:param package: The name of the package.
:return: A :class:`ResourceFinder` instance for the package.
"""
if package in _finder_cache:
result = _finder_cache[package]
else:
if package not in sys.modules:
__import__(package)
module = sys.modules[package]
path = getattr(module, '__path__', None)
if path is None:
raise DistlibException('You cannot get a finder for a module, '
'only for a package')
loader = getattr(module, '__loader__', None)
finder_maker = _finder_registry.get(type(loader))
if finder_maker is None:
raise DistlibException('Unable to locate finder for %r' % package)
result = finder_maker(module)
_finder_cache[package] = result
return result
_dummy_module = types.ModuleType(str('__dummy__'))
def finder_for_path(path):
"""
Return a resource finder for a path, which should represent a container.
:param path: The path.
:return: A :class:`ResourceFinder` instance for the path.
"""
result = None
# calls any path hooks, gets importer into cache
pkgutil.get_importer(path)
loader = sys.path_importer_cache.get(path)
finder = _finder_registry.get(type(loader))
if finder:
module = _dummy_module
module.__file__ = os.path.join(path, '')
module.__loader__ = loader
result = finder(module)
return result
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013-2015 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
from io import BytesIO
import logging
import os
import re
import struct
import sys
from .compat import sysconfig, detect_encoding, ZipFile
from .resources import finder
from .util import (FileOperator, get_export_entry, convert_path,
get_executable, in_venv)
logger = logging.getLogger(__name__)
_DEFAULT_MANIFEST = '''
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
<assemblyIdentity version="1.0.0.0"
processorArchitecture="X86"
name="%s"
type="win32"/>
<!-- Identify the application security requirements. -->
<trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
<security>
<requestedPrivileges>
<requestedExecutionLevel level="asInvoker" uiAccess="false"/>
</requestedPrivileges>
</security>
</trustInfo>
</assembly>'''.strip()
# check if Python is called on the first line with this expression
FIRST_LINE_RE = re.compile(b'^#!.*pythonw?[0-9.]*([ \t].*)?$')
SCRIPT_TEMPLATE = '''# -*- coding: utf-8 -*-
if __name__ == '__main__':
import sys, re
def _resolve(module, func):
__import__(module)
mod = sys.modules[module]
parts = func.split('.')
result = getattr(mod, parts.pop(0))
for p in parts:
result = getattr(result, p)
return result
try:
sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
func = _resolve('%(module)s', '%(func)s')
rc = func() # None interpreted as 0
except Exception as e: # only supporting Python >= 2.6
sys.stderr.write('%%s\\n' %% e)
rc = 1
sys.exit(rc)
'''
def _enquote_executable(executable):
if ' ' in executable:
# make sure we quote only the executable in case of env
# for example /usr/bin/env "/dir with spaces/bin/jython"
# instead of "/usr/bin/env /dir with spaces/bin/jython"
# otherwise whole
if executable.startswith('/usr/bin/env '):
env, _executable = executable.split(' ', 1)
if ' ' in _executable and not _executable.startswith('"'):
executable = '%s "%s"' % (env, _executable)
else:
if not executable.startswith('"'):
executable = '"%s"' % executable
return executable
class ScriptMaker(object):
"""
A class to copy or create scripts from source scripts or callable
specifications.
"""
script_template = SCRIPT_TEMPLATE
executable = None # for shebangs
def __init__(self, source_dir, target_dir, add_launchers=True,
dry_run=False, fileop=None):
self.source_dir = source_dir
self.target_dir = target_dir
self.add_launchers = add_launchers
self.force = False
self.clobber = False
# It only makes sense to set mode bits on POSIX.
self.set_mode = (os.name == 'posix') or (os.name == 'java' and
os._name == 'posix')
self.variants = set(('', 'X.Y'))
self._fileop = fileop or FileOperator(dry_run)
self._is_nt = os.name == 'nt' or (
os.name == 'java' and os._name == 'nt')
def _get_alternate_executable(self, executable, options):
if options.get('gui', False) and self._is_nt: # pragma: no cover
dn, fn = os.path.split(executable)
fn = fn.replace('python', 'pythonw')
executable = os.path.join(dn, fn)
return executable
if sys.platform.startswith('java'): # pragma: no cover
def _is_shell(self, executable):
"""
Determine if the specified executable is a script
(contains a #! line)
"""
try:
with open(executable) as fp:
return fp.read(2) == '#!'
except (OSError, IOError):
logger.warning('Failed to open %s', executable)
return False
def _fix_jython_executable(self, executable):
if self._is_shell(executable):
# Workaround for Jython is not needed on Linux systems.
import java
if java.lang.System.getProperty('os.name') == 'Linux':
return executable
elif executable.lower().endswith('jython.exe'):
# Use wrapper exe for Jython on Windows
return executable
return '/usr/bin/env %s' % executable
def _get_shebang(self, encoding, post_interp=b'', options=None):
enquote = True
if self.executable:
executable = self.executable
enquote = False # assume this will be taken care of
elif not sysconfig.is_python_build():
executable = get_executable()
elif in_venv(): # pragma: no cover
executable = os.path.join(sysconfig.get_path('scripts'),
'python%s' % sysconfig.get_config_var('EXE'))
else: # pragma: no cover
executable = os.path.join(
sysconfig.get_config_var('BINDIR'),
'python%s%s' % (sysconfig.get_config_var('VERSION'),
sysconfig.get_config_var('EXE')))
if options:
executable = self._get_alternate_executable(executable, options)
if sys.platform.startswith('java'): # pragma: no cover
executable = self._fix_jython_executable(executable)
# Normalise case for Windows
executable = os.path.normcase(executable)
# If the user didn't specify an executable, it may be necessary to
# cater for executable paths with spaces (not uncommon on Windows)
if enquote:
executable = _enquote_executable(executable)
# Issue #51: don't use fsencode, since we later try to
# check that the shebang is decodable using utf-8.
executable = executable.encode('utf-8')
# in case of IronPython, play safe and enable frames support
if (sys.platform == 'cli' and '-X:Frames' not in post_interp
and '-X:FullFrames' not in post_interp): # pragma: no cover
post_interp += b' -X:Frames'
shebang = b'#!' + executable + post_interp + b'\n'
# Python parser starts to read a script using UTF-8 until
# it gets a #coding:xxx cookie. The shebang has to be the
# first line of a file, the #coding:xxx cookie cannot be
# written before. So the shebang has to be decodable from
# UTF-8.
try:
shebang.decode('utf-8')
except UnicodeDecodeError: # pragma: no cover
raise ValueError(
'The shebang (%r) is not decodable from utf-8' % shebang)
# If the script is encoded to a custom encoding (use a
# #coding:xxx cookie), the shebang has to be decodable from
# the script encoding too.
if encoding != 'utf-8':
try:
shebang.decode(encoding)
except UnicodeDecodeError: # pragma: no cover
raise ValueError(
'The shebang (%r) is not decodable '
'from the script encoding (%r)' % (shebang, encoding))
return shebang
def _get_script_text(self, entry):
return self.script_template % dict(module=entry.prefix,
func=entry.suffix)
manifest = _DEFAULT_MANIFEST
def get_manifest(self, exename):
base = os.path.basename(exename)
return self.manifest % base
def _write_script(self, names, shebang, script_bytes, filenames, ext):
use_launcher = self.add_launchers and self._is_nt
linesep = os.linesep.encode('utf-8')
if not use_launcher:
script_bytes = shebang + linesep + script_bytes
else: # pragma: no cover
if ext == 'py':
launcher = self._get_launcher('t')
else:
launcher = self._get_launcher('w')
stream = BytesIO()
with ZipFile(stream, 'w') as zf:
zf.writestr('__main__.py', script_bytes)
zip_data = stream.getvalue()
script_bytes = launcher + shebang + linesep + zip_data
for name in names:
outname = os.path.join(self.target_dir, name)
if use_launcher: # pragma: no cover
n, e = os.path.splitext(outname)
if e.startswith('.py'):
outname = n
outname = '%s.exe' % outname
try:
self._fileop.write_binary_file(outname, script_bytes)
except Exception:
# Failed writing an executable - it might be in use.
logger.warning('Failed to write executable - trying to '
'use .deleteme logic')
dfname = '%s.deleteme' % outname
if os.path.exists(dfname):
os.remove(dfname) # Not allowed to fail here
os.rename(outname, dfname) # nor here
self._fileop.write_binary_file(outname, script_bytes)
logger.debug('Able to replace executable using '
'.deleteme logic')
try:
os.remove(dfname)
except Exception:
pass # still in use - ignore error
else:
if self._is_nt and not outname.endswith('.' + ext): # pragma: no cover
outname = '%s.%s' % (outname, ext)
if os.path.exists(outname) and not self.clobber:
logger.warning('Skipping existing file %s', outname)
continue
self._fileop.write_binary_file(outname, script_bytes)
if self.set_mode:
self._fileop.set_executable_mode([outname])
filenames.append(outname)
def _make_script(self, entry, filenames, options=None):
post_interp = b''
if options:
args = options.get('interpreter_args', [])
if args:
args = ' %s' % ' '.join(args)
post_interp = args.encode('utf-8')
shebang = self._get_shebang('utf-8', post_interp, options=options)
script = self._get_script_text(entry).encode('utf-8')
name = entry.name
scriptnames = set()
if '' in self.variants:
scriptnames.add(name)
if 'X' in self.variants:
scriptnames.add('%s%s' % (name, sys.version[0]))
if 'X.Y' in self.variants:
scriptnames.add('%s-%s' % (name, sys.version[:3]))
if options and options.get('gui', False):
ext = 'pyw'
else:
ext = 'py'
self._write_script(scriptnames, shebang, script, filenames, ext)
def _copy_script(self, script, filenames):
adjust = False
script = os.path.join(self.source_dir, convert_path(script))
outname = os.path.join(self.target_dir, os.path.basename(script))
if not self.force and not self._fileop.newer(script, outname):
logger.debug('not copying %s (up-to-date)', script)
return
# Always open the file, but ignore failures in dry-run mode --
# that way, we'll get accurate feedback if we can read the
# script.
try:
f = open(script, 'rb')
except IOError: # pragma: no cover
if not self.dry_run:
raise
f = None
else:
first_line = f.readline()
if not first_line: # pragma: no cover
logger.warning('%s: %s is an empty file (skipping)',
self.get_command_name(), script)
return
match = FIRST_LINE_RE.match(first_line.replace(b'\r\n', b'\n'))
if match:
adjust = True
post_interp = match.group(1) or b''
if not adjust:
if f:
f.close()
self._fileop.copy_file(script, outname)
if self.set_mode:
self._fileop.set_executable_mode([outname])
filenames.append(outname)
else:
logger.info('copying and adjusting %s -> %s', script,
self.target_dir)
if not self._fileop.dry_run:
encoding, lines = detect_encoding(f.readline)
f.seek(0)
shebang = self._get_shebang(encoding, post_interp)
if b'pythonw' in first_line: # pragma: no cover
ext = 'pyw'
else:
ext = 'py'
n = os.path.basename(outname)
self._write_script([n], shebang, f.read(), filenames, ext)
if f:
f.close()
@property
def dry_run(self):
return self._fileop.dry_run
@dry_run.setter
def dry_run(self, value):
self._fileop.dry_run = value
if os.name == 'nt' or (os.name == 'java' and os._name == 'nt'): # pragma: no cover
# Executable launcher support.
# Launchers are from https://bitbucket.org/vinay.sajip/simple_launcher/
def _get_launcher(self, kind):
if struct.calcsize('P') == 8: # 64-bit
bits = '64'
else:
bits = '32'
name = '%s%s.exe' % (kind, bits)
# Issue 31: don't hardcode an absolute package name, but
# determine it relative to the current package
distlib_package = __name__.rsplit('.', 1)[0]
result = finder(distlib_package).find(name).bytes
return result
# Public API follows
def make(self, specification, options=None):
"""
Make a script.
:param specification: The specification, which is either a valid export
entry specification (to make a script from a
callable) or a filename (to make a script by
copying from a source location).
:param options: A dictionary of options controlling script generation.
:return: A list of all absolute pathnames written to.
"""
filenames = []
entry = get_export_entry(specification)
if entry is None:
self._copy_script(specification, filenames)
else:
self._make_script(entry, filenames, options=options)
return filenames
def make_multiple(self, specifications, options=None):
"""
Take a list of specifications and make scripts from them,
:param specifications: A list of specifications.
:return: A list of all absolute pathnames written to,
"""
filenames = []
for specification in specifications:
filenames.extend(self.make(specification, options))
return filenames
#
# Copyright (C) 2012-2016 The Python Software Foundation.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
import codecs
from collections import deque
import contextlib
import csv
from glob import iglob as std_iglob
import io
import json
import logging
import os
import py_compile
import re
import shutil
import socket
try:
import ssl
except ImportError: # pragma: no cover
ssl = None
import subprocess
import sys
import tarfile
import tempfile
import textwrap
try:
import threading
except ImportError: # pragma: no cover
import dummy_threading as threading
import time
from . import DistlibException
from .compat import (string_types, text_type, shutil, raw_input, StringIO,
cache_from_source, urlopen, urljoin, httplib, xmlrpclib,
splittype, HTTPHandler, BaseConfigurator, valid_ident,
Container, configparser, URLError, ZipFile, fsdecode,
unquote)
logger = logging.getLogger(__name__)
#
# Requirement parsing code for name + optional constraints + optional extras
#
# e.g. 'foo >= 1.2, < 2.0 [bar, baz]'
#
# The regex can seem a bit hairy, so we build it up out of smaller pieces
# which are manageable.
#
COMMA = r'\s*,\s*'
COMMA_RE = re.compile(COMMA)
IDENT = r'(\w|[.-])+'
EXTRA_IDENT = r'(\*|:(\*|\w+):|' + IDENT + ')'
VERSPEC = IDENT + r'\*?'
RELOP = '([<>=!~]=)|[<>]'
#
# The first relop is optional - if absent, will be taken as '~='
#
BARE_CONSTRAINTS = ('(' + RELOP + r')?\s*(' + VERSPEC + ')(' + COMMA + '(' +
RELOP + r')\s*(' + VERSPEC + '))*')
DIRECT_REF = '(from\s+(?P<diref>.*))'
#
# Either the bare constraints or the bare constraints in parentheses
#
CONSTRAINTS = (r'\(\s*(?P<c1>' + BARE_CONSTRAINTS + '|' + DIRECT_REF +
r')\s*\)|(?P<c2>' + BARE_CONSTRAINTS + '\s*)')
EXTRA_LIST = EXTRA_IDENT + '(' + COMMA + EXTRA_IDENT + ')*'
EXTRAS = r'\[\s*(?P<ex>' + EXTRA_LIST + r')?\s*\]'
REQUIREMENT = ('(?P<dn>' + IDENT + r')\s*(' + EXTRAS + r'\s*)?(\s*' +
CONSTRAINTS + ')?$')
REQUIREMENT_RE = re.compile(REQUIREMENT)
#
# Used to scan through the constraints
#
RELOP_IDENT = '(?P<op>' + RELOP + r')\s*(?P<vn>' + VERSPEC + ')'
RELOP_IDENT_RE = re.compile(RELOP_IDENT)
def parse_requirement(s):
def get_constraint(m):
d = m.groupdict()
return d['op'], d['vn']
result = None
m = REQUIREMENT_RE.match(s)
if m:
d = m.groupdict()
name = d['dn']
cons = d['c1'] or d['c2']
if not d['diref']:
url = None
else:
# direct reference
cons = None
url = d['diref'].strip()
if not cons:
cons = None
constr = ''
rs = d['dn']
else:
if cons[0] not in '<>!=':
cons = '~=' + cons
iterator = RELOP_IDENT_RE.finditer(cons)
cons = [get_constraint(m) for m in iterator]
rs = '%s (%s)' % (name, ', '.join(['%s %s' % con for con in cons]))
if not d['ex']:
extras = None
else:
extras = COMMA_RE.split(d['ex'])
result = Container(name=name, constraints=cons, extras=extras,
requirement=rs, source=s, url=url)
return result
def get_resources_dests(resources_root, rules):
"""Find destinations for resources files"""
def get_rel_path(base, path):
# normalizes and returns a lstripped-/-separated path
base = base.replace(os.path.sep, '/')
path = path.replace(os.path.sep, '/')
assert path.startswith(base)
return path[len(base):].lstrip('/')
destinations = {}
for base, suffix, dest in rules:
prefix = os.path.join(resources_root, base)
for abs_base in iglob(prefix):
abs_glob = os.path.join(abs_base, suffix)
for abs_path in iglob(abs_glob):
resource_file = get_rel_path(resources_root, abs_path)
if dest is None: # remove the entry if it was here
destinations.pop(resource_file, None)
else:
rel_path = get_rel_path(abs_base, abs_path)
rel_dest = dest.replace(os.path.sep, '/').rstrip('/')
destinations[resource_file] = rel_dest + '/' + rel_path
return destinations
def in_venv():
if hasattr(sys, 'real_prefix'):
# virtualenv venvs
result = True
else:
# PEP 405 venvs
result = sys.prefix != getattr(sys, 'base_prefix', sys.prefix)
return result
def get_executable():
# The __PYVENV_LAUNCHER__ dance is apparently no longer needed, as
# changes to the stub launcher mean that sys.executable always points
# to the stub on macOS
# if sys.platform == 'darwin' and ('__PYVENV_LAUNCHER__'
# in os.environ):
# result = os.environ['__PYVENV_LAUNCHER__']
# else:
# result = sys.executable
# return result
result = os.path.normcase(sys.executable)
if not isinstance(result, text_type):
result = fsdecode(result)
return result
def proceed(prompt, allowed_chars, error_prompt=None, default=None):
p = prompt
while True:
s = raw_input(p)
p = prompt
if not s and default:
s = default
if s:
c = s[0].lower()
if c in allowed_chars:
break
if error_prompt:
p = '%c: %s\n%s' % (c, error_prompt, prompt)
return c
def extract_by_key(d, keys):
if isinstance(keys, string_types):
keys = keys.split()
result = {}
for key in keys:
if key in d:
result[key] = d[key]
return result
def read_exports(stream):
if sys.version_info[0] >= 3:
# needs to be a text stream
stream = codecs.getreader('utf-8')(stream)
# Try to load as JSON, falling back on legacy format
data = stream.read()
stream = StringIO(data)
try:
jdata = json.load(stream)
result = jdata['extensions']['python.exports']['exports']
for group, entries in result.items():
for k, v in entries.items():
s = '%s = %s' % (k, v)
entry = get_export_entry(s)
assert entry is not None
entries[k] = entry
return result
except Exception:
stream.seek(0, 0)
def read_stream(cp, stream):
if hasattr(cp, 'read_file'):
cp.read_file(stream)
else:
cp.readfp(stream)
cp = configparser.ConfigParser()
try:
read_stream(cp, stream)
except configparser.MissingSectionHeaderError:
stream.close()
data = textwrap.dedent(data)
stream = StringIO(data)
read_stream(cp, stream)
result = {}
for key in cp.sections():
result[key] = entries = {}
for name, value in cp.items(key):
s = '%s = %s' % (name, value)
entry = get_export_entry(s)
assert entry is not None
#entry.dist = self
entries[name] = entry
return result
def write_exports(exports, stream):
if sys.version_info[0] >= 3:
# needs to be a text stream
stream = codecs.getwriter('utf-8')(stream)
cp = configparser.ConfigParser()
for k, v in exports.items():
# TODO check k, v for valid values
cp.add_section(k)
for entry in v.values():
if entry.suffix is None:
s = entry.prefix
else:
s = '%s:%s' % (entry.prefix, entry.suffix)
if entry.flags:
s = '%s [%s]' % (s, ', '.join(entry.flags))
cp.set(k, entry.name, s)
cp.write(stream)
@contextlib.contextmanager
def tempdir():
td = tempfile.mkdtemp()
try:
yield td
finally:
shutil.rmtree(td)
@contextlib.contextmanager
def chdir(d):
cwd = os.getcwd()
try:
os.chdir(d)
yield
finally:
os.chdir(cwd)
@contextlib.contextmanager
def socket_timeout(seconds=15):
cto = socket.getdefaulttimeout()
try:
socket.setdefaulttimeout(seconds)
yield
finally:
socket.setdefaulttimeout(cto)
class cached_property(object):
def __init__(self, func):
self.func = func
#for attr in ('__name__', '__module__', '__doc__'):
# setattr(self, attr, getattr(func, attr, None))
def __get__(self, obj, cls=None):
if obj is None:
return self
value = self.func(obj)
object.__setattr__(obj, self.func.__name__, value)
#obj.__dict__[self.func.__name__] = value = self.func(obj)
return value
def convert_path(pathname):
"""Return 'pathname' as a name that will work on the native filesystem.
The path is split on '/' and put back together again using the current
directory separator. Needed because filenames in the setup script are
always supplied in Unix style, and have to be converted to the local
convention before we can actually use them in the filesystem. Raises
ValueError on non-Unix-ish systems if 'pathname' either starts or
ends with a slash.
"""
if os.sep == '/':
return pathname
if not pathname:
return pathname
if pathname[0] == '/':
raise ValueError("path '%s' cannot be absolute" % pathname)
if pathname[-1] == '/':
raise ValueError("path '%s' cannot end with '/'" % pathname)
paths = pathname.split('/')
while os.curdir in paths:
paths.remove(os.curdir)
if not paths:
return os.curdir
return os.path.join(*paths)
class FileOperator(object):
def __init__(self, dry_run=False):
self.dry_run = dry_run
self.ensured = set()
self._init_record()
def _init_record(self):
self.record = False
self.files_written = set()
self.dirs_created = set()
def record_as_written(self, path):
if self.record:
self.files_written.add(path)
def newer(self, source, target):
"""Tell if the target is newer than the source.
Returns true if 'source' exists and is more recently modified than
'target', or if 'source' exists and 'target' doesn't.
Returns false if both exist and 'target' is the same age or younger
than 'source'. Raise PackagingFileError if 'source' does not exist.
Note that this test is not very accurate: files created in the same
second will have the same "age".
"""
if not os.path.exists(source):
raise DistlibException("file '%r' does not exist" %
os.path.abspath(source))
if not os.path.exists(target):
return True
return os.stat(source).st_mtime > os.stat(target).st_mtime
def copy_file(self, infile, outfile, check=True):
"""Copy a file respecting dry-run and force flags.
"""
self.ensure_dir(os.path.dirname(outfile))
logger.info('Copying %s to %s', infile, outfile)
if not self.dry_run:
msg = None
if check:
if os.path.islink(outfile):
msg = '%s is a symlink' % outfile
elif os.path.exists(outfile) and not os.path.isfile(outfile):
msg = '%s is a non-regular file' % outfile
if msg:
raise ValueError(msg + ' which would be overwritten')
shutil.copyfile(infile, outfile)
self.record_as_written(outfile)
def copy_stream(self, instream, outfile, encoding=None):
assert not os.path.isdir(outfile)
self.ensure_dir(os.path.dirname(outfile))
logger.info('Copying stream %s to %s', instream, outfile)
if not self.dry_run:
if encoding is None:
outstream = open(outfile, 'wb')
else:
outstream = codecs.open(outfile, 'w', encoding=encoding)
try:
shutil.copyfileobj(instream, outstream)
finally:
outstream.close()
self.record_as_written(outfile)
def write_binary_file(self, path, data):
self.ensure_dir(os.path.dirname(path))
if not self.dry_run:
with open(path, 'wb') as f:
f.write(data)
self.record_as_written(path)
def write_text_file(self, path, data, encoding):
self.ensure_dir(os.path.dirname(path))
if not self.dry_run:
with open(path, 'wb') as f:
f.write(data.encode(encoding))
self.record_as_written(path)
def set_mode(self, bits, mask, files):
if os.name == 'posix' or (os.name == 'java' and os._name == 'posix'):
# Set the executable bits (owner, group, and world) on
# all the files specified.
for f in files:
if self.dry_run:
logger.info("changing mode of %s", f)
else:
mode = (os.stat(f).st_mode | bits) & mask
logger.info("changing mode of %s to %o", f, mode)
os.chmod(f, mode)
set_executable_mode = lambda s, f: s.set_mode(0o555, 0o7777, f)
def ensure_dir(self, path):
path = os.path.abspath(path)
if path not in self.ensured and not os.path.exists(path):
self.ensured.add(path)
d, f = os.path.split(path)
self.ensure_dir(d)
logger.info('Creating %s' % path)
if not self.dry_run:
os.mkdir(path)
if self.record:
self.dirs_created.add(path)
def byte_compile(self, path, optimize=False, force=False, prefix=None):
dpath = cache_from_source(path, not optimize)
logger.info('Byte-compiling %s to %s', path, dpath)
if not self.dry_run:
if force or self.newer(path, dpath):
if not prefix:
diagpath = None
else:
assert path.startswith(prefix)
diagpath = path[len(prefix):]
py_compile.compile(path, dpath, diagpath, True) # raise error
self.record_as_written(dpath)
return dpath
def ensure_removed(self, path):
if os.path.exists(path):
if os.path.isdir(path) and not os.path.islink(path):
logger.debug('Removing directory tree at %s', path)
if not self.dry_run:
shutil.rmtree(path)
if self.record:
if path in self.dirs_created:
self.dirs_created.remove(path)
else:
if os.path.islink(path):
s = 'link'
else:
s = 'file'
logger.debug('Removing %s %s', s, path)
if not self.dry_run:
os.remove(path)
if self.record:
if path in self.files_written:
self.files_written.remove(path)
def is_writable(self, path):
result = False
while not result:
if os.path.exists(path):
result = os.access(path, os.W_OK)
break
parent = os.path.dirname(path)
if parent == path:
break
path = parent
return result
def commit(self):
"""
Commit recorded changes, turn off recording, return
changes.
"""
assert self.record
result = self.files_written, self.dirs_created
self._init_record()
return result
def rollback(self):
if not self.dry_run:
for f in list(self.files_written):
if os.path.exists(f):
os.remove(f)
# dirs should all be empty now, except perhaps for
# __pycache__ subdirs
# reverse so that subdirs appear before their parents
dirs = sorted(self.dirs_created, reverse=True)
for d in dirs:
flist = os.listdir(d)
if flist:
assert flist == ['__pycache__']
sd = os.path.join(d, flist[0])
os.rmdir(sd)
os.rmdir(d) # should fail if non-empty
self._init_record()
def resolve(module_name, dotted_path):
if module_name in sys.modules:
mod = sys.modules[module_name]
else:
mod = __import__(module_name)
if dotted_path is None:
result = mod
else:
parts = dotted_path.split('.')
result = getattr(mod, parts.pop(0))
for p in parts:
result = getattr(result, p)
return result
class ExportEntry(object):
def __init__(self, name, prefix, suffix, flags):
self.name = name
self.prefix = prefix
self.suffix = suffix
self.flags = flags
@cached_property
def value(self):
return resolve(self.prefix, self.suffix)
def __repr__(self): # pragma: no cover
return '<ExportEntry %s = %s:%s %s>' % (self.name, self.prefix,
self.suffix, self.flags)
def __eq__(self, other):
if not isinstance(other, ExportEntry):
result = False
else:
result = (self.name == other.name and
self.prefix == other.prefix and
self.suffix == other.suffix and
self.flags == other.flags)
return result
__hash__ = object.__hash__
ENTRY_RE = re.compile(r'''(?P<name>(\w|[-.+])+)
\s*=\s*(?P<callable>(\w+)([:\.]\w+)*)
\s*(\[\s*(?P<flags>\w+(=\w+)?(,\s*\w+(=\w+)?)*)\s*\])?
''', re.VERBOSE)
def get_export_entry(specification):
m = ENTRY_RE.search(specification)
if not m:
result = None
if '[' in specification or ']' in specification:
raise DistlibException("Invalid specification "
"'%s'" % specification)
else:
d = m.groupdict()
name = d['name']
path = d['callable']
colons = path.count(':')
if colons == 0:
prefix, suffix = path, None
else:
if colons != 1:
raise DistlibException("Invalid specification "
"'%s'" % specification)
prefix, suffix = path.split(':')
flags = d['flags']
if flags is None:
if '[' in specification or ']' in specification:
raise DistlibException("Invalid specification "
"'%s'" % specification)
flags = []
else:
flags = [f.strip() for f in flags.split(',')]
result = ExportEntry(name, prefix, suffix, flags)
return result
def get_cache_base(suffix=None):
"""
Return the default base location for distlib caches. If the directory does
not exist, it is created. Use the suffix provided for the base directory,
and default to '.distlib' if it isn't provided.
On Windows, if LOCALAPPDATA is defined in the environment, then it is
assumed to be a directory, and will be the parent directory of the result.
On POSIX, and on Windows if LOCALAPPDATA is not defined, the user's home
directory - using os.expanduser('~') - will be the parent directory of
the result.
The result is just the directory '.distlib' in the parent directory as
determined above, or with the name specified with ``suffix``.
"""
if suffix is None:
suffix = '.distlib'
if os.name == 'nt' and 'LOCALAPPDATA' in os.environ:
result = os.path.expandvars('$localappdata')
else:
# Assume posix, or old Windows
result = os.path.expanduser('~')
# we use 'isdir' instead of 'exists', because we want to
# fail if there's a file with that name
if os.path.isdir(result):
usable = os.access(result, os.W_OK)
if not usable:
logger.warning('Directory exists but is not writable: %s', result)
else:
try:
os.makedirs(result)
usable = True
except OSError:
logger.warning('Unable to create %s', result, exc_info=True)
usable = False
if not usable:
result = tempfile.mkdtemp()
logger.warning('Default location unusable, using %s', result)
return os.path.join(result, suffix)
def path_to_cache_dir(path):
"""
Convert an absolute path to a directory name for use in a cache.
The algorithm used is:
#. On Windows, any ``':'`` in the drive is replaced with ``'---'``.
#. Any occurrence of ``os.sep`` is replaced with ``'--'``.
#. ``'.cache'`` is appended.
"""
d, p = os.path.splitdrive(os.path.abspath(path))
if d:
d = d.replace(':', '---')
p = p.replace(os.sep, '--')
return d + p + '.cache'
def ensure_slash(s):
if not s.endswith('/'):
return s + '/'
return s
def parse_credentials(netloc):
username = password = None
if '@' in netloc:
prefix, netloc = netloc.split('@', 1)
if ':' not in prefix:
username = prefix
else:
username, password = prefix.split(':', 1)
return username, password, netloc
def get_process_umask():
result = os.umask(0o22)
os.umask(result)
return result
def is_string_sequence(seq):
result = True
i = None
for i, s in enumerate(seq):
if not isinstance(s, string_types):
result = False
break
assert i is not None
return result
PROJECT_NAME_AND_VERSION = re.compile('([a-z0-9_]+([.-][a-z_][a-z0-9_]*)*)-'
'([a-z0-9_.+-]+)', re.I)
PYTHON_VERSION = re.compile(r'-py(\d\.?\d?)')
def split_filename(filename, project_name=None):
"""
Extract name, version, python version from a filename (no extension)
Return name, version, pyver or None
"""
result = None
pyver = None
filename = unquote(filename).replace(' ', '-')
m = PYTHON_VERSION.search(filename)
if m:
pyver = m.group(1)
filename = filename[:m.start()]
if project_name and len(filename) > len(project_name) + 1:
m = re.match(re.escape(project_name) + r'\b', filename)
if m:
n = m.end()
result = filename[:n], filename[n + 1:], pyver
if result is None:
m = PROJECT_NAME_AND_VERSION.match(filename)
if m:
result = m.group(1), m.group(3), pyver
return result
# Allow spaces in name because of legacy dists like "Twisted Core"
NAME_VERSION_RE = re.compile(r'(?P<name>[\w .-]+)\s*'
r'\(\s*(?P<ver>[^\s)]+)\)$')
def parse_name_and_version(p):
"""
A utility method used to get name and version from a string.
From e.g. a Provides-Dist value.
:param p: A value in a form 'foo (1.0)'
:return: The name and version as a tuple.
"""
m = NAME_VERSION_RE.match(p)
if not m:
raise DistlibException('Ill-formed name/version string: \'%s\'' % p)
d = m.groupdict()
return d['name'].strip().lower(), d['ver']
def get_extras(requested, available):
result = set()
requested = set(requested or [])
available = set(available or [])
if '*' in requested:
requested.remove('*')
result |= available
for r in requested:
if r == '-':
result.add(r)
elif r.startswith('-'):
unwanted = r[1:]
if unwanted not in available:
logger.warning('undeclared extra: %s' % unwanted)
if unwanted in result:
result.remove(unwanted)
else:
if r not in available:
logger.warning('undeclared extra: %s' % r)
result.add(r)
return result
#
# Extended metadata functionality
#
def _get_external_data(url):
result = {}
try:
# urlopen might fail if it runs into redirections,
# because of Python issue #13696. Fixed in locators
# using a custom redirect handler.
resp = urlopen(url)
headers = resp.info()
ct = headers.get('Content-Type')
if not ct.startswith('application/json'):
logger.debug('Unexpected response for JSON request: %s', ct)
else:
reader = codecs.getreader('utf-8')(resp)
#data = reader.read().decode('utf-8')
#result = json.loads(data)
result = json.load(reader)
except Exception as e:
logger.exception('Failed to get external data for %s: %s', url, e)
return result
_external_data_base_url = 'https://www.red-dove.com/pypi/projects/'
def get_project_data(name):
url = '%s/%s/project.json' % (name[0].upper(), name)
url = urljoin(_external_data_base_url, url)
result = _get_external_data(url)
return result
def get_package_data(name, version):
url = '%s/%s/package-%s.json' % (name[0].upper(), name, version)
url = urljoin(_external_data_base_url, url)
return _get_external_data(url)
class Cache(object):
"""
A class implementing a cache for resources that need to live in the file system
e.g. shared libraries. This class was moved from resources to here because it
could be used by other modules, e.g. the wheel module.
"""
def __init__(self, base):
"""
Initialise an instance.
:param base: The base directory where the cache should be located.
"""
# we use 'isdir' instead of 'exists', because we want to
# fail if there's a file with that name
if not os.path.isdir(base): # pragma: no cover
os.makedirs(base)
if (os.stat(base).st_mode & 0o77) != 0:
logger.warning('Directory \'%s\' is not private', base)
self.base = os.path.abspath(os.path.normpath(base))
def prefix_to_dir(self, prefix):
"""
Converts a resource prefix to a directory name in the cache.
"""
return path_to_cache_dir(prefix)
def clear(self):
"""
Clear the cache.
"""
not_removed = []
for fn in os.listdir(self.base):
fn = os.path.join(self.base, fn)
try:
if os.path.islink(fn) or os.path.isfile(fn):
os.remove(fn)
elif os.path.isdir(fn):
shutil.rmtree(fn)
except Exception:
not_removed.append(fn)
return not_removed
class EventMixin(object):
"""
A very simple publish/subscribe system.
"""
def __init__(self):
self._subscribers = {}
def add(self, event, subscriber, append=True):
"""
Add a subscriber for an event.
:param event: The name of an event.
:param subscriber: The subscriber to be added (and called when the
event is published).
:param append: Whether to append or prepend the subscriber to an
existing subscriber list for the event.
"""
subs = self._subscribers
if event not in subs:
subs[event] = deque([subscriber])
else:
sq = subs[event]
if append:
sq.append(subscriber)
else:
sq.appendleft(subscriber)
def remove(self, event, subscriber):
"""
Remove a subscriber for an event.
:param event: The name of an event.
:param subscriber: The subscriber to be removed.
"""
subs = self._subscribers
if event not in subs:
raise ValueError('No subscribers: %r' % event)
subs[event].remove(subscriber)
def get_subscribers(self, event):
"""
Return an iterator for the subscribers for an event.
:param event: The event to return subscribers for.
"""
return iter(self._subscribers.get(event, ()))
def publish(self, event, *args, **kwargs):
"""
Publish a event and return a list of values returned by its
subscribers.
:param event: The event to publish.
:param args: The positional arguments to pass to the event's
subscribers.
:param kwargs: The keyword arguments to pass to the event's
subscribers.
"""
result = []
for subscriber in self.get_subscribers(event):
try:
value = subscriber(event, *args, **kwargs)
except Exception:
logger.exception('Exception during event publication')
value = None
result.append(value)
logger.debug('publish %s: args = %s, kwargs = %s, result = %s',
event, args, kwargs, result)
return result
#
# Simple sequencing
#
class Sequencer(object):
def __init__(self):
self._preds = {}
self._succs = {}
self._nodes = set() # nodes with no preds/succs
def add_node(self, node):
self._nodes.add(node)
def remove_node(self, node, edges=False):
if node in self._nodes:
self._nodes.remove(node)
if edges:
for p in set(self._preds.get(node, ())):
self.remove(p, node)
for s in set(self._succs.get(node, ())):
self.remove(node, s)
# Remove empties
for k, v in list(self._preds.items()):
if not v:
del self._preds[k]
for k, v in list(self._succs.items()):
if not v:
del self._succs[k]
def add(self, pred, succ):
assert pred != succ
self._preds.setdefault(succ, set()).add(pred)
self._succs.setdefault(pred, set()).add(succ)
def remove(self, pred, succ):
assert pred != succ
try:
preds = self._preds[succ]
succs = self._succs[pred]
except KeyError: # pragma: no cover
raise ValueError('%r not a successor of anything' % succ)
try:
preds.remove(pred)
succs.remove(succ)
except KeyError: # pragma: no cover
raise ValueError('%r not a successor of %r' % (succ, pred))
def is_step(self, step):
return (step in self._preds or step in self._succs or
step in self._nodes)
def get_steps(self, final):
if not self.is_step(final):
raise ValueError('Unknown: %r' % final)
result = []
todo = []
seen = set()
todo.append(final)
while todo:
step = todo.pop(0)
if step in seen:
# if a step was already seen,
# move it to the end (so it will appear earlier
# when reversed on return) ... but not for the
# final step, as that would be confusing for
# users
if step != final:
result.remove(step)
result.append(step)
else:
seen.add(step)
result.append(step)
preds = self._preds.get(step, ())
todo.extend(preds)
return reversed(result)
@property
def strong_connections(self):
#http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
index_counter = [0]
stack = []
lowlinks = {}
index = {}
result = []
graph = self._succs
def strongconnect(node):
# set the depth index for this node to the smallest unused index
index[node] = index_counter[0]
lowlinks[node] = index_counter[0]
index_counter[0] += 1
stack.append(node)
# Consider successors
try:
successors = graph[node]
except Exception:
successors = []
for successor in successors:
if successor not in lowlinks:
# Successor has not yet been visited
strongconnect(successor)
lowlinks[node] = min(lowlinks[node],lowlinks[successor])
elif successor in stack:
# the successor is in the stack and hence in the current
# strongly connected component (SCC)
lowlinks[node] = min(lowlinks[node],index[successor])
# If `node` is a root node, pop the stack and generate an SCC
if lowlinks[node] == index[node]:
connected_component = []
while True:
successor = stack.pop()
connected_component.append(successor)
if successor == node: break
component = tuple(connected_component)
# storing the result
result.append(component)
for node in graph:
if node not in lowlinks:
strongconnect(node)
return result
@property
def dot(self):
result = ['digraph G {']
for succ in self._preds:
preds = self._preds[succ]
for pred in preds:
result.append(' %s -> %s;' % (pred, succ))
for node in self._nodes:
result.append(' %s;' % node)
result.append('}')
return '\n'.join(result)
#
# Unarchiving functionality for zip, tar, tgz, tbz, whl
#
ARCHIVE_EXTENSIONS = ('.tar.gz', '.tar.bz2', '.tar', '.zip',
'.tgz', '.tbz', '.whl')
def unarchive(archive_filename, dest_dir, format=None, check=True):
def check_path(path):
if not isinstance(path, text_type):
path = path.decode('utf-8')
p = os.path.abspath(os.path.join(dest_dir, path))
if not p.startswith(dest_dir) or p[plen] != os.sep:
raise ValueError('path outside destination: %r' % p)
dest_dir = os.path.abspath(dest_dir)
plen = len(dest_dir)
archive = None
if format is None:
if archive_filename.endswith(('.zip', '.whl')):
format = 'zip'
elif archive_filename.endswith(('.tar.gz', '.tgz')):
format = 'tgz'
mode = 'r:gz'
elif archive_filename.endswith(('.tar.bz2', '.tbz')):
format = 'tbz'
mode = 'r:bz2'
elif archive_filename.endswith('.tar'):
format = 'tar'
mode = 'r'
else: # pragma: no cover
raise ValueError('Unknown format for %r' % archive_filename)
try:
if format == 'zip':
archive = ZipFile(archive_filename, 'r')
if check:
names = archive.namelist()
for name in names:
check_path(name)
else:
archive = tarfile.open(archive_filename, mode)
if check:
names = archive.getnames()
for name in names:
check_path(name)
if format != 'zip' and sys.version_info[0] < 3:
# See Python issue 17153. If the dest path contains Unicode,
# tarfile extraction fails on Python 2.x if a member path name
# contains non-ASCII characters - it leads to an implicit
# bytes -> unicode conversion using ASCII to decode.
for tarinfo in archive.getmembers():
if not isinstance(tarinfo.name, text_type):
tarinfo.name = tarinfo.name.decode('utf-8')
archive.extractall(dest_dir)
finally:
if archive:
archive.close()
def zip_dir(directory):
"""zip a directory tree into a BytesIO object"""
result = io.BytesIO()
dlen = len(directory)
with ZipFile(result, "w") as zf:
for root, dirs, files in os.walk(directory):
for name in files:
full = os.path.join(root, name)
rel = root[dlen:]
dest = os.path.join(rel, name)
zf.write(full, dest)
return result
#
# Simple progress bar
#
UNITS = ('', 'K', 'M', 'G','T','P')
class Progress(object):
unknown = 'UNKNOWN'
def __init__(self, minval=0, maxval=100):
assert maxval is None or maxval >= minval
self.min = self.cur = minval
self.max = maxval
self.started = None
self.elapsed = 0
self.done = False
def update(self, curval):
assert self.min <= curval
assert self.max is None or curval <= self.max
self.cur = curval
now = time.time()
if self.started is None:
self.started = now
else:
self.elapsed = now - self.started
def increment(self, incr):
assert incr >= 0
self.update(self.cur + incr)
def start(self):
self.update(self.min)
return self
def stop(self):
if self.max is not None:
self.update(self.max)
self.done = True
@property
def maximum(self):
return self.unknown if self.max is None else self.max
@property
def percentage(self):
if self.done:
result = '100 %'
elif self.max is None:
result = ' ?? %'
else:
v = 100.0 * (self.cur - self.min) / (self.max - self.min)
result = '%3d %%' % v
return result
def format_duration(self, duration):
if (duration <= 0) and self.max is None or self.cur == self.min:
result = '??:??:??'
#elif duration < 1:
# result = '--:--:--'
else:
result = time.strftime('%H:%M:%S', time.gmtime(duration))
return result
@property
def ETA(self):
if self.done:
prefix = 'Done'
t = self.elapsed
#import pdb; pdb.set_trace()
else:
prefix = 'ETA '
if self.max is None:
t = -1
elif self.elapsed == 0 or (self.cur == self.min):
t = 0
else:
#import pdb; pdb.set_trace()
t = float(self.max - self.min)
t /= self.cur - self.min
t = (t - 1) * self.elapsed
return '%s: %s' % (prefix, self.format_duration(t))
@property
def speed(self):
if self.elapsed == 0:
result = 0.0
else:
result = (self.cur - self.min) / self.elapsed
for unit in UNITS:
if result < 1000:
break
result /= 1000.0
return '%d %sB/s' % (result, unit)
#
# Glob functionality
#
RICH_GLOB = re.compile(r'\{([^}]*)\}')
_CHECK_RECURSIVE_GLOB = re.compile(r'[^/\\,{]\*\*|\*\*[^/\\,}]')
_CHECK_MISMATCH_SET = re.compile(r'^[^{]*\}|\{[^}]*$')
def iglob(path_glob):
"""Extended globbing function that supports ** and {opt1,opt2,opt3}."""
if _CHECK_RECURSIVE_GLOB.search(path_glob):
msg = """invalid glob %r: recursive glob "**" must be used alone"""
raise ValueError(msg % path_glob)
if _CHECK_MISMATCH_SET.search(path_glob):
msg = """invalid glob %r: mismatching set marker '{' or '}'"""
raise ValueError(msg % path_glob)
return _iglob(path_glob)
def _iglob(path_glob):
rich_path_glob = RICH_GLOB.split(path_glob, 1)
if len(rich_path_glob) > 1:
assert len(rich_path_glob) == 3, rich_path_glob
prefix, set, suffix = rich_path_glob
for item in set.split(','):
for path in _iglob(''.join((prefix, item, suffix))):
yield path
else:
if '**' not in path_glob:
for item in std_iglob(path_glob):
yield item
else:
prefix, radical = path_glob.split('**', 1)
if prefix == '':
prefix = '.'
if radical == '':
radical = '*'
else:
# we support both
radical = radical.lstrip('/')
radical = radical.lstrip('\\')
for path, dir, files in os.walk(prefix):
path = os.path.normpath(path)
for fn in _iglob(os.path.join(path, radical)):
yield fn
if ssl:
from .compat import (HTTPSHandler as BaseHTTPSHandler, match_hostname,
CertificateError)
#
# HTTPSConnection which verifies certificates/matches domains
#
class HTTPSConnection(httplib.HTTPSConnection):
ca_certs = None # set this to the path to the certs file (.pem)
check_domain = True # only used if ca_certs is not None
# noinspection PyPropertyAccess
def connect(self):
sock = socket.create_connection((self.host, self.port), self.timeout)
if getattr(self, '_tunnel_host', False):
self.sock = sock
self._tunnel()
if not hasattr(ssl, 'SSLContext'):
# For 2.x
if self.ca_certs:
cert_reqs = ssl.CERT_REQUIRED
else:
cert_reqs = ssl.CERT_NONE
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file,
cert_reqs=cert_reqs,
ssl_version=ssl.PROTOCOL_SSLv23,
ca_certs=self.ca_certs)
else: # pragma: no cover
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
context.options |= ssl.OP_NO_SSLv2
if self.cert_file:
context.load_cert_chain(self.cert_file, self.key_file)
kwargs = {}
if self.ca_certs:
context.verify_mode = ssl.CERT_REQUIRED
context.load_verify_locations(cafile=self.ca_certs)
if getattr(ssl, 'HAS_SNI', False):
kwargs['server_hostname'] = self.host
self.sock = context.wrap_socket(sock, **kwargs)
if self.ca_certs and self.check_domain:
try:
match_hostname(self.sock.getpeercert(), self.host)
logger.debug('Host verified: %s', self.host)
except CertificateError: # pragma: no cover
self.sock.shutdown(socket.SHUT_RDWR)
self.sock.close()
raise
class HTTPSHandler(BaseHTTPSHandler):
def __init__(self, ca_certs, check_domain=True):
BaseHTTPSHandler.__init__(self)
self.ca_certs = ca_certs
self.check_domain = check_domain
def _conn_maker(self, *args, **kwargs):
"""
This is called to create a connection instance. Normally you'd
pass a connection class to do_open, but it doesn't actually check for
a class, and just expects a callable. As long as we behave just as a
constructor would have, we should be OK. If it ever changes so that
we *must* pass a class, we'll create an UnsafeHTTPSConnection class
which just sets check_domain to False in the class definition, and
choose which one to pass to do_open.
"""
result = HTTPSConnection(*args, **kwargs)
if self.ca_certs:
result.ca_certs = self.ca_certs
result.check_domain = self.check_domain
return result
def https_open(self, req):
try:
return self.do_open(self._conn_maker, req)
except URLError as e:
if 'certificate verify failed' in str(e.reason):
raise CertificateError('Unable to verify server certificate '
'for %s' % req.host)
else:
raise
#
# To prevent against mixing HTTP traffic with HTTPS (examples: A Man-In-The-
# Middle proxy using HTTP listens on port 443, or an index mistakenly serves
# HTML containing a http://xyz link when it should be https://xyz),
# you can use the following handler class, which does not allow HTTP traffic.
#
# It works by inheriting from HTTPHandler - so build_opener won't add a
# handler for HTTP itself.
#
class HTTPSOnlyHandler(HTTPSHandler, HTTPHandler):
def http_open(self, req):
raise URLError('Unexpected HTTP request on what should be a secure '
'connection: %s' % req)
#
# XML-RPC with timeouts
#
_ver_info = sys.version_info[:2]
if _ver_info == (2, 6):
class HTTP(httplib.HTTP):
def __init__(self, host='', port=None, **kwargs):
if port == 0: # 0 means use port 0, not the default port
port = None
self._setup(self._connection_class(host, port, **kwargs))
if ssl:
class HTTPS(httplib.HTTPS):
def __init__(self, host='', port=None, **kwargs):
if port == 0: # 0 means use port 0, not the default port
port = None
self._setup(self._connection_class(host, port, **kwargs))
class Transport(xmlrpclib.Transport):
def __init__(self, timeout, use_datetime=0):
self.timeout = timeout
xmlrpclib.Transport.__init__(self, use_datetime)
def make_connection(self, host):
h, eh, x509 = self.get_host_info(host)
if _ver_info == (2, 6):
result = HTTP(h, timeout=self.timeout)
else:
if not self._connection or host != self._connection[0]:
self._extra_headers = eh
self._connection = host, httplib.HTTPConnection(h)
result = self._connection[1]
return result
if ssl:
class SafeTransport(xmlrpclib.SafeTransport):
def __init__(self, timeout, use_datetime=0):
self.timeout = timeout
xmlrpclib.SafeTransport.__init__(self, use_datetime)
def make_connection(self, host):
h, eh, kwargs = self.get_host_info(host)
if not kwargs:
kwargs = {}
kwargs['timeout'] = self.timeout
if _ver_info == (2, 6):
result = HTTPS(host, None, **kwargs)
else:
if not self._connection or host != self._connection[0]:
self._extra_headers = eh
self._connection = host, httplib.HTTPSConnection(h, None,
**kwargs)
result = self._connection[1]
return result
class ServerProxy(xmlrpclib.ServerProxy):
def __init__(self, uri, **kwargs):
self.timeout = timeout = kwargs.pop('timeout', None)
# The above classes only come into play if a timeout
# is specified
if timeout is not None:
scheme, _ = splittype(uri)
use_datetime = kwargs.get('use_datetime', 0)
if scheme == 'https':
tcls = SafeTransport
else:
tcls = Transport
kwargs['transport'] = t = tcls(timeout, use_datetime=use_datetime)
self.transport = t
xmlrpclib.ServerProxy.__init__(self, uri, **kwargs)
#
# CSV functionality. This is provided because on 2.x, the csv module can't
# handle Unicode. However, we need to deal with Unicode in e.g. RECORD files.
#
def _csv_open(fn, mode, **kwargs):
if sys.version_info[0] < 3:
mode += 'b'
else:
kwargs['newline'] = ''
return open(fn, mode, **kwargs)
class CSVBase(object):
defaults = {
'delimiter': str(','), # The strs are used because we need native
'quotechar': str('"'), # str in the csv API (2.x won't take
'lineterminator': str('\n') # Unicode)
}
def __enter__(self):
return self
def __exit__(self, *exc_info):
self.stream.close()
class CSVReader(CSVBase):
def __init__(self, **kwargs):
if 'stream' in kwargs:
stream = kwargs['stream']
if sys.version_info[0] >= 3:
# needs to be a text stream
stream = codecs.getreader('utf-8')(stream)
self.stream = stream
else:
self.stream = _csv_open(kwargs['path'], 'r')
self.reader = csv.reader(self.stream, **self.defaults)
def __iter__(self):
return self
def next(self):
result = next(self.reader)
if sys.version_info[0] < 3:
for i, item in enumerate(result):
if not isinstance(item, text_type):
result[i] = item.decode('utf-8')
return result
__next__ = next
class CSVWriter(CSVBase):
def __init__(self, fn, **kwargs):
self.stream = _csv_open(fn, 'w')
self.writer = csv.writer(self.stream, **self.defaults)
def writerow(self, row):
if sys.version_info[0] < 3:
r = []
for item in row:
if isinstance(item, text_type):
item = item.encode('utf-8')
r.append(item)
row = r
self.writer.writerow(row)
#
# Configurator functionality
#
class Configurator(BaseConfigurator):
value_converters = dict(BaseConfigurator.value_converters)
value_converters['inc'] = 'inc_convert'
def __init__(self, config, base=None):
super(Configurator, self).__init__(config)
self.base = base or os.getcwd()
def configure_custom(self, config):
def convert(o):
if isinstance(o, (list, tuple)):
result = type(o)([convert(i) for i in o])
elif isinstance(o, dict):
if '()' in o:
result = self.configure_custom(o)
else:
result = {}
for k in o:
result[k] = convert(o[k])
else:
result = self.convert(o)
return result
c = config.pop('()')
if not callable(c):
c = self.resolve(c)
props = config.pop('.', None)
# Check for valid identifiers
args = config.pop('[]', ())
if args:
args = tuple([convert(o) for o in args])
items = [(k, convert(config[k])) for k in config if valid_ident(k)]
kwargs = dict(items)
result = c(*args, **kwargs)
if props:
for n, v in props.items():
setattr(result, n, convert(v))
return result
def __getitem__(self, key):
result = self.config[key]
if isinstance(result, dict) and '()' in result:
self.config[key] = result = self.configure_custom(result)
return result
def inc_convert(self, value):
"""Default converter for the inc:// protocol."""
if not os.path.isabs(value):
value = os.path.join(self.base, value)
with codecs.open(value, 'r', encoding='utf-8') as f:
result = json.load(f)
return result
#
# Mixin for running subprocesses and capturing their output
#
class SubprocessMixin(object):
def __init__(self, verbose=False, progress=None):
self.verbose = verbose
self.progress = progress
def reader(self, stream, context):
"""
Read lines from a subprocess' output stream and either pass to a progress
callable (if specified) or write progress information to sys.stderr.
"""
progress = self.progress
verbose = self.verbose
while True:
s = stream.readline()
if not s:
break
if progress is not None:
progress(s, context)
else:
if not verbose:
sys.stderr.write('.')
else:
sys.stderr.write(s.decode('utf-8'))
sys.stderr.flush()
stream.close()
def run_command(self, cmd, **kwargs):
p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, **kwargs)
t1 = threading.Thread(target=self.reader, args=(p.stdout, 'stdout'))
t1.start()
t2 = threading.Thread(target=self.reader, args=(p.stderr, 'stderr'))
t2.start()
p.wait()
t1.join()
t2.join()
if self.progress is not None:
self.progress('done.', 'main')
elif self.verbose:
sys.stderr.write('done.\n')
return p
def normalize_name(name):
"""Normalize a python package name a la PEP 503"""
# https://www.python.org/dev/peps/pep-0503/#normalized-names
return re.sub('[-_.]+', '-', name).lower()
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2016 The Python Software Foundation.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
"""
Implementation of a flexible versioning scheme providing support for PEP-440,
setuptools-compatible and semantic versioning.
"""
import logging
import re
from .compat import string_types
__all__ = ['NormalizedVersion', 'NormalizedMatcher',
'LegacyVersion', 'LegacyMatcher',
'SemanticVersion', 'SemanticMatcher',
'UnsupportedVersionError', 'get_scheme']
logger = logging.getLogger(__name__)
class UnsupportedVersionError(ValueError):
"""This is an unsupported version."""
pass
class Version(object):
def __init__(self, s):
self._string = s = s.strip()
self._parts = parts = self.parse(s)
assert isinstance(parts, tuple)
assert len(parts) > 0
def parse(self, s):
raise NotImplementedError('please implement in a subclass')
def _check_compatible(self, other):
if type(self) != type(other):
raise TypeError('cannot compare %r and %r' % (self, other))
def __eq__(self, other):
self._check_compatible(other)
return self._parts == other._parts
def __ne__(self, other):
return not self.__eq__(other)
def __lt__(self, other):
self._check_compatible(other)
return self._parts < other._parts
def __gt__(self, other):
return not (self.__lt__(other) or self.__eq__(other))
def __le__(self, other):
return self.__lt__(other) or self.__eq__(other)
def __ge__(self, other):
return self.__gt__(other) or self.__eq__(other)
# See http://docs.python.org/reference/datamodel#object.__hash__
def __hash__(self):
return hash(self._parts)
def __repr__(self):
return "%s('%s')" % (self.__class__.__name__, self._string)
def __str__(self):
return self._string
@property
def is_prerelease(self):
raise NotImplementedError('Please implement in subclasses.')
class Matcher(object):
version_class = None
dist_re = re.compile(r"^(\w[\s\w'.-]*)(\((.*)\))?")
comp_re = re.compile(r'^(<=|>=|<|>|!=|={2,3}|~=)?\s*([^\s,]+)$')
num_re = re.compile(r'^\d+(\.\d+)*$')
# value is either a callable or the name of a method
_operators = {
'<': lambda v, c, p: v < c,
'>': lambda v, c, p: v > c,
'<=': lambda v, c, p: v == c or v < c,
'>=': lambda v, c, p: v == c or v > c,
'==': lambda v, c, p: v == c,
'===': lambda v, c, p: v == c,
# by default, compatible => >=.
'~=': lambda v, c, p: v == c or v > c,
'!=': lambda v, c, p: v != c,
}
def __init__(self, s):
if self.version_class is None:
raise ValueError('Please specify a version class')
self._string = s = s.strip()
m = self.dist_re.match(s)
if not m:
raise ValueError('Not valid: %r' % s)
groups = m.groups('')
self.name = groups[0].strip()
self.key = self.name.lower() # for case-insensitive comparisons
clist = []
if groups[2]:
constraints = [c.strip() for c in groups[2].split(',')]
for c in constraints:
m = self.comp_re.match(c)
if not m:
raise ValueError('Invalid %r in %r' % (c, s))
groups = m.groups()
op = groups[0] or '~='
s = groups[1]
if s.endswith('.*'):
if op not in ('==', '!='):
raise ValueError('\'.*\' not allowed for '
'%r constraints' % op)
# Could be a partial version (e.g. for '2.*') which
# won't parse as a version, so keep it as a string
vn, prefix = s[:-2], True
if not self.num_re.match(vn):
# Just to check that vn is a valid version
self.version_class(vn)
else:
# Should parse as a version, so we can create an
# instance for the comparison
vn, prefix = self.version_class(s), False
clist.append((op, vn, prefix))
self._parts = tuple(clist)
def match(self, version):
"""
Check if the provided version matches the constraints.
:param version: The version to match against this instance.
:type version: String or :class:`Version` instance.
"""
if isinstance(version, string_types):
version = self.version_class(version)
for operator, constraint, prefix in self._parts:
f = self._operators.get(operator)
if isinstance(f, string_types):
f = getattr(self, f)
if not f:
msg = ('%r not implemented '
'for %s' % (operator, self.__class__.__name__))
raise NotImplementedError(msg)
if not f(version, constraint, prefix):
return False
return True
@property
def exact_version(self):
result = None
if len(self._parts) == 1 and self._parts[0][0] in ('==', '==='):
result = self._parts[0][1]
return result
def _check_compatible(self, other):
if type(self) != type(other) or self.name != other.name:
raise TypeError('cannot compare %s and %s' % (self, other))
def __eq__(self, other):
self._check_compatible(other)
return self.key == other.key and self._parts == other._parts
def __ne__(self, other):
return not self.__eq__(other)
# See http://docs.python.org/reference/datamodel#object.__hash__
def __hash__(self):
return hash(self.key) + hash(self._parts)
def __repr__(self):
return "%s(%r)" % (self.__class__.__name__, self._string)
def __str__(self):
return self._string
PEP440_VERSION_RE = re.compile(r'^v?(\d+!)?(\d+(\.\d+)*)((a|b|c|rc)(\d+))?'
r'(\.(post)(\d+))?(\.(dev)(\d+))?'
r'(\+([a-zA-Z\d]+(\.[a-zA-Z\d]+)?))?$')
def _pep_440_key(s):
s = s.strip()
m = PEP440_VERSION_RE.match(s)
if not m:
raise UnsupportedVersionError('Not a valid version: %s' % s)
groups = m.groups()
nums = tuple(int(v) for v in groups[1].split('.'))
while len(nums) > 1 and nums[-1] == 0:
nums = nums[:-1]
if not groups[0]:
epoch = 0
else:
epoch = int(groups[0])
pre = groups[4:6]
post = groups[7:9]
dev = groups[10:12]
local = groups[13]
if pre == (None, None):
pre = ()
else:
pre = pre[0], int(pre[1])
if post == (None, None):
post = ()
else:
post = post[0], int(post[1])
if dev == (None, None):
dev = ()
else:
dev = dev[0], int(dev[1])
if local is None:
local = ()
else:
parts = []
for part in local.split('.'):
# to ensure that numeric compares as > lexicographic, avoid
# comparing them directly, but encode a tuple which ensures
# correct sorting
if part.isdigit():
part = (1, int(part))
else:
part = (0, part)
parts.append(part)
local = tuple(parts)
if not pre:
# either before pre-release, or final release and after
if not post and dev:
# before pre-release
pre = ('a', -1) # to sort before a0
else:
pre = ('z',) # to sort after all pre-releases
# now look at the state of post and dev.
if not post:
post = ('_',) # sort before 'a'
if not dev:
dev = ('final',)
#print('%s -> %s' % (s, m.groups()))
return epoch, nums, pre, post, dev, local
_normalized_key = _pep_440_key
class NormalizedVersion(Version):
"""A rational version.
Good:
1.2 # equivalent to "1.2.0"
1.2.0
1.2a1
1.2.3a2
1.2.3b1
1.2.3c1
1.2.3.4
TODO: fill this out
Bad:
1 # minimum two numbers
1.2a # release level must have a release serial
1.2.3b
"""
def parse(self, s):
result = _normalized_key(s)
# _normalized_key loses trailing zeroes in the release
# clause, since that's needed to ensure that X.Y == X.Y.0 == X.Y.0.0
# However, PEP 440 prefix matching needs it: for example,
# (~= 1.4.5.0) matches differently to (~= 1.4.5.0.0).
m = PEP440_VERSION_RE.match(s) # must succeed
groups = m.groups()
self._release_clause = tuple(int(v) for v in groups[1].split('.'))
return result
PREREL_TAGS = set(['a', 'b', 'c', 'rc', 'dev'])
@property
def is_prerelease(self):
return any(t[0] in self.PREREL_TAGS for t in self._parts if t)
def _match_prefix(x, y):
x = str(x)
y = str(y)
if x == y:
return True
if not x.startswith(y):
return False
n = len(y)
return x[n] == '.'
class NormalizedMatcher(Matcher):
version_class = NormalizedVersion
# value is either a callable or the name of a method
_operators = {
'~=': '_match_compatible',
'<': '_match_lt',
'>': '_match_gt',
'<=': '_match_le',
'>=': '_match_ge',
'==': '_match_eq',
'===': '_match_arbitrary',
'!=': '_match_ne',
}
def _adjust_local(self, version, constraint, prefix):
if prefix:
strip_local = '+' not in constraint and version._parts[-1]
else:
# both constraint and version are
# NormalizedVersion instances.
# If constraint does not have a local component,
# ensure the version doesn't, either.
strip_local = not constraint._parts[-1] and version._parts[-1]
if strip_local:
s = version._string.split('+', 1)[0]
version = self.version_class(s)
return version, constraint
def _match_lt(self, version, constraint, prefix):
version, constraint = self._adjust_local(version, constraint, prefix)
if version >= constraint:
return False
release_clause = constraint._release_clause
pfx = '.'.join([str(i) for i in release_clause])
return not _match_prefix(version, pfx)
def _match_gt(self, version, constraint, prefix):
version, constraint = self._adjust_local(version, constraint, prefix)
if version <= constraint:
return False
release_clause = constraint._release_clause
pfx = '.'.join([str(i) for i in release_clause])
return not _match_prefix(version, pfx)
def _match_le(self, version, constraint, prefix):
version, constraint = self._adjust_local(version, constraint, prefix)
return version <= constraint
def _match_ge(self, version, constraint, prefix):
version, constraint = self._adjust_local(version, constraint, prefix)
return version >= constraint
def _match_eq(self, version, constraint, prefix):
version, constraint = self._adjust_local(version, constraint, prefix)
if not prefix:
result = (version == constraint)
else:
result = _match_prefix(version, constraint)
return result
def _match_arbitrary(self, version, constraint, prefix):
return str(version) == str(constraint)
def _match_ne(self, version, constraint, prefix):
version, constraint = self._adjust_local(version, constraint, prefix)
if not prefix:
result = (version != constraint)
else:
result = not _match_prefix(version, constraint)
return result
def _match_compatible(self, version, constraint, prefix):
version, constraint = self._adjust_local(version, constraint, prefix)
if version == constraint:
return True
if version < constraint:
return False
# if not prefix:
# return True
release_clause = constraint._release_clause
if len(release_clause) > 1:
release_clause = release_clause[:-1]
pfx = '.'.join([str(i) for i in release_clause])
return _match_prefix(version, pfx)
_REPLACEMENTS = (
(re.compile('[.+-]$'), ''), # remove trailing puncts
(re.compile(r'^[.](\d)'), r'0.\1'), # .N -> 0.N at start
(re.compile('^[.-]'), ''), # remove leading puncts
(re.compile(r'^\((.*)\)$'), r'\1'), # remove parentheses
(re.compile(r'^v(ersion)?\s*(\d+)'), r'\2'), # remove leading v(ersion)
(re.compile(r'^r(ev)?\s*(\d+)'), r'\2'), # remove leading v(ersion)
(re.compile('[.]{2,}'), '.'), # multiple runs of '.'
(re.compile(r'\b(alfa|apha)\b'), 'alpha'), # misspelt alpha
(re.compile(r'\b(pre-alpha|prealpha)\b'),
'pre.alpha'), # standardise
(re.compile(r'\(beta\)$'), 'beta'), # remove parentheses
)
_SUFFIX_REPLACEMENTS = (
(re.compile('^[:~._+-]+'), ''), # remove leading puncts
(re.compile('[,*")([\]]'), ''), # remove unwanted chars
(re.compile('[~:+_ -]'), '.'), # replace illegal chars
(re.compile('[.]{2,}'), '.'), # multiple runs of '.'
(re.compile(r'\.$'), ''), # trailing '.'
)
_NUMERIC_PREFIX = re.compile(r'(\d+(\.\d+)*)')
def _suggest_semantic_version(s):
"""
Try to suggest a semantic form for a version for which
_suggest_normalized_version couldn't come up with anything.
"""
result = s.strip().lower()
for pat, repl in _REPLACEMENTS:
result = pat.sub(repl, result)
if not result:
result = '0.0.0'
# Now look for numeric prefix, and separate it out from
# the rest.
#import pdb; pdb.set_trace()
m = _NUMERIC_PREFIX.match(result)
if not m:
prefix = '0.0.0'
suffix = result
else:
prefix = m.groups()[0].split('.')
prefix = [int(i) for i in prefix]
while len(prefix) < 3:
prefix.append(0)
if len(prefix) == 3:
suffix = result[m.end():]
else:
suffix = '.'.join([str(i) for i in prefix[3:]]) + result[m.end():]
prefix = prefix[:3]
prefix = '.'.join([str(i) for i in prefix])
suffix = suffix.strip()
if suffix:
#import pdb; pdb.set_trace()
# massage the suffix.
for pat, repl in _SUFFIX_REPLACEMENTS:
suffix = pat.sub(repl, suffix)
if not suffix:
result = prefix
else:
sep = '-' if 'dev' in suffix else '+'
result = prefix + sep + suffix
if not is_semver(result):
result = None
return result
def _suggest_normalized_version(s):
"""Suggest a normalized version close to the given version string.
If you have a version string that isn't rational (i.e. NormalizedVersion
doesn't like it) then you might be able to get an equivalent (or close)
rational version from this function.
This does a number of simple normalizations to the given string, based
on observation of versions currently in use on PyPI. Given a dump of
those version during PyCon 2009, 4287 of them:
- 2312 (53.93%) match NormalizedVersion without change
with the automatic suggestion
- 3474 (81.04%) match when using this suggestion method
@param s {str} An irrational version string.
@returns A rational version string, or None, if couldn't determine one.
"""
try:
_normalized_key(s)
return s # already rational
except UnsupportedVersionError:
pass
rs = s.lower()
# part of this could use maketrans
for orig, repl in (('-alpha', 'a'), ('-beta', 'b'), ('alpha', 'a'),
('beta', 'b'), ('rc', 'c'), ('-final', ''),
('-pre', 'c'),
('-release', ''), ('.release', ''), ('-stable', ''),
('+', '.'), ('_', '.'), (' ', ''), ('.final', ''),
('final', '')):
rs = rs.replace(orig, repl)
# if something ends with dev or pre, we add a 0
rs = re.sub(r"pre$", r"pre0", rs)
rs = re.sub(r"dev$", r"dev0", rs)
# if we have something like "b-2" or "a.2" at the end of the
# version, that is probably beta, alpha, etc
# let's remove the dash or dot
rs = re.sub(r"([abc]|rc)[\-\.](\d+)$", r"\1\2", rs)
# 1.0-dev-r371 -> 1.0.dev371
# 0.1-dev-r79 -> 0.1.dev79
rs = re.sub(r"[\-\.](dev)[\-\.]?r?(\d+)$", r".\1\2", rs)
# Clean: 2.0.a.3, 2.0.b1, 0.9.0~c1
rs = re.sub(r"[.~]?([abc])\.?", r"\1", rs)
# Clean: v0.3, v1.0
if rs.startswith('v'):
rs = rs[1:]
# Clean leading '0's on numbers.
#TODO: unintended side-effect on, e.g., "2003.05.09"
# PyPI stats: 77 (~2%) better
rs = re.sub(r"\b0+(\d+)(?!\d)", r"\1", rs)
# Clean a/b/c with no version. E.g. "1.0a" -> "1.0a0". Setuptools infers
# zero.
# PyPI stats: 245 (7.56%) better
rs = re.sub(r"(\d+[abc])$", r"\g<1>0", rs)
# the 'dev-rNNN' tag is a dev tag
rs = re.sub(r"\.?(dev-r|dev\.r)\.?(\d+)$", r".dev\2", rs)
# clean the - when used as a pre delimiter
rs = re.sub(r"-(a|b|c)(\d+)$", r"\1\2", rs)
# a terminal "dev" or "devel" can be changed into ".dev0"
rs = re.sub(r"[\.\-](dev|devel)$", r".dev0", rs)
# a terminal "dev" can be changed into ".dev0"
rs = re.sub(r"(?![\.\-])dev$", r".dev0", rs)
# a terminal "final" or "stable" can be removed
rs = re.sub(r"(final|stable)$", "", rs)
# The 'r' and the '-' tags are post release tags
# 0.4a1.r10 -> 0.4a1.post10
# 0.9.33-17222 -> 0.9.33.post17222
# 0.9.33-r17222 -> 0.9.33.post17222
rs = re.sub(r"\.?(r|-|-r)\.?(\d+)$", r".post\2", rs)
# Clean 'r' instead of 'dev' usage:
# 0.9.33+r17222 -> 0.9.33.dev17222
# 1.0dev123 -> 1.0.dev123
# 1.0.git123 -> 1.0.dev123
# 1.0.bzr123 -> 1.0.dev123
# 0.1a0dev.123 -> 0.1a0.dev123
# PyPI stats: ~150 (~4%) better
rs = re.sub(r"\.?(dev|git|bzr)\.?(\d+)$", r".dev\2", rs)
# Clean '.pre' (normalized from '-pre' above) instead of 'c' usage:
# 0.2.pre1 -> 0.2c1
# 0.2-c1 -> 0.2c1
# 1.0preview123 -> 1.0c123
# PyPI stats: ~21 (0.62%) better
rs = re.sub(r"\.?(pre|preview|-c)(\d+)$", r"c\g<2>", rs)
# Tcl/Tk uses "px" for their post release markers
rs = re.sub(r"p(\d+)$", r".post\1", rs)
try:
_normalized_key(rs)
except UnsupportedVersionError:
rs = None
return rs
#
# Legacy version processing (distribute-compatible)
#
_VERSION_PART = re.compile(r'([a-z]+|\d+|[\.-])', re.I)
_VERSION_REPLACE = {
'pre': 'c',
'preview': 'c',
'-': 'final-',
'rc': 'c',
'dev': '@',
'': None,
'.': None,
}
def _legacy_key(s):
def get_parts(s):
result = []
for p in _VERSION_PART.split(s.lower()):
p = _VERSION_REPLACE.get(p, p)
if p:
if '0' <= p[:1] <= '9':
p = p.zfill(8)
else:
p = '*' + p
result.append(p)
result.append('*final')
return result
result = []
for p in get_parts(s):
if p.startswith('*'):
if p < '*final':
while result and result[-1] == '*final-':
result.pop()
while result and result[-1] == '00000000':
result.pop()
result.append(p)
return tuple(result)
class LegacyVersion(Version):
def parse(self, s):
return _legacy_key(s)
@property
def is_prerelease(self):
result = False
for x in self._parts:
if (isinstance(x, string_types) and x.startswith('*') and
x < '*final'):
result = True
break
return result
class LegacyMatcher(Matcher):
version_class = LegacyVersion
_operators = dict(Matcher._operators)
_operators['~='] = '_match_compatible'
numeric_re = re.compile('^(\d+(\.\d+)*)')
def _match_compatible(self, version, constraint, prefix):
if version < constraint:
return False
m = self.numeric_re.match(str(constraint))
if not m:
logger.warning('Cannot compute compatible match for version %s '
' and constraint %s', version, constraint)
return True
s = m.groups()[0]
if '.' in s:
s = s.rsplit('.', 1)[0]
return _match_prefix(version, s)
#
# Semantic versioning
#
_SEMVER_RE = re.compile(r'^(\d+)\.(\d+)\.(\d+)'
r'(-[a-z0-9]+(\.[a-z0-9-]+)*)?'
r'(\+[a-z0-9]+(\.[a-z0-9-]+)*)?$', re.I)
def is_semver(s):
return _SEMVER_RE.match(s)
def _semantic_key(s):
def make_tuple(s, absent):
if s is None:
result = (absent,)
else:
parts = s[1:].split('.')
# We can't compare ints and strings on Python 3, so fudge it
# by zero-filling numeric values so simulate a numeric comparison
result = tuple([p.zfill(8) if p.isdigit() else p for p in parts])
return result
m = is_semver(s)
if not m:
raise UnsupportedVersionError(s)
groups = m.groups()
major, minor, patch = [int(i) for i in groups[:3]]
# choose the '|' and '*' so that versions sort correctly
pre, build = make_tuple(groups[3], '|'), make_tuple(groups[5], '*')
return (major, minor, patch), pre, build
class SemanticVersion(Version):
def parse(self, s):
return _semantic_key(s)
@property
def is_prerelease(self):
return self._parts[1][0] != '|'
class SemanticMatcher(Matcher):
version_class = SemanticVersion
class VersionScheme(object):
def __init__(self, key, matcher, suggester=None):
self.key = key
self.matcher = matcher
self.suggester = suggester
def is_valid_version(self, s):
try:
self.matcher.version_class(s)
result = True
except UnsupportedVersionError:
result = False
return result
def is_valid_matcher(self, s):
try:
self.matcher(s)
result = True
except UnsupportedVersionError:
result = False
return result
def is_valid_constraint_list(self, s):
"""
Used for processing some metadata fields
"""
return self.is_valid_matcher('dummy_name (%s)' % s)
def suggest(self, s):
if self.suggester is None:
result = None
else:
result = self.suggester(s)
return result
_SCHEMES = {
'normalized': VersionScheme(_normalized_key, NormalizedMatcher,
_suggest_normalized_version),
'legacy': VersionScheme(_legacy_key, LegacyMatcher, lambda self, s: s),
'semantic': VersionScheme(_semantic_key, SemanticMatcher,
_suggest_semantic_version),
}
_SCHEMES['default'] = _SCHEMES['normalized']
def get_scheme(name):
if name not in _SCHEMES:
raise ValueError('unknown scheme name: %r' % name)
return _SCHEMES[name]
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013-2016 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
from __future__ import unicode_literals
import base64
import codecs
import datetime
import distutils.util
from email import message_from_file
import hashlib
import imp
import json
import logging
import os
import posixpath
import re
import shutil
import sys
import tempfile
import zipfile
from . import __version__, DistlibException
from .compat import sysconfig, ZipFile, fsdecode, text_type, filter
from .database import InstalledDistribution
from .metadata import Metadata, METADATA_FILENAME
from .util import (FileOperator, convert_path, CSVReader, CSVWriter, Cache,
cached_property, get_cache_base, read_exports, tempdir)
from .version import NormalizedVersion, UnsupportedVersionError
logger = logging.getLogger(__name__)
cache = None # created when needed
if hasattr(sys, 'pypy_version_info'):
IMP_PREFIX = 'pp'
elif sys.platform.startswith('java'):
IMP_PREFIX = 'jy'
elif sys.platform == 'cli':
IMP_PREFIX = 'ip'
else:
IMP_PREFIX = 'cp'
VER_SUFFIX = sysconfig.get_config_var('py_version_nodot')
if not VER_SUFFIX: # pragma: no cover
VER_SUFFIX = '%s%s' % sys.version_info[:2]
PYVER = 'py' + VER_SUFFIX
IMPVER = IMP_PREFIX + VER_SUFFIX
ARCH = distutils.util.get_platform().replace('-', '_').replace('.', '_')
ABI = sysconfig.get_config_var('SOABI')
if ABI and ABI.startswith('cpython-'):
ABI = ABI.replace('cpython-', 'cp')
else:
def _derive_abi():
parts = ['cp', VER_SUFFIX]
if sysconfig.get_config_var('Py_DEBUG'):
parts.append('d')
if sysconfig.get_config_var('WITH_PYMALLOC'):
parts.append('m')
if sysconfig.get_config_var('Py_UNICODE_SIZE') == 4:
parts.append('u')
return ''.join(parts)
ABI = _derive_abi()
del _derive_abi
FILENAME_RE = re.compile(r'''
(?P<nm>[^-]+)
-(?P<vn>\d+[^-]*)
(-(?P<bn>\d+[^-]*))?
-(?P<py>\w+\d+(\.\w+\d+)*)
-(?P<bi>\w+)
-(?P<ar>\w+(\.\w+)*)
\.whl$
''', re.IGNORECASE | re.VERBOSE)
NAME_VERSION_RE = re.compile(r'''
(?P<nm>[^-]+)
-(?P<vn>\d+[^-]*)
(-(?P<bn>\d+[^-]*))?$
''', re.IGNORECASE | re.VERBOSE)
SHEBANG_RE = re.compile(br'\s*#![^\r\n]*')
SHEBANG_DETAIL_RE = re.compile(br'^(\s*#!("[^"]+"|\S+))\s+(.*)$')
SHEBANG_PYTHON = b'#!python'
SHEBANG_PYTHONW = b'#!pythonw'
if os.sep == '/':
to_posix = lambda o: o
else:
to_posix = lambda o: o.replace(os.sep, '/')
class Mounter(object):
def __init__(self):
self.impure_wheels = {}
self.libs = {}
def add(self, pathname, extensions):
self.impure_wheels[pathname] = extensions
self.libs.update(extensions)
def remove(self, pathname):
extensions = self.impure_wheels.pop(pathname)
for k, v in extensions:
if k in self.libs:
del self.libs[k]
def find_module(self, fullname, path=None):
if fullname in self.libs:
result = self
else:
result = None
return result
def load_module(self, fullname):
if fullname in sys.modules:
result = sys.modules[fullname]
else:
if fullname not in self.libs:
raise ImportError('unable to find extension for %s' % fullname)
result = imp.load_dynamic(fullname, self.libs[fullname])
result.__loader__ = self
parts = fullname.rsplit('.', 1)
if len(parts) > 1:
result.__package__ = parts[0]
return result
_hook = Mounter()
class Wheel(object):
"""
Class to build and install from Wheel files (PEP 427).
"""
wheel_version = (1, 1)
hash_kind = 'sha256'
def __init__(self, filename=None, sign=False, verify=False):
"""
Initialise an instance using a (valid) filename.
"""
self.sign = sign
self.should_verify = verify
self.buildver = ''
self.pyver = [PYVER]
self.abi = ['none']
self.arch = ['any']
self.dirname = os.getcwd()
if filename is None:
self.name = 'dummy'
self.version = '0.1'
self._filename = self.filename
else:
m = NAME_VERSION_RE.match(filename)
if m:
info = m.groupdict('')
self.name = info['nm']
# Reinstate the local version separator
self.version = info['vn'].replace('_', '-')
self.buildver = info['bn']
self._filename = self.filename
else:
dirname, filename = os.path.split(filename)
m = FILENAME_RE.match(filename)
if not m:
raise DistlibException('Invalid name or '
'filename: %r' % filename)
if dirname:
self.dirname = os.path.abspath(dirname)
self._filename = filename
info = m.groupdict('')
self.name = info['nm']
self.version = info['vn']
self.buildver = info['bn']
self.pyver = info['py'].split('.')
self.abi = info['bi'].split('.')
self.arch = info['ar'].split('.')
@property
def filename(self):
"""
Build and return a filename from the various components.
"""
if self.buildver:
buildver = '-' + self.buildver
else:
buildver = ''
pyver = '.'.join(self.pyver)
abi = '.'.join(self.abi)
arch = '.'.join(self.arch)
# replace - with _ as a local version separator
version = self.version.replace('-', '_')
return '%s-%s%s-%s-%s-%s.whl' % (self.name, version, buildver,
pyver, abi, arch)
@property
def exists(self):
path = os.path.join(self.dirname, self.filename)
return os.path.isfile(path)
@property
def tags(self):
for pyver in self.pyver:
for abi in self.abi:
for arch in self.arch:
yield pyver, abi, arch
@cached_property
def metadata(self):
pathname = os.path.join(self.dirname, self.filename)
name_ver = '%s-%s' % (self.name, self.version)
info_dir = '%s.dist-info' % name_ver
wrapper = codecs.getreader('utf-8')
with ZipFile(pathname, 'r') as zf:
wheel_metadata = self.get_wheel_metadata(zf)
wv = wheel_metadata['Wheel-Version'].split('.', 1)
file_version = tuple([int(i) for i in wv])
if file_version < (1, 1):
fn = 'METADATA'
else:
fn = METADATA_FILENAME
try:
metadata_filename = posixpath.join(info_dir, fn)
with zf.open(metadata_filename) as bf:
wf = wrapper(bf)
result = Metadata(fileobj=wf)
except KeyError:
raise ValueError('Invalid wheel, because %s is '
'missing' % fn)
return result
def get_wheel_metadata(self, zf):
name_ver = '%s-%s' % (self.name, self.version)
info_dir = '%s.dist-info' % name_ver
metadata_filename = posixpath.join(info_dir, 'WHEEL')
with zf.open(metadata_filename) as bf:
wf = codecs.getreader('utf-8')(bf)
message = message_from_file(wf)
return dict(message)
@cached_property
def info(self):
pathname = os.path.join(self.dirname, self.filename)
with ZipFile(pathname, 'r') as zf:
result = self.get_wheel_metadata(zf)
return result
def process_shebang(self, data):
m = SHEBANG_RE.match(data)
if m:
end = m.end()
shebang, data_after_shebang = data[:end], data[end:]
# Preserve any arguments after the interpreter
if b'pythonw' in shebang.lower():
shebang_python = SHEBANG_PYTHONW
else:
shebang_python = SHEBANG_PYTHON
m = SHEBANG_DETAIL_RE.match(shebang)
if m:
args = b' ' + m.groups()[-1]
else:
args = b''
shebang = shebang_python + args
data = shebang + data_after_shebang
else:
cr = data.find(b'\r')
lf = data.find(b'\n')
if cr < 0 or cr > lf:
term = b'\n'
else:
if data[cr:cr + 2] == b'\r\n':
term = b'\r\n'
else:
term = b'\r'
data = SHEBANG_PYTHON + term + data
return data
def get_hash(self, data, hash_kind=None):
if hash_kind is None:
hash_kind = self.hash_kind
try:
hasher = getattr(hashlib, hash_kind)
except AttributeError:
raise DistlibException('Unsupported hash algorithm: %r' % hash_kind)
result = hasher(data).digest()
result = base64.urlsafe_b64encode(result).rstrip(b'=').decode('ascii')
return hash_kind, result
def write_record(self, records, record_path, base):
records = list(records) # make a copy for sorting
p = to_posix(os.path.relpath(record_path, base))
records.append((p, '', ''))
records.sort()
with CSVWriter(record_path) as writer:
for row in records:
writer.writerow(row)
def write_records(self, info, libdir, archive_paths):
records = []
distinfo, info_dir = info
hasher = getattr(hashlib, self.hash_kind)
for ap, p in archive_paths:
with open(p, 'rb') as f:
data = f.read()
digest = '%s=%s' % self.get_hash(data)
size = os.path.getsize(p)
records.append((ap, digest, size))
p = os.path.join(distinfo, 'RECORD')
self.write_record(records, p, libdir)
ap = to_posix(os.path.join(info_dir, 'RECORD'))
archive_paths.append((ap, p))
def build_zip(self, pathname, archive_paths):
with ZipFile(pathname, 'w', zipfile.ZIP_DEFLATED) as zf:
for ap, p in archive_paths:
logger.debug('Wrote %s to %s in wheel', p, ap)
zf.write(p, ap)
def build(self, paths, tags=None, wheel_version=None):
"""
Build a wheel from files in specified paths, and use any specified tags
when determining the name of the wheel.
"""
if tags is None:
tags = {}
libkey = list(filter(lambda o: o in paths, ('purelib', 'platlib')))[0]
if libkey == 'platlib':
is_pure = 'false'
default_pyver = [IMPVER]
default_abi = [ABI]
default_arch = [ARCH]
else:
is_pure = 'true'
default_pyver = [PYVER]
default_abi = ['none']
default_arch = ['any']
self.pyver = tags.get('pyver', default_pyver)
self.abi = tags.get('abi', default_abi)
self.arch = tags.get('arch', default_arch)
libdir = paths[libkey]
name_ver = '%s-%s' % (self.name, self.version)
data_dir = '%s.data' % name_ver
info_dir = '%s.dist-info' % name_ver
archive_paths = []
# First, stuff which is not in site-packages
for key in ('data', 'headers', 'scripts'):
if key not in paths:
continue
path = paths[key]
if os.path.isdir(path):
for root, dirs, files in os.walk(path):
for fn in files:
p = fsdecode(os.path.join(root, fn))
rp = os.path.relpath(p, path)
ap = to_posix(os.path.join(data_dir, key, rp))
archive_paths.append((ap, p))
if key == 'scripts' and not p.endswith('.exe'):
with open(p, 'rb') as f:
data = f.read()
data = self.process_shebang(data)
with open(p, 'wb') as f:
f.write(data)
# Now, stuff which is in site-packages, other than the
# distinfo stuff.
path = libdir
distinfo = None
for root, dirs, files in os.walk(path):
if root == path:
# At the top level only, save distinfo for later
# and skip it for now
for i, dn in enumerate(dirs):
dn = fsdecode(dn)
if dn.endswith('.dist-info'):
distinfo = os.path.join(root, dn)
del dirs[i]
break
assert distinfo, '.dist-info directory expected, not found'
for fn in files:
# comment out next suite to leave .pyc files in
if fsdecode(fn).endswith(('.pyc', '.pyo')):
continue
p = os.path.join(root, fn)
rp = to_posix(os.path.relpath(p, path))
archive_paths.append((rp, p))
# Now distinfo. Assumed to be flat, i.e. os.listdir is enough.
files = os.listdir(distinfo)
for fn in files:
if fn not in ('RECORD', 'INSTALLER', 'SHARED', 'WHEEL'):
p = fsdecode(os.path.join(distinfo, fn))
ap = to_posix(os.path.join(info_dir, fn))
archive_paths.append((ap, p))
wheel_metadata = [
'Wheel-Version: %d.%d' % (wheel_version or self.wheel_version),
'Generator: distlib %s' % __version__,
'Root-Is-Purelib: %s' % is_pure,
]
for pyver, abi, arch in self.tags:
wheel_metadata.append('Tag: %s-%s-%s' % (pyver, abi, arch))
p = os.path.join(distinfo, 'WHEEL')
with open(p, 'w') as f:
f.write('\n'.join(wheel_metadata))
ap = to_posix(os.path.join(info_dir, 'WHEEL'))
archive_paths.append((ap, p))
# Now, at last, RECORD.
# Paths in here are archive paths - nothing else makes sense.
self.write_records((distinfo, info_dir), libdir, archive_paths)
# Now, ready to build the zip file
pathname = os.path.join(self.dirname, self.filename)
self.build_zip(pathname, archive_paths)
return pathname
def install(self, paths, maker, **kwargs):
"""
Install a wheel to the specified paths. If kwarg ``warner`` is
specified, it should be a callable, which will be called with two
tuples indicating the wheel version of this software and the wheel
version in the file, if there is a discrepancy in the versions.
This can be used to issue any warnings to raise any exceptions.
If kwarg ``lib_only`` is True, only the purelib/platlib files are
installed, and the headers, scripts, data and dist-info metadata are
not written.
The return value is a :class:`InstalledDistribution` instance unless
``options.lib_only`` is True, in which case the return value is ``None``.
"""
dry_run = maker.dry_run
warner = kwargs.get('warner')
lib_only = kwargs.get('lib_only', False)
pathname = os.path.join(self.dirname, self.filename)
name_ver = '%s-%s' % (self.name, self.version)
data_dir = '%s.data' % name_ver
info_dir = '%s.dist-info' % name_ver
metadata_name = posixpath.join(info_dir, METADATA_FILENAME)
wheel_metadata_name = posixpath.join(info_dir, 'WHEEL')
record_name = posixpath.join(info_dir, 'RECORD')
wrapper = codecs.getreader('utf-8')
with ZipFile(pathname, 'r') as zf:
with zf.open(wheel_metadata_name) as bwf:
wf = wrapper(bwf)
message = message_from_file(wf)
wv = message['Wheel-Version'].split('.', 1)
file_version = tuple([int(i) for i in wv])
if (file_version != self.wheel_version) and warner:
warner(self.wheel_version, file_version)
if message['Root-Is-Purelib'] == 'true':
libdir = paths['purelib']
else:
libdir = paths['platlib']
records = {}
with zf.open(record_name) as bf:
with CSVReader(stream=bf) as reader:
for row in reader:
p = row[0]
records[p] = row
data_pfx = posixpath.join(data_dir, '')
info_pfx = posixpath.join(info_dir, '')
script_pfx = posixpath.join(data_dir, 'scripts', '')
# make a new instance rather than a copy of maker's,
# as we mutate it
fileop = FileOperator(dry_run=dry_run)
fileop.record = True # so we can rollback if needed
bc = not sys.dont_write_bytecode # Double negatives. Lovely!
outfiles = [] # for RECORD writing
# for script copying/shebang processing
workdir = tempfile.mkdtemp()
# set target dir later
# we default add_launchers to False, as the
# Python Launcher should be used instead
maker.source_dir = workdir
maker.target_dir = None
try:
for zinfo in zf.infolist():
arcname = zinfo.filename
if isinstance(arcname, text_type):
u_arcname = arcname
else:
u_arcname = arcname.decode('utf-8')
# The signature file won't be in RECORD,
# and we don't currently don't do anything with it
if u_arcname.endswith('/RECORD.jws'):
continue
row = records[u_arcname]
if row[2] and str(zinfo.file_size) != row[2]:
raise DistlibException('size mismatch for '
'%s' % u_arcname)
if row[1]:
kind, value = row[1].split('=', 1)
with zf.open(arcname) as bf:
data = bf.read()
_, digest = self.get_hash(data, kind)
if digest != value:
raise DistlibException('digest mismatch for '
'%s' % arcname)
if lib_only and u_arcname.startswith((info_pfx, data_pfx)):
logger.debug('lib_only: skipping %s', u_arcname)
continue
is_script = (u_arcname.startswith(script_pfx)
and not u_arcname.endswith('.exe'))
if u_arcname.startswith(data_pfx):
_, where, rp = u_arcname.split('/', 2)
outfile = os.path.join(paths[where], convert_path(rp))
else:
# meant for site-packages.
if u_arcname in (wheel_metadata_name, record_name):
continue
outfile = os.path.join(libdir, convert_path(u_arcname))
if not is_script:
with zf.open(arcname) as bf:
fileop.copy_stream(bf, outfile)
outfiles.append(outfile)
# Double check the digest of the written file
if not dry_run and row[1]:
with open(outfile, 'rb') as bf:
data = bf.read()
_, newdigest = self.get_hash(data, kind)
if newdigest != digest:
raise DistlibException('digest mismatch '
'on write for '
'%s' % outfile)
if bc and outfile.endswith('.py'):
try:
pyc = fileop.byte_compile(outfile)
outfiles.append(pyc)
except Exception:
# Don't give up if byte-compilation fails,
# but log it and perhaps warn the user
logger.warning('Byte-compilation failed',
exc_info=True)
else:
fn = os.path.basename(convert_path(arcname))
workname = os.path.join(workdir, fn)
with zf.open(arcname) as bf:
fileop.copy_stream(bf, workname)
dn, fn = os.path.split(outfile)
maker.target_dir = dn
filenames = maker.make(fn)
fileop.set_executable_mode(filenames)
outfiles.extend(filenames)
if lib_only:
logger.debug('lib_only: returning None')
dist = None
else:
# Generate scripts
# Try to get pydist.json so we can see if there are
# any commands to generate. If this fails (e.g. because
# of a legacy wheel), log a warning but don't give up.
commands = None
file_version = self.info['Wheel-Version']
if file_version == '1.0':
# Use legacy info
ep = posixpath.join(info_dir, 'entry_points.txt')
try:
with zf.open(ep) as bwf:
epdata = read_exports(bwf)
commands = {}
for key in ('console', 'gui'):
k = '%s_scripts' % key
if k in epdata:
commands['wrap_%s' % key] = d = {}
for v in epdata[k].values():
s = '%s:%s' % (v.prefix, v.suffix)
if v.flags:
s += ' %s' % v.flags
d[v.name] = s
except Exception:
logger.warning('Unable to read legacy script '
'metadata, so cannot generate '
'scripts')
else:
try:
with zf.open(metadata_name) as bwf:
wf = wrapper(bwf)
commands = json.load(wf).get('extensions')
if commands:
commands = commands.get('python.commands')
except Exception:
logger.warning('Unable to read JSON metadata, so '
'cannot generate scripts')
if commands:
console_scripts = commands.get('wrap_console', {})
gui_scripts = commands.get('wrap_gui', {})
if console_scripts or gui_scripts:
script_dir = paths.get('scripts', '')
if not os.path.isdir(script_dir):
raise ValueError('Valid script path not '
'specified')
maker.target_dir = script_dir
for k, v in console_scripts.items():
script = '%s = %s' % (k, v)
filenames = maker.make(script)
fileop.set_executable_mode(filenames)
if gui_scripts:
options = {'gui': True }
for k, v in gui_scripts.items():
script = '%s = %s' % (k, v)
filenames = maker.make(script, options)
fileop.set_executable_mode(filenames)
p = os.path.join(libdir, info_dir)
dist = InstalledDistribution(p)
# Write SHARED
paths = dict(paths) # don't change passed in dict
del paths['purelib']
del paths['platlib']
paths['lib'] = libdir
p = dist.write_shared_locations(paths, dry_run)
if p:
outfiles.append(p)
# Write RECORD
dist.write_installed_files(outfiles, paths['prefix'],
dry_run)
return dist
except Exception: # pragma: no cover
logger.exception('installation failed.')
fileop.rollback()
raise
finally:
shutil.rmtree(workdir)
def _get_dylib_cache(self):
global cache
if cache is None:
# Use native string to avoid issues on 2.x: see Python #20140.
base = os.path.join(get_cache_base(), str('dylib-cache'),
sys.version[:3])
cache = Cache(base)
return cache
def _get_extensions(self):
pathname = os.path.join(self.dirname, self.filename)
name_ver = '%s-%s' % (self.name, self.version)
info_dir = '%s.dist-info' % name_ver
arcname = posixpath.join(info_dir, 'EXTENSIONS')
wrapper = codecs.getreader('utf-8')
result = []
with ZipFile(pathname, 'r') as zf:
try:
with zf.open(arcname) as bf:
wf = wrapper(bf)
extensions = json.load(wf)
cache = self._get_dylib_cache()
prefix = cache.prefix_to_dir(pathname)
cache_base = os.path.join(cache.base, prefix)
if not os.path.isdir(cache_base):
os.makedirs(cache_base)
for name, relpath in extensions.items():
dest = os.path.join(cache_base, convert_path(relpath))
if not os.path.exists(dest):
extract = True
else:
file_time = os.stat(dest).st_mtime
file_time = datetime.datetime.fromtimestamp(file_time)
info = zf.getinfo(relpath)
wheel_time = datetime.datetime(*info.date_time)
extract = wheel_time > file_time
if extract:
zf.extract(relpath, cache_base)
result.append((name, dest))
except KeyError:
pass
return result
def is_compatible(self):
"""
Determine if a wheel is compatible with the running system.
"""
return is_compatible(self)
def is_mountable(self):
"""
Determine if a wheel is asserted as mountable by its metadata.
"""
return True # for now - metadata details TBD
def mount(self, append=False):
pathname = os.path.abspath(os.path.join(self.dirname, self.filename))
if not self.is_compatible():
msg = 'Wheel %s not compatible with this Python.' % pathname
raise DistlibException(msg)
if not self.is_mountable():
msg = 'Wheel %s is marked as not mountable.' % pathname
raise DistlibException(msg)
if pathname in sys.path:
logger.debug('%s already in path', pathname)
else:
if append:
sys.path.append(pathname)
else:
sys.path.insert(0, pathname)
extensions = self._get_extensions()
if extensions:
if _hook not in sys.meta_path:
sys.meta_path.append(_hook)
_hook.add(pathname, extensions)
def unmount(self):
pathname = os.path.abspath(os.path.join(self.dirname, self.filename))
if pathname not in sys.path:
logger.debug('%s not in path', pathname)
else:
sys.path.remove(pathname)
if pathname in _hook.impure_wheels:
_hook.remove(pathname)
if not _hook.impure_wheels:
if _hook in sys.meta_path:
sys.meta_path.remove(_hook)
def verify(self):
pathname = os.path.join(self.dirname, self.filename)
name_ver = '%s-%s' % (self.name, self.version)
data_dir = '%s.data' % name_ver
info_dir = '%s.dist-info' % name_ver
metadata_name = posixpath.join(info_dir, METADATA_FILENAME)
wheel_metadata_name = posixpath.join(info_dir, 'WHEEL')
record_name = posixpath.join(info_dir, 'RECORD')
wrapper = codecs.getreader('utf-8')
with ZipFile(pathname, 'r') as zf:
with zf.open(wheel_metadata_name) as bwf:
wf = wrapper(bwf)
message = message_from_file(wf)
wv = message['Wheel-Version'].split('.', 1)
file_version = tuple([int(i) for i in wv])
# TODO version verification
records = {}
with zf.open(record_name) as bf:
with CSVReader(stream=bf) as reader:
for row in reader:
p = row[0]
records[p] = row
for zinfo in zf.infolist():
arcname = zinfo.filename
if isinstance(arcname, text_type):
u_arcname = arcname
else:
u_arcname = arcname.decode('utf-8')
if '..' in u_arcname:
raise DistlibException('invalid entry in '
'wheel: %r' % u_arcname)
# The signature file won't be in RECORD,
# and we don't currently don't do anything with it
if u_arcname.endswith('/RECORD.jws'):
continue
row = records[u_arcname]
if row[2] and str(zinfo.file_size) != row[2]:
raise DistlibException('size mismatch for '
'%s' % u_arcname)
if row[1]:
kind, value = row[1].split('=', 1)
with zf.open(arcname) as bf:
data = bf.read()
_, digest = self.get_hash(data, kind)
if digest != value:
raise DistlibException('digest mismatch for '
'%s' % arcname)
def update(self, modifier, dest_dir=None, **kwargs):
"""
Update the contents of a wheel in a generic way. The modifier should
be a callable which expects a dictionary argument: its keys are
archive-entry paths, and its values are absolute filesystem paths
where the contents the corresponding archive entries can be found. The
modifier is free to change the contents of the files pointed to, add
new entries and remove entries, before returning. This method will
extract the entire contents of the wheel to a temporary location, call
the modifier, and then use the passed (and possibly updated)
dictionary to write a new wheel. If ``dest_dir`` is specified, the new
wheel is written there -- otherwise, the original wheel is overwritten.
The modifier should return True if it updated the wheel, else False.
This method returns the same value the modifier returns.
"""
def get_version(path_map, info_dir):
version = path = None
key = '%s/%s' % (info_dir, METADATA_FILENAME)
if key not in path_map:
key = '%s/PKG-INFO' % info_dir
if key in path_map:
path = path_map[key]
version = Metadata(path=path).version
return version, path
def update_version(version, path):
updated = None
try:
v = NormalizedVersion(version)
i = version.find('-')
if i < 0:
updated = '%s+1' % version
else:
parts = [int(s) for s in version[i + 1:].split('.')]
parts[-1] += 1
updated = '%s+%s' % (version[:i],
'.'.join(str(i) for i in parts))
except UnsupportedVersionError:
logger.debug('Cannot update non-compliant (PEP-440) '
'version %r', version)
if updated:
md = Metadata(path=path)
md.version = updated
legacy = not path.endswith(METADATA_FILENAME)
md.write(path=path, legacy=legacy)
logger.debug('Version updated from %r to %r', version,
updated)
pathname = os.path.join(self.dirname, self.filename)
name_ver = '%s-%s' % (self.name, self.version)
info_dir = '%s.dist-info' % name_ver
record_name = posixpath.join(info_dir, 'RECORD')
with tempdir() as workdir:
with ZipFile(pathname, 'r') as zf:
path_map = {}
for zinfo in zf.infolist():
arcname = zinfo.filename
if isinstance(arcname, text_type):
u_arcname = arcname
else:
u_arcname = arcname.decode('utf-8')
if u_arcname == record_name:
continue
if '..' in u_arcname:
raise DistlibException('invalid entry in '
'wheel: %r' % u_arcname)
zf.extract(zinfo, workdir)
path = os.path.join(workdir, convert_path(u_arcname))
path_map[u_arcname] = path
# Remember the version.
original_version, _ = get_version(path_map, info_dir)
# Files extracted. Call the modifier.
modified = modifier(path_map, **kwargs)
if modified:
# Something changed - need to build a new wheel.
current_version, path = get_version(path_map, info_dir)
if current_version and (current_version == original_version):
# Add or update local version to signify changes.
update_version(current_version, path)
# Decide where the new wheel goes.
if dest_dir is None:
fd, newpath = tempfile.mkstemp(suffix='.whl',
prefix='wheel-update-',
dir=workdir)
os.close(fd)
else:
if not os.path.isdir(dest_dir):
raise DistlibException('Not a directory: %r' % dest_dir)
newpath = os.path.join(dest_dir, self.filename)
archive_paths = list(path_map.items())
distinfo = os.path.join(workdir, info_dir)
info = distinfo, info_dir
self.write_records(info, workdir, archive_paths)
self.build_zip(newpath, archive_paths)
if dest_dir is None:
shutil.copyfile(newpath, pathname)
return modified
def compatible_tags():
"""
Return (pyver, abi, arch) tuples compatible with this Python.
"""
versions = [VER_SUFFIX]
major = VER_SUFFIX[0]
for minor in range(sys.version_info[1] - 1, - 1, -1):
versions.append(''.join([major, str(minor)]))
abis = []
for suffix, _, _ in imp.get_suffixes():
if suffix.startswith('.abi'):
abis.append(suffix.split('.', 2)[1])
abis.sort()
if ABI != 'none':
abis.insert(0, ABI)
abis.append('none')
result = []
arches = [ARCH]
if sys.platform == 'darwin':
m = re.match('(\w+)_(\d+)_(\d+)_(\w+)$', ARCH)
if m:
name, major, minor, arch = m.groups()
minor = int(minor)
matches = [arch]
if arch in ('i386', 'ppc'):
matches.append('fat')
if arch in ('i386', 'ppc', 'x86_64'):
matches.append('fat3')
if arch in ('ppc64', 'x86_64'):
matches.append('fat64')
if arch in ('i386', 'x86_64'):
matches.append('intel')
if arch in ('i386', 'x86_64', 'intel', 'ppc', 'ppc64'):
matches.append('universal')
while minor >= 0:
for match in matches:
s = '%s_%s_%s_%s' % (name, major, minor, match)
if s != ARCH: # already there
arches.append(s)
minor -= 1
# Most specific - our Python version, ABI and arch
for abi in abis:
for arch in arches:
result.append((''.join((IMP_PREFIX, versions[0])), abi, arch))
# where no ABI / arch dependency, but IMP_PREFIX dependency
for i, version in enumerate(versions):
result.append((''.join((IMP_PREFIX, version)), 'none', 'any'))
if i == 0:
result.append((''.join((IMP_PREFIX, version[0])), 'none', 'any'))
# no IMP_PREFIX, ABI or arch dependency
for i, version in enumerate(versions):
result.append((''.join(('py', version)), 'none', 'any'))
if i == 0:
result.append((''.join(('py', version[0])), 'none', 'any'))
return set(result)
COMPATIBLE_TAGS = compatible_tags()
del compatible_tags
def is_compatible(wheel, tags=None):
if not isinstance(wheel, Wheel):
wheel = Wheel(wheel) # assume it's a filename
result = False
if tags is None:
tags = COMPATIBLE_TAGS
for ver, abi, arch in tags:
if ver in wheel.pyver and abi in wheel.abi and arch in wheel.arch:
result = True
break
return result
# Copyright 2015,2016 Nir Cohen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
The ``distro`` package (``distro`` stands for Linux Distribution) provides
information about the Linux distribution it runs on, such as a reliable
machine-readable distro ID, or version information.
It is a renewed alternative implementation for Python's original
:py:func:`platform.linux_distribution` function, but it provides much more
functionality. An alternative implementation became necessary because Python
3.5 deprecated this function, and Python 3.7 is expected to remove it
altogether. Its predecessor function :py:func:`platform.dist` was already
deprecated since Python 2.6 and is also expected to be removed in Python 3.7.
Still, there are many cases in which access to Linux distribution information
is needed. See `Python issue 1322 <https://bugs.python.org/issue1322>`_ for
more information.
"""
import os
import re
import sys
import json
import shlex
import logging
import subprocess
if not sys.platform.startswith('linux'):
raise ImportError('Unsupported platform: {0}'.format(sys.platform))
_UNIXCONFDIR = '/etc'
_OS_RELEASE_BASENAME = 'os-release'
#: Translation table for normalizing the "ID" attribute defined in os-release
#: files, for use by the :func:`distro.id` method.
#:
#: * Key: Value as defined in the os-release file, translated to lower case,
#: with blanks translated to underscores.
#:
#: * Value: Normalized value.
NORMALIZED_OS_ID = {}
#: Translation table for normalizing the "Distributor ID" attribute returned by
#: the lsb_release command, for use by the :func:`distro.id` method.
#:
#: * Key: Value as returned by the lsb_release command, translated to lower
#: case, with blanks translated to underscores.
#:
#: * Value: Normalized value.
NORMALIZED_LSB_ID = {
'enterpriseenterprise': 'oracle', # Oracle Enterprise Linux
'redhatenterpriseworkstation': 'rhel', # RHEL 6.7
}
#: Translation table for normalizing the distro ID derived from the file name
#: of distro release files, for use by the :func:`distro.id` method.
#:
#: * Key: Value as derived from the file name of a distro release file,
#: translated to lower case, with blanks translated to underscores.
#:
#: * Value: Normalized value.
NORMALIZED_DISTRO_ID = {
'redhat': 'rhel', # RHEL 6.x, 7.x
}
# Pattern for content of distro release file (reversed)
_DISTRO_RELEASE_CONTENT_REVERSED_PATTERN = re.compile(
r'(?:[^)]*\)(.*)\()? *(?:STL )?([\d.+\-a-z]*\d) *(?:esaeler *)?(.+)')
# Pattern for base file name of distro release file
_DISTRO_RELEASE_BASENAME_PATTERN = re.compile(
r'(\w+)[-_](release|version)$')
# Base file names to be ignored when searching for distro release file
_DISTRO_RELEASE_IGNORE_BASENAMES = (
'debian_version',
'lsb-release',
'oem-release',
_OS_RELEASE_BASENAME,
'system-release'
)
def linux_distribution(full_distribution_name=True):
"""
Return information about the current Linux distribution as a tuple
``(id_name, version, codename)`` with items as follows:
* ``id_name``: If *full_distribution_name* is false, the result of
:func:`distro.id`. Otherwise, the result of :func:`distro.name`.
* ``version``: The result of :func:`distro.version`.
* ``codename``: The result of :func:`distro.codename`.
The interface of this function is compatible with the original
:py:func:`platform.linux_distribution` function, supporting a subset of
its parameters.
The data it returns may not exactly be the same, because it uses more data
sources than the original function, and that may lead to different data if
the Linux distribution is not consistent across multiple data sources it
provides (there are indeed such distributions ...).
Another reason for differences is the fact that the :func:`distro.id`
method normalizes the distro ID string to a reliable machine-readable value
for a number of popular Linux distributions.
"""
return _distro.linux_distribution(full_distribution_name)
def id():
"""
Return the distro ID of the current Linux distribution, as a
machine-readable string.
For a number of Linux distributions, the returned distro ID value is
*reliable*, in the sense that it is documented and that it does not change
across releases of the distribution.
This package maintains the following reliable distro ID values:
============== =========================================
Distro ID Distribution
============== =========================================
"ubuntu" Ubuntu
"debian" Debian
"rhel" RedHat Enterprise Linux
"centos" CentOS
"fedora" Fedora
"sles" SUSE Linux Enterprise Server
"opensuse" openSUSE
"amazon" Amazon Linux
"arch" Arch Linux
"cloudlinux" CloudLinux OS
"exherbo" Exherbo Linux
"gentoo" GenToo Linux
"ibm_powerkvm" IBM PowerKVM
"kvmibm" KVM for IBM z Systems
"linuxmint" Linux Mint
"mageia" Mageia
"mandriva" Mandriva Linux
"parallels" Parallels
"pidora" Pidora
"raspbian" Raspbian
"oracle" Oracle Linux (and Oracle Enterprise Linux)
"scientific" Scientific Linux
"slackware" Slackware
"xenserver" XenServer
============== =========================================
If you have a need to get distros for reliable IDs added into this set,
or if you find that the :func:`distro.id` function returns a different
distro ID for one of the listed distros, please create an issue in the
`distro issue tracker`_.
**Lookup hierarchy and transformations:**
First, the ID is obtained from the following sources, in the specified
order. The first available and non-empty value is used:
* the value of the "ID" attribute of the os-release file,
* the value of the "Distributor ID" attribute returned by the lsb_release
command,
* the first part of the file name of the distro release file,
The so determined ID value then passes the following transformations,
before it is returned by this method:
* it is translated to lower case,
* blanks (which should not be there anyway) are translated to underscores,
* a normalization of the ID is performed, based upon
`normalization tables`_. The purpose of this normalization is to ensure
that the ID is as reliable as possible, even across incompatible changes
in the Linux distributions. A common reason for an incompatible change is
the addition of an os-release file, or the addition of the lsb_release
command, with ID values that differ from what was previously determined
from the distro release file name.
"""
return _distro.id()
def name(pretty=False):
"""
Return the name of the current Linux distribution, as a human-readable
string.
If *pretty* is false, the name is returned without version or codename.
(e.g. "CentOS Linux")
If *pretty* is true, the version and codename are appended.
(e.g. "CentOS Linux 7.1.1503 (Core)")
**Lookup hierarchy:**
The name is obtained from the following sources, in the specified order.
The first available and non-empty value is used:
* If *pretty* is false:
- the value of the "NAME" attribute of the os-release file,
- the value of the "Distributor ID" attribute returned by the lsb_release
command,
- the value of the "<name>" field of the distro release file.
* If *pretty* is true:
- the value of the "PRETTY_NAME" attribute of the os-release file,
- the value of the "Description" attribute returned by the lsb_release
command,
- the value of the "<name>" field of the distro release file, appended
with the value of the pretty version ("<version_id>" and "<codename>"
fields) of the distro release file, if available.
"""
return _distro.name(pretty)
def version(pretty=False, best=False):
"""
Return the version of the current Linux distribution, as a human-readable
string.
If *pretty* is false, the version is returned without codename (e.g.
"7.0").
If *pretty* is true, the codename in parenthesis is appended, if the
codename is non-empty (e.g. "7.0 (Maipo)").
Some distributions provide version numbers with different precisions in
the different sources of distribution information. Examining the different
sources in a fixed priority order does not always yield the most precise
version (e.g. for Debian 8.2, or CentOS 7.1).
The *best* parameter can be used to control the approach for the returned
version:
If *best* is false, the first non-empty version number in priority order of
the examined sources is returned.
If *best* is true, the most precise version number out of all examined
sources is returned.
**Lookup hierarchy:**
In all cases, the version number is obtained from the following sources.
If *best* is false, this order represents the priority order:
* the value of the "VERSION_ID" attribute of the os-release file,
* the value of the "Release" attribute returned by the lsb_release
command,
* the version number parsed from the "<version_id>" field of the first line
of the distro release file,
* the version number parsed from the "PRETTY_NAME" attribute of the
os-release file, if it follows the format of the distro release files.
* the version number parsed from the "Description" attribute returned by
the lsb_release command, if it follows the format of the distro release
files.
"""
return _distro.version(pretty, best)
def version_parts(best=False):
"""
Return the version of the current Linux distribution as a tuple
``(major, minor, build_number)`` with items as follows:
* ``major``: The result of :func:`distro.major_version`.
* ``minor``: The result of :func:`distro.minor_version`.
* ``build_number``: The result of :func:`distro.build_number`.
For a description of the *best* parameter, see the :func:`distro.version`
method.
"""
return _distro.version_parts(best)
def major_version(best=False):
"""
Return the major version of the current Linux distribution, as a string,
if provided.
Otherwise, the empty string is returned. The major version is the first
part of the dot-separated version string.
For a description of the *best* parameter, see the :func:`distro.version`
method.
"""
return _distro.major_version(best)
def minor_version(best=False):
"""
Return the minor version of the current Linux distribution, as a string,
if provided.
Otherwise, the empty string is returned. The minor version is the second
part of the dot-separated version string.
For a description of the *best* parameter, see the :func:`distro.version`
method.
"""
return _distro.minor_version(best)
def build_number(best=False):
"""
Return the build number of the current Linux distribution, as a string,
if provided.
Otherwise, the empty string is returned. The build number is the third part
of the dot-separated version string.
For a description of the *best* parameter, see the :func:`distro.version`
method.
"""
return _distro.build_number(best)
def like():
"""
Return a space-separated list of distro IDs of distributions that are
closely related to the current Linux distribution in regards to packaging
and programming interfaces, for example distributions the current
distribution is a derivative from.
**Lookup hierarchy:**
This information item is only provided by the os-release file.
For details, see the description of the "ID_LIKE" attribute in the
`os-release man page
<http://www.freedesktop.org/software/systemd/man/os-release.html>`_.
"""
return _distro.like()
def codename():
"""
Return the codename for the release of the current Linux distribution,
as a string.
If the distribution does not have a codename, an empty string is returned.
Note that the returned codename is not always really a codename. For
example, openSUSE returns "x86_64". This function does not handle such
cases in any special way and just returns the string it finds, if any.
**Lookup hierarchy:**
* the codename within the "VERSION" attribute of the os-release file, if
provided,
* the value of the "Codename" attribute returned by the lsb_release
command,
* the value of the "<codename>" field of the distro release file.
"""
return _distro.codename()
def info(pretty=False, best=False):
"""
Return certain machine-readable information items about the current Linux
distribution in a dictionary, as shown in the following example:
.. sourcecode:: python
{
'id': 'rhel',
'version': '7.0',
'version_parts': {
'major': '7',
'minor': '0',
'build_number': ''
},
'like': 'fedora',
'codename': 'Maipo'
}
The dictionary structure and keys are always the same, regardless of which
information items are available in the underlying data sources. The values
for the various keys are as follows:
* ``id``: The result of :func:`distro.id`.
* ``version``: The result of :func:`distro.version`.
* ``version_parts -> major``: The result of :func:`distro.major_version`.
* ``version_parts -> minor``: The result of :func:`distro.minor_version`.
* ``version_parts -> build_number``: The result of
:func:`distro.build_number`.
* ``like``: The result of :func:`distro.like`.
* ``codename``: The result of :func:`distro.codename`.
For a description of the *pretty* and *best* parameters, see the
:func:`distro.version` method.
"""
return _distro.info(pretty, best)
def os_release_info():
"""
Return a dictionary containing key-value pairs for the information items
from the os-release file data source of the current Linux distribution.
See `os-release file`_ for details about these information items.
"""
return _distro.os_release_info()
def lsb_release_info():
"""
Return a dictionary containing key-value pairs for the information items
from the lsb_release command data source of the current Linux distribution.
See `lsb_release command output`_ for details about these information
items.
"""
return _distro.lsb_release_info()
def distro_release_info():
"""
Return a dictionary containing key-value pairs for the information items
from the distro release file data source of the current Linux distribution.
See `distro release file`_ for details about these information items.
"""
return _distro.distro_release_info()
def os_release_attr(attribute):
"""
Return a single named information item from the os-release file data source
of the current Linux distribution.
Parameters:
* ``attribute`` (string): Key of the information item.
Returns:
* (string): Value of the information item, if the item exists.
The empty string, if the item does not exist.
See `os-release file`_ for details about these information items.
"""
return _distro.os_release_attr(attribute)
def lsb_release_attr(attribute):
"""
Return a single named information item from the lsb_release command output
data source of the current Linux distribution.
Parameters:
* ``attribute`` (string): Key of the information item.
Returns:
* (string): Value of the information item, if the item exists.
The empty string, if the item does not exist.
See `lsb_release command output`_ for details about these information
items.
"""
return _distro.lsb_release_attr(attribute)
def distro_release_attr(attribute):
"""
Return a single named information item from the distro release file
data source of the current Linux distribution.
Parameters:
* ``attribute`` (string): Key of the information item.
Returns:
* (string): Value of the information item, if the item exists.
The empty string, if the item does not exist.
See `distro release file`_ for details about these information items.
"""
return _distro.distro_release_attr(attribute)
class LinuxDistribution(object):
"""
Provides information about a Linux distribution.
This package creates a private module-global instance of this class with
default initialization arguments, that is used by the
`consolidated accessor functions`_ and `single source accessor functions`_.
By using default initialization arguments, that module-global instance
returns data about the current Linux distribution (i.e. the distro this
package runs on).
Normally, it is not necessary to create additional instances of this class.
However, in situations where control is needed over the exact data sources
that are used, instances of this class can be created with a specific
distro release file, or a specific os-release file, or without invoking the
lsb_release command.
"""
def __init__(self,
include_lsb=True,
os_release_file='',
distro_release_file=''):
"""
The initialization method of this class gathers information from the
available data sources, and stores that in private instance attributes.
Subsequent access to the information items uses these private instance
attributes, so that the data sources are read only once.
Parameters:
* ``include_lsb`` (bool): Controls whether the
`lsb_release command output`_ is included as a data source.
If the lsb_release command is not available in the program execution
path, the data source for the lsb_release command will be empty.
* ``os_release_file`` (string): The path name of the
`os-release file`_ that is to be used as a data source.
An empty string (the default) will cause the default path name to
be used (see `os-release file`_ for details).
If the specified or defaulted os-release file does not exist, the
data source for the os-release file will be empty.
* ``distro_release_file`` (string): The path name of the
`distro release file`_ that is to be used as a data source.
An empty string (the default) will cause a default search algorithm
to be used (see `distro release file`_ for details).
If the specified distro release file does not exist, or if no default
distro release file can be found, the data source for the distro
release file will be empty.
Public instance attributes:
* ``os_release_file`` (string): The path name of the
`os-release file`_ that is actually used as a data source. The
empty string if no distro release file is used as a data source.
* ``distro_release_file`` (string): The path name of the
`distro release file`_ that is actually used as a data source. The
empty string if no distro release file is used as a data source.
Raises:
* :py:exc:`IOError`: Some I/O issue with an os-release file or distro
release file.
* :py:exc:`subprocess.CalledProcessError`: The lsb_release command had
some issue (other than not being available in the program execution
path).
* :py:exc:`UnicodeError`: A data source has unexpected characters or
uses an unexpected encoding.
"""
self.os_release_file = os_release_file or \
os.path.join(_UNIXCONFDIR, _OS_RELEASE_BASENAME)
self.distro_release_file = distro_release_file or '' # updated later
self._os_release_info = self._get_os_release_info()
self._lsb_release_info = self._get_lsb_release_info() \
if include_lsb else {}
self._distro_release_info = self._get_distro_release_info()
def __repr__(self):
"""Return repr of all info
"""
return \
"LinuxDistribution(" \
"os_release_file={0!r}, " \
"distro_release_file={1!r}, " \
"_os_release_info={2!r}, " \
"_lsb_release_info={3!r}, " \
"_distro_release_info={4!r})".format(
self.os_release_file,
self.distro_release_file,
self._os_release_info,
self._lsb_release_info,
self._distro_release_info)
def linux_distribution(self, full_distribution_name=True):
"""
Return information about the Linux distribution that is compatible
with Python's :func:`platform.linux_distribution`, supporting a subset
of its parameters.
For details, see :func:`distro.linux_distribution`.
"""
return (
self.name() if full_distribution_name else self.id(),
self.version(),
self.codename()
)
def id(self):
"""Return the distro ID of the Linux distribution, as a string.
For details, see :func:`distro.id`.
"""
def normalize(distro_id, table):
distro_id = distro_id.lower().replace(' ', '_')
return table.get(distro_id, distro_id)
distro_id = self.os_release_attr('id')
if distro_id:
return normalize(distro_id, NORMALIZED_OS_ID)
distro_id = self.lsb_release_attr('distributor_id')
if distro_id:
return normalize(distro_id, NORMALIZED_LSB_ID)
distro_id = self.distro_release_attr('id')
if distro_id:
return normalize(distro_id, NORMALIZED_DISTRO_ID)
return ''
def name(self, pretty=False):
"""
Return the name of the Linux distribution, as a string.
For details, see :func:`distro.name`.
"""
name = self.os_release_attr('name') \
or self.lsb_release_attr('distributor_id') \
or self.distro_release_attr('name')
if pretty:
name = self.os_release_attr('pretty_name') \
or self.lsb_release_attr('description')
if not name:
name = self.distro_release_attr('name')
version = self.version(pretty=True)
if version:
name = name + ' ' + version
return name or ''
def version(self, pretty=False, best=False):
"""
Return the version of the Linux distribution, as a string.
For details, see :func:`distro.version`.
"""
versions = [
self.os_release_attr('version_id'),
self.lsb_release_attr('release'),
self.distro_release_attr('version_id'),
self._parse_distro_release_content(
self.os_release_attr('pretty_name')).get('version_id', ''),
self._parse_distro_release_content(
self.lsb_release_attr('description')).get('version_id', '')
]
version = ''
if best:
# This algorithm uses the last version in priority order that has
# the best precision. If the versions are not in conflict, that
# does not matter; otherwise, using the last one instead of the
# first one might be considered a surprise.
for v in versions:
if v.count(".") > version.count(".") or version == '':
version = v
else:
for v in versions:
if v != '':
version = v
break
if pretty and version and self.codename():
version = u'{0} ({1})'.format(version, self.codename())
return version
def version_parts(self, best=False):
"""
Return the version of the Linux distribution, as a tuple of version
numbers.
For details, see :func:`distro.version_parts`.
"""
version_str = self.version(best=best)
if version_str:
version_regex = re.compile(r'(\d+)\.?(\d+)?\.?(\d+)?')
matches = version_regex.match(version_str)
if matches:
major, minor, build_number = matches.groups()
return major, minor or '', build_number or ''
return '', '', ''
def major_version(self, best=False):
"""
Return the major version number of the current distribution.
For details, see :func:`distro.major_version`.
"""
return self.version_parts(best)[0]
def minor_version(self, best=False):
"""
Return the minor version number of the Linux distribution.
For details, see :func:`distro.minor_version`.
"""
return self.version_parts(best)[1]
def build_number(self, best=False):
"""
Return the build number of the Linux distribution.
For details, see :func:`distro.build_number`.
"""
return self.version_parts(best)[2]
def like(self):
"""
Return the IDs of distributions that are like the Linux distribution.
For details, see :func:`distro.like`.
"""
return self.os_release_attr('id_like') or ''
def codename(self):
"""
Return the codename of the Linux distribution.
For details, see :func:`distro.codename`.
"""
return self.os_release_attr('codename') \
or self.lsb_release_attr('codename') \
or self.distro_release_attr('codename') \
or ''
def info(self, pretty=False, best=False):
"""
Return certain machine-readable information about the Linux
distribution.
For details, see :func:`distro.info`.
"""
return dict(
id=self.id(),
version=self.version(pretty, best),
version_parts=dict(
major=self.major_version(best),
minor=self.minor_version(best),
build_number=self.build_number(best)
),
like=self.like(),
codename=self.codename(),
)
def os_release_info(self):
"""
Return a dictionary containing key-value pairs for the information
items from the os-release file data source of the Linux distribution.
For details, see :func:`distro.os_release_info`.
"""
return self._os_release_info
def lsb_release_info(self):
"""
Return a dictionary containing key-value pairs for the information
items from the lsb_release command data source of the Linux
distribution.
For details, see :func:`distro.lsb_release_info`.
"""
return self._lsb_release_info
def distro_release_info(self):
"""
Return a dictionary containing key-value pairs for the information
items from the distro release file data source of the Linux
distribution.
For details, see :func:`distro.distro_release_info`.
"""
return self._distro_release_info
def os_release_attr(self, attribute):
"""
Return a single named information item from the os-release file data
source of the Linux distribution.
For details, see :func:`distro.os_release_attr`.
"""
return self._os_release_info.get(attribute, '')
def lsb_release_attr(self, attribute):
"""
Return a single named information item from the lsb_release command
output data source of the Linux distribution.
For details, see :func:`distro.lsb_release_attr`.
"""
return self._lsb_release_info.get(attribute, '')
def distro_release_attr(self, attribute):
"""
Return a single named information item from the distro release file
data source of the Linux distribution.
For details, see :func:`distro.distro_release_attr`.
"""
return self._distro_release_info.get(attribute, '')
def _get_os_release_info(self):
"""
Get the information items from the specified os-release file.
Returns:
A dictionary containing all information items.
"""
if os.path.isfile(self.os_release_file):
with open(self.os_release_file) as release_file:
return self._parse_os_release_content(release_file)
return {}
@staticmethod
def _parse_os_release_content(lines):
"""
Parse the lines of an os-release file.
Parameters:
* lines: Iterable through the lines in the os-release file.
Each line must be a unicode string or a UTF-8 encoded byte
string.
Returns:
A dictionary containing all information items.
"""
props = {}
lexer = shlex.shlex(lines, posix=True)
lexer.whitespace_split = True
# The shlex module defines its `wordchars` variable using literals,
# making it dependent on the encoding of the Python source file.
# In Python 2.6 and 2.7, the shlex source file is encoded in
# 'iso-8859-1', and the `wordchars` variable is defined as a byte
# string. This causes a UnicodeDecodeError to be raised when the
# parsed content is a unicode object. The following fix resolves that
# (... but it should be fixed in shlex...):
if sys.version_info[0] == 2 and isinstance(lexer.wordchars, bytes):
lexer.wordchars = lexer.wordchars.decode('iso-8859-1')
tokens = list(lexer)
for token in tokens:
# At this point, all shell-like parsing has been done (i.e.
# comments processed, quotes and backslash escape sequences
# processed, multi-line values assembled, trailing newlines
# stripped, etc.), so the tokens are now either:
# * variable assignments: var=value
# * commands or their arguments (not allowed in os-release)
if '=' in token:
k, v = token.split('=', 1)
if isinstance(v, bytes):
v = v.decode('utf-8')
props[k.lower()] = v
if k == 'VERSION':
# this handles cases in which the codename is in
# the `(CODENAME)` (rhel, centos, fedora) format
# or in the `, CODENAME` format (Ubuntu).
codename = re.search(r'(\(\D+\))|,(\s+)?\D+', v)
if codename:
codename = codename.group()
codename = codename.strip('()')
codename = codename.strip(',')
codename = codename.strip()
# codename appears within paranthese.
props['codename'] = codename
else:
props['codename'] = ''
else:
# Ignore any tokens that are not variable assignments
pass
return props
def _get_lsb_release_info(self):
"""
Get the information items from the lsb_release command output.
Returns:
A dictionary containing all information items.
"""
cmd = 'lsb_release -a'
process = subprocess.Popen(
cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, stderr = process.communicate()
stdout, stderr = stdout.decode('utf-8'), stderr.decode('utf-8')
code = process.returncode
if code == 0:
content = stdout.splitlines()
return self._parse_lsb_release_content(content)
elif code == 127: # Command not found
return {}
else:
if sys.version_info[:2] >= (3, 5):
raise subprocess.CalledProcessError(code, cmd, stdout, stderr)
elif sys.version_info[:2] >= (2, 7):
raise subprocess.CalledProcessError(code, cmd, stdout)
elif sys.version_info[:2] == (2, 6):
raise subprocess.CalledProcessError(code, cmd)
@staticmethod
def _parse_lsb_release_content(lines):
"""
Parse the output of the lsb_release command.
Parameters:
* lines: Iterable through the lines of the lsb_release output.
Each line must be a unicode string or a UTF-8 encoded byte
string.
Returns:
A dictionary containing all information items.
"""
props = {}
for line in lines:
line = line.decode('utf-8') if isinstance(line, bytes) else line
kv = line.strip('\n').split(':', 1)
if len(kv) != 2:
# Ignore lines without colon.
continue
k, v = kv
props.update({k.replace(' ', '_').lower(): v.strip()})
return props
def _get_distro_release_info(self):
"""
Get the information items from the specified distro release file.
Returns:
A dictionary containing all information items.
"""
if self.distro_release_file:
# If it was specified, we use it and parse what we can, even if
# its file name or content does not match the expected pattern.
distro_info = self._parse_distro_release_file(
self.distro_release_file)
basename = os.path.basename(self.distro_release_file)
# The file name pattern for user-specified distro release files
# is somewhat more tolerant (compared to when searching for the
# file), because we want to use what was specified as best as
# possible.
match = _DISTRO_RELEASE_BASENAME_PATTERN.match(basename)
if match:
distro_info['id'] = match.group(1)
return distro_info
else:
basenames = os.listdir(_UNIXCONFDIR)
# We sort for repeatability in cases where there are multiple
# distro specific files; e.g. CentOS, Oracle, Enterprise all
# containing `redhat-release` on top of their own.
basenames.sort()
for basename in basenames:
if basename in _DISTRO_RELEASE_IGNORE_BASENAMES:
continue
match = _DISTRO_RELEASE_BASENAME_PATTERN.match(basename)
if match:
filepath = os.path.join(_UNIXCONFDIR, basename)
distro_info = self._parse_distro_release_file(filepath)
if 'name' in distro_info:
# The name is always present if the pattern matches
self.distro_release_file = filepath
distro_info['id'] = match.group(1)
return distro_info
return {}
def _parse_distro_release_file(self, filepath):
"""
Parse a distro release file.
Parameters:
* filepath: Path name of the distro release file.
Returns:
A dictionary containing all information items.
"""
if os.path.isfile(filepath):
with open(filepath) as fp:
# Only parse the first line. For instance, on SLES there
# are multiple lines. We don't want them...
return self._parse_distro_release_content(fp.readline())
return {}
@staticmethod
def _parse_distro_release_content(line):
"""
Parse a line from a distro release file.
Parameters:
* line: Line from the distro release file. Must be a unicode string
or a UTF-8 encoded byte string.
Returns:
A dictionary containing all information items.
"""
if isinstance(line, bytes):
line = line.decode('utf-8')
matches = _DISTRO_RELEASE_CONTENT_REVERSED_PATTERN.match(
line.strip()[::-1])
distro_info = {}
if matches:
# regexp ensures non-None
distro_info['name'] = matches.group(3)[::-1]
if matches.group(2):
distro_info['version_id'] = matches.group(2)[::-1]
if matches.group(1):
distro_info['codename'] = matches.group(1)[::-1]
elif line:
distro_info['name'] = line.strip()
return distro_info
_distro = LinuxDistribution()
def main():
import argparse
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler(sys.stdout))
parser = argparse.ArgumentParser(description="Linux distro info tool")
parser.add_argument(
'--json',
'-j',
help="Output in machine readable format",
action="store_true")
args = parser.parse_args()
if args.json:
logger.info(json.dumps(info(), indent=4, sort_keys=True))
else:
logger.info('Name: %s', name(pretty=True))
distribution_version = version(pretty=True)
if distribution_version:
logger.info('Version: %s', distribution_version)
distribution_codename = codename()
if distribution_codename:
logger.info('Codename: %s', distribution_codename)
if __name__ == '__main__':
main()
"""
HTML parsing library based on the WHATWG "HTML5"
specification. The parser is designed to be compatible with existing
HTML found in the wild and implements well-defined error recovery that
is largely compatible with modern desktop web browsers.
Example usage:
import html5lib
f = open("my_document.html")
tree = html5lib.parse(f)
"""
from __future__ import absolute_import, division, unicode_literals
from .html5parser import HTMLParser, parse, parseFragment
from .treebuilders import getTreeBuilder
from .treewalkers import getTreeWalker
from .serializer import serialize
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
"getTreeWalker", "serialize"]
# this has to be at the top level, see how setup.py parses this
__version__ = "1.0b10"
from __future__ import absolute_import, division, unicode_literals
import re
import warnings
from .constants import DataLossWarning
baseChar = """
[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] |
[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] |
[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] |
[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 |
[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] |
[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] |
[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] |
[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] |
[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 |
[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] |
[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] |
[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D |
[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] |
[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] |
[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] |
[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] |
[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] |
[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] |
[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 |
[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] |
[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] |
[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] |
[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] |
[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] |
[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] |
[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] |
[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] |
[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] |
[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] |
[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A |
#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 |
#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] |
#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] |
[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] |
[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C |
#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 |
[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] |
[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] |
[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 |
[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] |
[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B |
#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE |
[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] |
[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 |
[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] |
[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""
ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]"""
combiningCharacter = """
[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] |
[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 |
[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] |
[#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] |
#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] |
[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] |
[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 |
#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] |
[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC |
[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] |
#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] |
[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] |
[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] |
[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] |
[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] |
[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] |
#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 |
[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] |
#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] |
[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] |
[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] |
#x3099 | #x309A"""
digit = """
[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] |
[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] |
[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] |
[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]"""
extender = """
#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 |
#[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""
letter = " | ".join([baseChar, ideographic])
# Without the
name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter,
extender])
nameFirst = " | ".join([letter, "_"])
reChar = re.compile(r"#x([\d|A-F]{4,4})")
reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]")
def charStringToList(chars):
charRanges = [item.strip() for item in chars.split(" | ")]
rv = []
for item in charRanges:
foundMatch = False
for regexp in (reChar, reCharRange):
match = regexp.match(item)
if match is not None:
rv.append([hexToInt(item) for item in match.groups()])
if len(rv[-1]) == 1:
rv[-1] = rv[-1] * 2
foundMatch = True
break
if not foundMatch:
assert len(item) == 1
rv.append([ord(item)] * 2)
rv = normaliseCharList(rv)
return rv
def normaliseCharList(charList):
charList = sorted(charList)
for item in charList:
assert item[1] >= item[0]
rv = []
i = 0
while i < len(charList):
j = 1
rv.append(charList[i])
while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1:
rv[-1][1] = charList[i + j][1]
j += 1
i += j
return rv
# We don't really support characters above the BMP :(
max_unicode = int("FFFF", 16)
def missingRanges(charList):
rv = []
if charList[0] != 0:
rv.append([0, charList[0][0] - 1])
for i, item in enumerate(charList[:-1]):
rv.append([item[1] + 1, charList[i + 1][0] - 1])
if charList[-1][1] != max_unicode:
rv.append([charList[-1][1] + 1, max_unicode])
return rv
def listToRegexpStr(charList):
rv = []
for item in charList:
if item[0] == item[1]:
rv.append(escapeRegexp(chr(item[0])))
else:
rv.append(escapeRegexp(chr(item[0])) + "-" +
escapeRegexp(chr(item[1])))
return "[%s]" % "".join(rv)
def hexToInt(hex_str):
return int(hex_str, 16)
def escapeRegexp(string):
specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}",
"[", "]", "|", "(", ")", "-")
for char in specialCharacters:
string = string.replace(char, "\\" + char)
return string
# output from the above
nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
# Simpler things
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]")
class InfosetFilter(object):
replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
def __init__(self,
dropXmlnsLocalName=False,
dropXmlnsAttrNs=False,
preventDoubleDashComments=False,
preventDashAtCommentEnd=False,
replaceFormFeedCharacters=True,
preventSingleQuotePubid=False):
self.dropXmlnsLocalName = dropXmlnsLocalName
self.dropXmlnsAttrNs = dropXmlnsAttrNs
self.preventDoubleDashComments = preventDoubleDashComments
self.preventDashAtCommentEnd = preventDashAtCommentEnd
self.replaceFormFeedCharacters = replaceFormFeedCharacters
self.preventSingleQuotePubid = preventSingleQuotePubid
self.replaceCache = {}
def coerceAttribute(self, name, namespace=None):
if self.dropXmlnsLocalName and name.startswith("xmlns:"):
warnings.warn("Attributes cannot begin with xmlns", DataLossWarning)
return None
elif (self.dropXmlnsAttrNs and
namespace == "http://www.w3.org/2000/xmlns/"):
warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning)
return None
else:
return self.toXmlName(name)
def coerceElement(self, name):
return self.toXmlName(name)
def coerceComment(self, data):
if self.preventDoubleDashComments:
while "--" in data:
warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
data = data.replace("--", "- -")
if data.endswith("-"):
warnings.warn("Comments cannot end in a dash", DataLossWarning)
data += " "
return data
def coerceCharacters(self, data):
if self.replaceFormFeedCharacters:
for _ in range(data.count("\x0C")):
warnings.warn("Text cannot contain U+000C", DataLossWarning)
data = data.replace("\x0C", " ")
# Other non-xml characters
return data
def coercePubid(self, data):
dataOutput = data
for char in nonPubidCharRegexp.findall(data):
warnings.warn("Coercing non-XML pubid", DataLossWarning)
replacement = self.getReplacementCharacter(char)
dataOutput = dataOutput.replace(char, replacement)
if self.preventSingleQuotePubid and dataOutput.find("'") >= 0:
warnings.warn("Pubid cannot contain single quote", DataLossWarning)
dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'"))
return dataOutput
def toXmlName(self, name):
nameFirst = name[0]
nameRest = name[1:]
m = nonXmlNameFirstBMPRegexp.match(nameFirst)
if m:
warnings.warn("Coercing non-XML name", DataLossWarning)
nameFirstOutput = self.getReplacementCharacter(nameFirst)
else:
nameFirstOutput = nameFirst
nameRestOutput = nameRest
replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
for char in replaceChars:
warnings.warn("Coercing non-XML name", DataLossWarning)
replacement = self.getReplacementCharacter(char)
nameRestOutput = nameRestOutput.replace(char, replacement)
return nameFirstOutput + nameRestOutput
def getReplacementCharacter(self, char):
if char in self.replaceCache:
replacement = self.replaceCache[char]
else:
replacement = self.escapeChar(char)
return replacement
def fromXmlName(self, name):
for item in set(self.replacementRegexp.findall(name)):
name = name.replace(item, self.unescapeChar(item))
return name
def escapeChar(self, char):
replacement = "U%05X" % ord(char)
self.replaceCache[char] = replacement
return replacement
def unescapeChar(self, charcode):
return chr(int(charcode[1:], 16))
from __future__ import absolute_import, division, unicode_literals
from pip._vendor.six import text_type, binary_type
from pip._vendor.six.moves import http_client, urllib
import codecs
import re
from pip._vendor import webencodings
from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
from .constants import ReparseException
from . import _utils
from io import StringIO
try:
from io import BytesIO
except ImportError:
BytesIO = StringIO
# Non-unicode versions of constants for use in the pre-parser
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa
if _utils.supports_lone_surrogates:
# Use one extra step of indirection and create surrogates with
# eval. Not using this indirection would introduce an illegal
# unicode literal on platforms not supporting such lone
# surrogates.
assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] +
eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used
"]")
else:
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
0x10FFFE, 0x10FFFF])
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]")
# Cache for charsUntil()
charsUntilRegEx = {}
class BufferedStream(object):
"""Buffering for streams that do not have buffering of their own
The buffer is implemented as a list of chunks on the assumption that
joining many strings will be slow since it is O(n**2)
"""
def __init__(self, stream):
self.stream = stream
self.buffer = []
self.position = [-1, 0] # chunk number, offset
def tell(self):
pos = 0
for chunk in self.buffer[:self.position[0]]:
pos += len(chunk)
pos += self.position[1]
return pos
def seek(self, pos):
assert pos <= self._bufferedBytes()
offset = pos
i = 0
while len(self.buffer[i]) < offset:
offset -= len(self.buffer[i])
i += 1
self.position = [i, offset]
def read(self, bytes):
if not self.buffer:
return self._readStream(bytes)
elif (self.position[0] == len(self.buffer) and
self.position[1] == len(self.buffer[-1])):
return self._readStream(bytes)
else:
return self._readFromBuffer(bytes)
def _bufferedBytes(self):
return sum([len(item) for item in self.buffer])
def _readStream(self, bytes):
data = self.stream.read(bytes)
self.buffer.append(data)
self.position[0] += 1
self.position[1] = len(data)
return data
def _readFromBuffer(self, bytes):
remainingBytes = bytes
rv = []
bufferIndex = self.position[0]
bufferOffset = self.position[1]
while bufferIndex < len(self.buffer) and remainingBytes != 0:
assert remainingBytes > 0
bufferedData = self.buffer[bufferIndex]
if remainingBytes <= len(bufferedData) - bufferOffset:
bytesToRead = remainingBytes
self.position = [bufferIndex, bufferOffset + bytesToRead]
else:
bytesToRead = len(bufferedData) - bufferOffset
self.position = [bufferIndex, len(bufferedData)]
bufferIndex += 1
rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead])
remainingBytes -= bytesToRead
bufferOffset = 0
if remainingBytes:
rv.append(self._readStream(remainingBytes))
return b"".join(rv)
def HTMLInputStream(source, **kwargs):
# Work around Python bug #20007: read(0) closes the connection.
# http://bugs.python.org/issue20007
if (isinstance(source, http_client.HTTPResponse) or
# Also check for addinfourl wrapping HTTPResponse
(isinstance(source, urllib.response.addbase) and
isinstance(source.fp, http_client.HTTPResponse))):
isUnicode = False
elif hasattr(source, "read"):
isUnicode = isinstance(source.read(0), text_type)
else:
isUnicode = isinstance(source, text_type)
if isUnicode:
encodings = [x for x in kwargs if x.endswith("_encoding")]
if encodings:
raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings)
return HTMLUnicodeInputStream(source, **kwargs)
else:
return HTMLBinaryInputStream(source, **kwargs)
class HTMLUnicodeInputStream(object):
"""Provides a unicode stream of characters to the HTMLTokenizer.
This class takes care of character encoding and removing or replacing
incorrect byte-sequences and also provides column and line tracking.
"""
_defaultChunkSize = 10240
def __init__(self, source):
"""Initialises the HTMLInputStream.
HTMLInputStream(source, [encoding]) -> Normalized stream from source
for use by html5lib.
source can be either a file-object, local filename or a string.
The optional encoding parameter must be a string that indicates
the encoding. If specified, that encoding will be used,
regardless of any BOM or later declaration (such as in a meta
element)
"""
if not _utils.supports_lone_surrogates:
# Such platforms will have already checked for such
# surrogate errors, so no need to do this checking.
self.reportCharacterErrors = None
elif len("\U0010FFFF") == 1:
self.reportCharacterErrors = self.characterErrorsUCS4
else:
self.reportCharacterErrors = self.characterErrorsUCS2
# List of where new lines occur
self.newLines = [0]
self.charEncoding = (lookupEncoding("utf-8"), "certain")
self.dataStream = self.openStream(source)
self.reset()
def reset(self):
self.chunk = ""
self.chunkSize = 0
self.chunkOffset = 0
self.errors = []
# number of (complete) lines in previous chunks
self.prevNumLines = 0
# number of columns in the last line of the previous chunk
self.prevNumCols = 0
# Deal with CR LF and surrogates split over chunk boundaries
self._bufferedCharacter = None
def openStream(self, source):
"""Produces a file object from source.
source can be either a file object, local filename or a string.
"""
# Already a file object
if hasattr(source, 'read'):
stream = source
else:
stream = StringIO(source)
return stream
def _position(self, offset):
chunk = self.chunk
nLines = chunk.count('\n', 0, offset)
positionLine = self.prevNumLines + nLines
lastLinePos = chunk.rfind('\n', 0, offset)
if lastLinePos == -1:
positionColumn = self.prevNumCols + offset
else:
positionColumn = offset - (lastLinePos + 1)
return (positionLine, positionColumn)
def position(self):
"""Returns (line, col) of the current position in the stream."""
line, col = self._position(self.chunkOffset)
return (line + 1, col)
def char(self):
""" Read one character from the stream or queue if available. Return
EOF when EOF is reached.
"""
# Read a new chunk from the input stream if necessary
if self.chunkOffset >= self.chunkSize:
if not self.readChunk():
return EOF
chunkOffset = self.chunkOffset
char = self.chunk[chunkOffset]
self.chunkOffset = chunkOffset + 1
return char
def readChunk(self, chunkSize=None):
if chunkSize is None:
chunkSize = self._defaultChunkSize
self.prevNumLines, self.prevNumCols = self._position(self.chunkSize)
self.chunk = ""
self.chunkSize = 0
self.chunkOffset = 0
data = self.dataStream.read(chunkSize)
# Deal with CR LF and surrogates broken across chunks
if self._bufferedCharacter:
data = self._bufferedCharacter + data
self._bufferedCharacter = None
elif not data:
# We have no more data, bye-bye stream
return False
if len(data) > 1:
lastv = ord(data[-1])
if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF:
self._bufferedCharacter = data[-1]
data = data[:-1]
if self.reportCharacterErrors:
self.reportCharacterErrors(data)
# Replace invalid characters
data = data.replace("\r\n", "\n")
data = data.replace("\r", "\n")
self.chunk = data
self.chunkSize = len(data)
return True
def characterErrorsUCS4(self, data):
for _ in range(len(invalid_unicode_re.findall(data))):
self.errors.append("invalid-codepoint")
def characterErrorsUCS2(self, data):
# Someone picked the wrong compile option
# You lose
skip = False
for match in invalid_unicode_re.finditer(data):
if skip:
continue
codepoint = ord(match.group())
pos = match.start()
# Pretty sure there should be endianness issues here
if _utils.isSurrogatePair(data[pos:pos + 2]):
# We have a surrogate pair!
char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
if char_val in non_bmp_invalid_codepoints:
self.errors.append("invalid-codepoint")
skip = True
elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and
pos == len(data) - 1):
self.errors.append("invalid-codepoint")
else:
skip = False
self.errors.append("invalid-codepoint")
def charsUntil(self, characters, opposite=False):
""" Returns a string of characters from the stream up to but not
including any character in 'characters' or EOF. 'characters' must be
a container that supports the 'in' method and iteration over its
characters.
"""
# Use a cache of regexps to find the required characters
try:
chars = charsUntilRegEx[(characters, opposite)]
except KeyError:
if __debug__:
for c in characters:
assert(ord(c) < 128)
regex = "".join(["\\x%02x" % ord(c) for c in characters])
if not opposite:
regex = "^%s" % regex
chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex)
rv = []
while True:
# Find the longest matching prefix
m = chars.match(self.chunk, self.chunkOffset)
if m is None:
# If nothing matched, and it wasn't because we ran out of chunk,
# then stop
if self.chunkOffset != self.chunkSize:
break
else:
end = m.end()
# If not the whole chunk matched, return everything
# up to the part that didn't match
if end != self.chunkSize:
rv.append(self.chunk[self.chunkOffset:end])
self.chunkOffset = end
break
# If the whole remainder of the chunk matched,
# use it all and read the next chunk
rv.append(self.chunk[self.chunkOffset:])
if not self.readChunk():
# Reached EOF
break
r = "".join(rv)
return r
def unget(self, char):
# Only one character is allowed to be ungotten at once - it must
# be consumed again before any further call to unget
if char is not None:
if self.chunkOffset == 0:
# unget is called quite rarely, so it's a good idea to do
# more work here if it saves a bit of work in the frequently
# called char and charsUntil.
# So, just prepend the ungotten character onto the current
# chunk:
self.chunk = char + self.chunk
self.chunkSize += 1
else:
self.chunkOffset -= 1
assert self.chunk[self.chunkOffset] == char
class HTMLBinaryInputStream(HTMLUnicodeInputStream):
"""Provides a unicode stream of characters to the HTMLTokenizer.
This class takes care of character encoding and removing or replacing
incorrect byte-sequences and also provides column and line tracking.
"""
def __init__(self, source, override_encoding=None, transport_encoding=None,
same_origin_parent_encoding=None, likely_encoding=None,
default_encoding="windows-1252", useChardet=True):
"""Initialises the HTMLInputStream.
HTMLInputStream(source, [encoding]) -> Normalized stream from source
for use by html5lib.
source can be either a file-object, local filename or a string.
The optional encoding parameter must be a string that indicates
the encoding. If specified, that encoding will be used,
regardless of any BOM or later declaration (such as in a meta
element)
"""
# Raw Stream - for unicode objects this will encode to utf-8 and set
# self.charEncoding as appropriate
self.rawStream = self.openStream(source)
HTMLUnicodeInputStream.__init__(self, self.rawStream)
# Encoding Information
# Number of bytes to use when looking for a meta element with
# encoding information
self.numBytesMeta = 1024
# Number of bytes to use when using detecting encoding using chardet
self.numBytesChardet = 100
# Things from args
self.override_encoding = override_encoding
self.transport_encoding = transport_encoding
self.same_origin_parent_encoding = same_origin_parent_encoding
self.likely_encoding = likely_encoding
self.default_encoding = default_encoding
# Determine encoding
self.charEncoding = self.determineEncoding(useChardet)
assert self.charEncoding[0] is not None
# Call superclass
self.reset()
def reset(self):
self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace')
HTMLUnicodeInputStream.reset(self)
def openStream(self, source):
"""Produces a file object from source.
source can be either a file object, local filename or a string.
"""
# Already a file object
if hasattr(source, 'read'):
stream = source
else:
stream = BytesIO(source)
try:
stream.seek(stream.tell())
except: # pylint:disable=bare-except
stream = BufferedStream(stream)
return stream
def determineEncoding(self, chardet=True):
# BOMs take precedence over everything
# This will also read past the BOM if present
charEncoding = self.detectBOM(), "certain"
if charEncoding[0] is not None:
return charEncoding
# If we've been overriden, we've been overriden
charEncoding = lookupEncoding(self.override_encoding), "certain"
if charEncoding[0] is not None:
return charEncoding
# Now check the transport layer
charEncoding = lookupEncoding(self.transport_encoding), "certain"
if charEncoding[0] is not None:
return charEncoding
# Look for meta elements with encoding information
charEncoding = self.detectEncodingMeta(), "tentative"
if charEncoding[0] is not None:
return charEncoding
# Parent document encoding
charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative"
if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"):
return charEncoding
# "likely" encoding
charEncoding = lookupEncoding(self.likely_encoding), "tentative"
if charEncoding[0] is not None:
return charEncoding
# Guess with chardet, if available
if chardet:
try:
from chardet.universaldetector import UniversalDetector
except ImportError:
pass
else:
buffers = []
detector = UniversalDetector()
while not detector.done:
buffer = self.rawStream.read(self.numBytesChardet)
assert isinstance(buffer, bytes)
if not buffer:
break
buffers.append(buffer)
detector.feed(buffer)
detector.close()
encoding = lookupEncoding(detector.result['encoding'])
self.rawStream.seek(0)
if encoding is not None:
return encoding, "tentative"
# Try the default encoding
charEncoding = lookupEncoding(self.default_encoding), "tentative"
if charEncoding[0] is not None:
return charEncoding
# Fallback to html5lib's default if even that hasn't worked
return lookupEncoding("windows-1252"), "tentative"
def changeEncoding(self, newEncoding):
assert self.charEncoding[1] != "certain"
newEncoding = lookupEncoding(newEncoding)
if newEncoding is None:
return
if newEncoding.name in ("utf-16be", "utf-16le"):
newEncoding = lookupEncoding("utf-8")
assert newEncoding is not None
elif newEncoding == self.charEncoding[0]:
self.charEncoding = (self.charEncoding[0], "certain")
else:
self.rawStream.seek(0)
self.charEncoding = (newEncoding, "certain")
self.reset()
raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
def detectBOM(self):
"""Attempts to detect at BOM at the start of the stream. If
an encoding can be determined from the BOM return the name of the
encoding otherwise return None"""
bomDict = {
codecs.BOM_UTF8: 'utf-8',
codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
}
# Go to beginning of file and read in 4 bytes
string = self.rawStream.read(4)
assert isinstance(string, bytes)
# Try detecting the BOM using bytes from the string
encoding = bomDict.get(string[:3]) # UTF-8
seek = 3
if not encoding:
# Need to detect UTF-32 before UTF-16
encoding = bomDict.get(string) # UTF-32
seek = 4
if not encoding:
encoding = bomDict.get(string[:2]) # UTF-16
seek = 2
# Set the read position past the BOM if one was found, otherwise
# set it to the start of the stream
if encoding:
self.rawStream.seek(seek)
return lookupEncoding(encoding)
else:
self.rawStream.seek(0)
return None
def detectEncodingMeta(self):
"""Report the encoding declared by the meta element
"""
buffer = self.rawStream.read(self.numBytesMeta)
assert isinstance(buffer, bytes)
parser = EncodingParser(buffer)
self.rawStream.seek(0)
encoding = parser.getEncoding()
if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
encoding = lookupEncoding("utf-8")
return encoding
class EncodingBytes(bytes):
"""String-like object with an associated position and various extra methods
If the position is ever greater than the string length then an exception is
raised"""
def __new__(self, value):
assert isinstance(value, bytes)
return bytes.__new__(self, value.lower())
def __init__(self, value):
# pylint:disable=unused-argument
self._position = -1
def __iter__(self):
return self
def __next__(self):
p = self._position = self._position + 1
if p >= len(self):
raise StopIteration
elif p < 0:
raise TypeError
return self[p:p + 1]
def next(self):
# Py2 compat
return self.__next__()
def previous(self):
p = self._position
if p >= len(self):
raise StopIteration
elif p < 0:
raise TypeError
self._position = p = p - 1
return self[p:p + 1]
def setPosition(self, position):
if self._position >= len(self):
raise StopIteration
self._position = position
def getPosition(self):
if self._position >= len(self):
raise StopIteration
if self._position >= 0:
return self._position
else:
return None
position = property(getPosition, setPosition)
def getCurrentByte(self):
return self[self.position:self.position + 1]
currentByte = property(getCurrentByte)
def skip(self, chars=spaceCharactersBytes):
"""Skip past a list of characters"""
p = self.position # use property for the error-checking
while p < len(self):
c = self[p:p + 1]
if c not in chars:
self._position = p
return c
p += 1
self._position = p
return None
def skipUntil(self, chars):
p = self.position
while p < len(self):
c = self[p:p + 1]
if c in chars:
self._position = p
return c
p += 1
self._position = p
return None
def matchBytes(self, bytes):
"""Look for a sequence of bytes at the start of a string. If the bytes
are found return True and advance the position to the byte after the
match. Otherwise return False and leave the position alone"""
p = self.position
data = self[p:p + len(bytes)]
rv = data.startswith(bytes)
if rv:
self.position += len(bytes)
return rv
def jumpTo(self, bytes):
"""Look for the next sequence of bytes matching a given sequence. If
a match is found advance the position to the last byte of the match"""
newPosition = self[self.position:].find(bytes)
if newPosition > -1:
# XXX: This is ugly, but I can't see a nicer way to fix this.
if self._position == -1:
self._position = 0
self._position += (newPosition + len(bytes) - 1)
return True
else:
raise StopIteration
class EncodingParser(object):
"""Mini parser for detecting character encoding from meta elements"""
def __init__(self, data):
"""string - the data to work on for encoding detection"""
self.data = EncodingBytes(data)
self.encoding = None
def getEncoding(self):
methodDispatch = (
(b"<!--", self.handleComment),
(b"<meta", self.handleMeta),
(b"</", self.handlePossibleEndTag),
(b"<!", self.handleOther),
(b"<?", self.handleOther),
(b"<", self.handlePossibleStartTag))
for _ in self.data:
keepParsing = True
for key, method in methodDispatch:
if self.data.matchBytes(key):
try:
keepParsing = method()
break
except StopIteration:
keepParsing = False
break
if not keepParsing:
break
return self.encoding
def handleComment(self):
"""Skip over comments"""
return self.data.jumpTo(b"-->")
def handleMeta(self):
if self.data.currentByte not in spaceCharactersBytes:
# if we have <meta not followed by a space so just keep going
return True
# We have a valid meta element we want to search for attributes
hasPragma = False
pendingEncoding = None
while True:
# Try to find the next attribute after the current position
attr = self.getAttribute()
if attr is None:
return True
else:
if attr[0] == b"http-equiv":
hasPragma = attr[1] == b"content-type"
if hasPragma and pendingEncoding is not None:
self.encoding = pendingEncoding
return False
elif attr[0] == b"charset":
tentativeEncoding = attr[1]
codec = lookupEncoding(tentativeEncoding)
if codec is not None:
self.encoding = codec
return False
elif attr[0] == b"content":
contentParser = ContentAttrParser(EncodingBytes(attr[1]))
tentativeEncoding = contentParser.parse()
if tentativeEncoding is not None:
codec = lookupEncoding(tentativeEncoding)
if codec is not None:
if hasPragma:
self.encoding = codec
return False
else:
pendingEncoding = codec
def handlePossibleStartTag(self):
return self.handlePossibleTag(False)
def handlePossibleEndTag(self):
next(self.data)
return self.handlePossibleTag(True)
def handlePossibleTag(self, endTag):
data = self.data
if data.currentByte not in asciiLettersBytes:
# If the next byte is not an ascii letter either ignore this
# fragment (possible start tag case) or treat it according to
# handleOther
if endTag:
data.previous()
self.handleOther()
return True
c = data.skipUntil(spacesAngleBrackets)
if c == b"<":
# return to the first step in the overall "two step" algorithm
# reprocessing the < byte
data.previous()
else:
# Read all attributes
attr = self.getAttribute()
while attr is not None:
attr = self.getAttribute()
return True
def handleOther(self):
return self.data.jumpTo(b">")
def getAttribute(self):
"""Return a name,value pair for the next attribute in the stream,
if one is found, or None"""
data = self.data
# Step 1 (skip chars)
c = data.skip(spaceCharactersBytes | frozenset([b"/"]))
assert c is None or len(c) == 1
# Step 2
if c in (b">", None):
return None
# Step 3
attrName = []
attrValue = []
# Step 4 attribute name
while True:
if c == b"=" and attrName:
break
elif c in spaceCharactersBytes:
# Step 6!
c = data.skip()
break
elif c in (b"/", b">"):
return b"".join(attrName), b""
elif c in asciiUppercaseBytes:
attrName.append(c.lower())
elif c is None:
return None
else:
attrName.append(c)
# Step 5
c = next(data)
# Step 7
if c != b"=":
data.previous()
return b"".join(attrName), b""
# Step 8
next(data)
# Step 9
c = data.skip()
# Step 10
if c in (b"'", b'"'):
# 10.1
quoteChar = c
while True:
# 10.2
c = next(data)
# 10.3
if c == quoteChar:
next(data)
return b"".join(attrName), b"".join(attrValue)
# 10.4
elif c in asciiUppercaseBytes:
attrValue.append(c.lower())
# 10.5
else:
attrValue.append(c)
elif c == b">":
return b"".join(attrName), b""
elif c in asciiUppercaseBytes:
attrValue.append(c.lower())
elif c is None:
return None
else:
attrValue.append(c)
# Step 11
while True:
c = next(data)
if c in spacesAngleBrackets:
return b"".join(attrName), b"".join(attrValue)
elif c in asciiUppercaseBytes:
attrValue.append(c.lower())
elif c is None:
return None
else:
attrValue.append(c)
class ContentAttrParser(object):
def __init__(self, data):
assert isinstance(data, bytes)
self.data = data
def parse(self):
try:
# Check if the attr name is charset
# otherwise return
self.data.jumpTo(b"charset")
self.data.position += 1
self.data.skip()
if not self.data.currentByte == b"=":
# If there is no = sign keep looking for attrs
return None
self.data.position += 1
self.data.skip()
# Look for an encoding between matching quote marks
if self.data.currentByte in (b'"', b"'"):
quoteMark = self.data.currentByte
self.data.position += 1
oldPosition = self.data.position
if self.data.jumpTo(quoteMark):
return self.data[oldPosition:self.data.position]
else:
return None
else:
# Unquoted value
oldPosition = self.data.position
try:
self.data.skipUntil(spaceCharactersBytes)
return self.data[oldPosition:self.data.position]
except StopIteration:
# Return the whole remaining value
return self.data[oldPosition:]
except StopIteration:
return None
def lookupEncoding(encoding):
"""Return the python codec name corresponding to an encoding or None if the
string doesn't correspond to a valid encoding."""
if isinstance(encoding, binary_type):
try:
encoding = encoding.decode("ascii")
except UnicodeDecodeError:
return None
if encoding is not None:
try:
return webencodings.lookup(encoding)
except AttributeError:
return None
else:
return None
from __future__ import absolute_import, division, unicode_literals
from pip._vendor.six import unichr as chr
from collections import deque
from .constants import spaceCharacters
from .constants import entities
from .constants import asciiLetters, asciiUpper2Lower
from .constants import digits, hexDigits, EOF
from .constants import tokenTypes, tagTokenTypes
from .constants import replacementCharacters
from ._inputstream import HTMLInputStream
from ._trie import Trie
entitiesTrie = Trie(entities)
class HTMLTokenizer(object):
""" This class takes care of tokenizing HTML.
* self.currentToken
Holds the token that is currently being processed.
* self.state
Holds a reference to the method to be invoked... XXX
* self.stream
Points to HTMLInputStream object.
"""
def __init__(self, stream, parser=None, **kwargs):
self.stream = HTMLInputStream(stream, **kwargs)
self.parser = parser
# Setup the initial tokenizer state
self.escapeFlag = False
self.lastFourChars = []
self.state = self.dataState
self.escape = False
# The current token being created
self.currentToken = None
super(HTMLTokenizer, self).__init__()
def __iter__(self):
""" This is where the magic happens.
We do our usually processing through the states and when we have a token
to return we yield the token which pauses processing until the next token
is requested.
"""
self.tokenQueue = deque([])
# Start processing. When EOF is reached self.state will return False
# instead of True and the loop will terminate.
while self.state():
while self.stream.errors:
yield {"type": tokenTypes["ParseError"], "data": self.stream.errors.pop(0)}
while self.tokenQueue:
yield self.tokenQueue.popleft()
def consumeNumberEntity(self, isHex):
"""This function returns either U+FFFD or the character based on the
decimal or hexadecimal representation. It also discards ";" if present.
If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.
"""
allowed = digits
radix = 10
if isHex:
allowed = hexDigits
radix = 16
charStack = []
# Consume all the characters that are in range while making sure we
# don't hit an EOF.
c = self.stream.char()
while c in allowed and c is not EOF:
charStack.append(c)
c = self.stream.char()
# Convert the set of characters consumed to an int.
charAsInt = int("".join(charStack), radix)
# Certain characters get replaced with others
if charAsInt in replacementCharacters:
char = replacementCharacters[charAsInt]
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"illegal-codepoint-for-numeric-entity",
"datavars": {"charAsInt": charAsInt}})
elif ((0xD800 <= charAsInt <= 0xDFFF) or
(charAsInt > 0x10FFFF)):
char = "\uFFFD"
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"illegal-codepoint-for-numeric-entity",
"datavars": {"charAsInt": charAsInt}})
else:
# Should speed up this check somehow (e.g. move the set to a constant)
if ((0x0001 <= charAsInt <= 0x0008) or
(0x000E <= charAsInt <= 0x001F) or
(0x007F <= charAsInt <= 0x009F) or
(0xFDD0 <= charAsInt <= 0xFDEF) or
charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE,
0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE,
0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE,
0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE,
0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE,
0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE,
0xFFFFF, 0x10FFFE, 0x10FFFF])):
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data":
"illegal-codepoint-for-numeric-entity",
"datavars": {"charAsInt": charAsInt}})
try:
# Try/except needed as UCS-2 Python builds' unichar only works
# within the BMP.
char = chr(charAsInt)
except ValueError:
v = charAsInt - 0x10000
char = chr(0xD800 | (v >> 10)) + chr(0xDC00 | (v & 0x3FF))
# Discard the ; if present. Otherwise, put it back on the queue and
# invoke parseError on parser.
if c != ";":
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"numeric-entity-without-semicolon"})
self.stream.unget(c)
return char
def consumeEntity(self, allowedChar=None, fromAttribute=False):
# Initialise to the default output for when no entity is matched
output = "&"
charStack = [self.stream.char()]
if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or
(allowedChar is not None and allowedChar == charStack[0])):
self.stream.unget(charStack[0])
elif charStack[0] == "#":
# Read the next character to see if it's hex or decimal
hex = False
charStack.append(self.stream.char())
if charStack[-1] in ("x", "X"):
hex = True
charStack.append(self.stream.char())
# charStack[-1] should be the first digit
if (hex and charStack[-1] in hexDigits) \
or (not hex and charStack[-1] in digits):
# At least one digit found, so consume the whole number
self.stream.unget(charStack[-1])
output = self.consumeNumberEntity(hex)
else:
# No digits found
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "expected-numeric-entity"})
self.stream.unget(charStack.pop())
output = "&" + "".join(charStack)
else:
# At this point in the process might have named entity. Entities
# are stored in the global variable "entities".
#
# Consume characters and compare to these to a substring of the
# entity names in the list until the substring no longer matches.
while (charStack[-1] is not EOF):
if not entitiesTrie.has_keys_with_prefix("".join(charStack)):
break
charStack.append(self.stream.char())
# At this point we have a string that starts with some characters
# that may match an entity
# Try to find the longest entity the string will match to take care
# of &noti for instance.
try:
entityName = entitiesTrie.longest_prefix("".join(charStack[:-1]))
entityLength = len(entityName)
except KeyError:
entityName = None
if entityName is not None:
if entityName[-1] != ";":
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"named-entity-without-semicolon"})
if (entityName[-1] != ";" and fromAttribute and
(charStack[entityLength] in asciiLetters or
charStack[entityLength] in digits or
charStack[entityLength] == "=")):
self.stream.unget(charStack.pop())
output = "&" + "".join(charStack)
else:
output = entities[entityName]
self.stream.unget(charStack.pop())
output += "".join(charStack[entityLength:])
else:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"expected-named-entity"})
self.stream.unget(charStack.pop())
output = "&" + "".join(charStack)
if fromAttribute:
self.currentToken["data"][-1][1] += output
else:
if output in spaceCharacters:
tokenType = "SpaceCharacters"
else:
tokenType = "Characters"
self.tokenQueue.append({"type": tokenTypes[tokenType], "data": output})
def processEntityInAttribute(self, allowedChar):
"""This method replaces the need for "entityInAttributeValueState".
"""
self.consumeEntity(allowedChar=allowedChar, fromAttribute=True)
def emitCurrentToken(self):
"""This method is a generic handler for emitting the tags. It also sets
the state to "data" because that's what's needed after a token has been
emitted.
"""
token = self.currentToken
# Add token to the queue to be yielded
if (token["type"] in tagTokenTypes):
token["name"] = token["name"].translate(asciiUpper2Lower)
if token["type"] == tokenTypes["EndTag"]:
if token["data"]:
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "attributes-in-end-tag"})
if token["selfClosing"]:
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "self-closing-flag-on-end-tag"})
self.tokenQueue.append(token)
self.state = self.dataState
# Below are the various tokenizer states worked out.
def dataState(self):
data = self.stream.char()
if data == "&":
self.state = self.entityDataState
elif data == "<":
self.state = self.tagOpenState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.tokenQueue.append({"type": tokenTypes["Characters"],
"data": "\u0000"})
elif data is EOF:
# Tokenization ends.
return False
elif data in spaceCharacters:
# Directly after emitting a token you switch back to the "data
# state". At that point spaceCharacters are important so they are
# emitted separately.
self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data":
data + self.stream.charsUntil(spaceCharacters, True)})
# No need to update lastFourChars here, since the first space will
# have already been appended to lastFourChars and will have broken
# any <!-- or --> sequences
else:
chars = self.stream.charsUntil(("&", "<", "\u0000"))
self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
data + chars})
return True
def entityDataState(self):
self.consumeEntity()
self.state = self.dataState
return True
def rcdataState(self):
data = self.stream.char()
if data == "&":
self.state = self.characterReferenceInRcdata
elif data == "<":
self.state = self.rcdataLessThanSignState
elif data == EOF:
# Tokenization ends.
return False
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.tokenQueue.append({"type": tokenTypes["Characters"],
"data": "\uFFFD"})
elif data in spaceCharacters:
# Directly after emitting a token you switch back to the "data
# state". At that point spaceCharacters are important so they are
# emitted separately.
self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data":
data + self.stream.charsUntil(spaceCharacters, True)})
# No need to update lastFourChars here, since the first space will
# have already been appended to lastFourChars and will have broken
# any <!-- or --> sequences
else:
chars = self.stream.charsUntil(("&", "<", "\u0000"))
self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
data + chars})
return True
def characterReferenceInRcdata(self):
self.consumeEntity()
self.state = self.rcdataState
return True
def rawtextState(self):
data = self.stream.char()
if data == "<":
self.state = self.rawtextLessThanSignState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.tokenQueue.append({"type": tokenTypes["Characters"],
"data": "\uFFFD"})
elif data == EOF:
# Tokenization ends.
return False
else:
chars = self.stream.charsUntil(("<", "\u0000"))
self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
data + chars})
return True
def scriptDataState(self):
data = self.stream.char()
if data == "<":
self.state = self.scriptDataLessThanSignState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.tokenQueue.append({"type": tokenTypes["Characters"],
"data": "\uFFFD"})
elif data == EOF:
# Tokenization ends.
return False
else:
chars = self.stream.charsUntil(("<", "\u0000"))
self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
data + chars})
return True
def plaintextState(self):
data = self.stream.char()
if data == EOF:
# Tokenization ends.
return False
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.tokenQueue.append({"type": tokenTypes["Characters"],
"data": "\uFFFD"})
else:
self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
data + self.stream.charsUntil("\u0000")})
return True
def tagOpenState(self):
data = self.stream.char()
if data == "!":
self.state = self.markupDeclarationOpenState
elif data == "/":
self.state = self.closeTagOpenState
elif data in asciiLetters:
self.currentToken = {"type": tokenTypes["StartTag"],
"name": data, "data": [],
"selfClosing": False,
"selfClosingAcknowledged": False}
self.state = self.tagNameState
elif data == ">":
# XXX In theory it could be something besides a tag name. But
# do we really care?
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"expected-tag-name-but-got-right-bracket"})
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<>"})
self.state = self.dataState
elif data == "?":
# XXX In theory it could be something besides a tag name. But
# do we really care?
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"expected-tag-name-but-got-question-mark"})
self.stream.unget(data)
self.state = self.bogusCommentState
else:
# XXX
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"expected-tag-name"})
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
self.stream.unget(data)
self.state = self.dataState
return True
def closeTagOpenState(self):
data = self.stream.char()
if data in asciiLetters:
self.currentToken = {"type": tokenTypes["EndTag"], "name": data,
"data": [], "selfClosing": False}
self.state = self.tagNameState
elif data == ">":
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"expected-closing-tag-but-got-right-bracket"})
self.state = self.dataState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"expected-closing-tag-but-got-eof"})
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
self.state = self.dataState
else:
# XXX data can be _'_...
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"expected-closing-tag-but-got-char",
"datavars": {"data": data}})
self.stream.unget(data)
self.state = self.bogusCommentState
return True
def tagNameState(self):
data = self.stream.char()
if data in spaceCharacters:
self.state = self.beforeAttributeNameState
elif data == ">":
self.emitCurrentToken()
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-tag-name"})
self.state = self.dataState
elif data == "/":
self.state = self.selfClosingStartTagState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["name"] += "\uFFFD"
else:
self.currentToken["name"] += data
# (Don't use charsUntil here, because tag names are
# very short and it's faster to not do anything fancy)
return True
def rcdataLessThanSignState(self):
data = self.stream.char()
if data == "/":
self.temporaryBuffer = ""
self.state = self.rcdataEndTagOpenState
else:
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
self.stream.unget(data)
self.state = self.rcdataState
return True
def rcdataEndTagOpenState(self):
data = self.stream.char()
if data in asciiLetters:
self.temporaryBuffer += data
self.state = self.rcdataEndTagNameState
else:
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
self.stream.unget(data)
self.state = self.rcdataState
return True
def rcdataEndTagNameState(self):
appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower()
data = self.stream.char()
if data in spaceCharacters and appropriate:
self.currentToken = {"type": tokenTypes["EndTag"],
"name": self.temporaryBuffer,
"data": [], "selfClosing": False}
self.state = self.beforeAttributeNameState
elif data == "/" and appropriate:
self.currentToken = {"type": tokenTypes["EndTag"],
"name": self.temporaryBuffer,
"data": [], "selfClosing": False}
self.state = self.selfClosingStartTagState
elif data == ">" and appropriate:
self.currentToken = {"type": tokenTypes["EndTag"],
"name": self.temporaryBuffer,
"data": [], "selfClosing": False}
self.emitCurrentToken()
self.state = self.dataState
elif data in asciiLetters:
self.temporaryBuffer += data
else:
self.tokenQueue.append({"type": tokenTypes["Characters"],
"data": "</" + self.temporaryBuffer})
self.stream.unget(data)
self.state = self.rcdataState
return True
def rawtextLessThanSignState(self):
data = self.stream.char()
if data == "/":
self.temporaryBuffer = ""
self.state = self.rawtextEndTagOpenState
else:
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
self.stream.unget(data)
self.state = self.rawtextState
return True
def rawtextEndTagOpenState(self):
data = self.stream.char()
if data in asciiLetters:
self.temporaryBuffer += data
self.state = self.rawtextEndTagNameState
else:
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
self.stream.unget(data)
self.state = self.rawtextState
return True
def rawtextEndTagNameState(self):
appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower()
data = self.stream.char()
if data in spaceCharacters and appropriate:
self.currentToken = {"type": tokenTypes["EndTag"],
"name": self.temporaryBuffer,
"data": [], "selfClosing": False}
self.state = self.beforeAttributeNameState
elif data == "/" and appropriate:
self.currentToken = {"type": tokenTypes["EndTag"],
"name": self.temporaryBuffer,
"data": [], "selfClosing": False}
self.state = self.selfClosingStartTagState
elif data == ">" and appropriate:
self.currentToken = {"type": tokenTypes["EndTag"],
"name": self.temporaryBuffer,
"data": [], "selfClosing": False}
self.emitCurrentToken()
self.state = self.dataState
elif data in asciiLetters:
self.temporaryBuffer += data
else:
self.tokenQueue.append({"type": tokenTypes["Characters"],
"data": "</" + self.temporaryBuffer})
self.stream.unget(data)
self.state = self.rawtextState
return True
def scriptDataLessThanSignState(self):
data = self.stream.char()
if data == "/":
self.temporaryBuffer = ""
self.state = self.scriptDataEndTagOpenState
elif data == "!":
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<!"})
self.state = self.scriptDataEscapeStartState
else:
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
self.stream.unget(data)
self.state = self.scriptDataState
return True
def scriptDataEndTagOpenState(self):
data = self.stream.char()
if data in asciiLetters:
self.temporaryBuffer += data
self.state = self.scriptDataEndTagNameState
else:
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
self.stream.unget(data)
self.state = self.scriptDataState
return True
def scriptDataEndTagNameState(self):
appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower()
data = self.stream.char()
if data in spaceCharacters and appropriate:
self.currentToken = {"type": tokenTypes["EndTag"],
"name": self.temporaryBuffer,
"data": [], "selfClosing": False}
self.state = self.beforeAttributeNameState
elif data == "/" and appropriate:
self.currentToken = {"type": tokenTypes["EndTag"],
"name": self.temporaryBuffer,
"data": [], "selfClosing": False}
self.state = self.selfClosingStartTagState
elif data == ">" and appropriate:
self.currentToken = {"type": tokenTypes["EndTag"],
"name": self.temporaryBuffer,
"data": [], "selfClosing": False}
self.emitCurrentToken()
self.state = self.dataState
elif data in asciiLetters:
self.temporaryBuffer += data
else:
self.tokenQueue.append({"type": tokenTypes["Characters"],
"data": "</" + self.temporaryBuffer})
self.stream.unget(data)
self.state = self.scriptDataState
return True
def scriptDataEscapeStartState(self):
data = self.stream.char()
if data == "-":
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
self.state = self.scriptDataEscapeStartDashState
else:
self.stream.unget(data)
self.state = self.scriptDataState
return True
def scriptDataEscapeStartDashState(self):
data = self.stream.char()
if data == "-":
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
self.state = self.scriptDataEscapedDashDashState
else:
self.stream.unget(data)
self.state = self.scriptDataState
return True
def scriptDataEscapedState(self):
data = self.stream.char()
if data == "-":
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
self.state = self.scriptDataEscapedDashState
elif data == "<":
self.state = self.scriptDataEscapedLessThanSignState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.tokenQueue.append({"type": tokenTypes["Characters"],
"data": "\uFFFD"})
elif data == EOF:
self.state = self.dataState
else:
chars = self.stream.charsUntil(("<", "-", "\u0000"))
self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
data + chars})
return True
def scriptDataEscapedDashState(self):
data = self.stream.char()
if data == "-":
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
self.state = self.scriptDataEscapedDashDashState
elif data == "<":
self.state = self.scriptDataEscapedLessThanSignState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.tokenQueue.append({"type": tokenTypes["Characters"],
"data": "\uFFFD"})
self.state = self.scriptDataEscapedState
elif data == EOF:
self.state = self.dataState
else:
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
self.state = self.scriptDataEscapedState
return True
def scriptDataEscapedDashDashState(self):
data = self.stream.char()
if data == "-":
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
elif data == "<":
self.state = self.scriptDataEscapedLessThanSignState
elif data == ">":
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"})
self.state = self.scriptDataState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.tokenQueue.append({"type": tokenTypes["Characters"],
"data": "\uFFFD"})
self.state = self.scriptDataEscapedState
elif data == EOF:
self.state = self.dataState
else:
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
self.state = self.scriptDataEscapedState
return True
def scriptDataEscapedLessThanSignState(self):
data = self.stream.char()
if data == "/":
self.temporaryBuffer = ""
self.state = self.scriptDataEscapedEndTagOpenState
elif data in asciiLetters:
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<" + data})
self.temporaryBuffer = data
self.state = self.scriptDataDoubleEscapeStartState
else:
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
self.stream.unget(data)
self.state = self.scriptDataEscapedState
return True
def scriptDataEscapedEndTagOpenState(self):
data = self.stream.char()
if data in asciiLetters:
self.temporaryBuffer = data
self.state = self.scriptDataEscapedEndTagNameState
else:
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
self.stream.unget(data)
self.state = self.scriptDataEscapedState
return True
def scriptDataEscapedEndTagNameState(self):
appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower()
data = self.stream.char()
if data in spaceCharacters and appropriate:
self.currentToken = {"type": tokenTypes["EndTag"],
"name": self.temporaryBuffer,
"data": [], "selfClosing": False}
self.state = self.beforeAttributeNameState
elif data == "/" and appropriate:
self.currentToken = {"type": tokenTypes["EndTag"],
"name": self.temporaryBuffer,
"data": [], "selfClosing": False}
self.state = self.selfClosingStartTagState
elif data == ">" and appropriate:
self.currentToken = {"type": tokenTypes["EndTag"],
"name": self.temporaryBuffer,
"data": [], "selfClosing": False}
self.emitCurrentToken()
self.state = self.dataState
elif data in asciiLetters:
self.temporaryBuffer += data
else:
self.tokenQueue.append({"type": tokenTypes["Characters"],
"data": "</" + self.temporaryBuffer})
self.stream.unget(data)
self.state = self.scriptDataEscapedState
return True
def scriptDataDoubleEscapeStartState(self):
data = self.stream.char()
if data in (spaceCharacters | frozenset(("/", ">"))):
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
if self.temporaryBuffer.lower() == "script":
self.state = self.scriptDataDoubleEscapedState
else:
self.state = self.scriptDataEscapedState
elif data in asciiLetters:
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
self.temporaryBuffer += data
else:
self.stream.unget(data)
self.state = self.scriptDataEscapedState
return True
def scriptDataDoubleEscapedState(self):
data = self.stream.char()
if data == "-":
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
self.state = self.scriptDataDoubleEscapedDashState
elif data == "<":
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
self.state = self.scriptDataDoubleEscapedLessThanSignState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.tokenQueue.append({"type": tokenTypes["Characters"],
"data": "\uFFFD"})
elif data == EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-script-in-script"})
self.state = self.dataState
else:
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
return True
def scriptDataDoubleEscapedDashState(self):
data = self.stream.char()
if data == "-":
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
self.state = self.scriptDataDoubleEscapedDashDashState
elif data == "<":
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
self.state = self.scriptDataDoubleEscapedLessThanSignState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.tokenQueue.append({"type": tokenTypes["Characters"],
"data": "\uFFFD"})
self.state = self.scriptDataDoubleEscapedState
elif data == EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-script-in-script"})
self.state = self.dataState
else:
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
self.state = self.scriptDataDoubleEscapedState
return True
def scriptDataDoubleEscapedDashDashState(self):
data = self.stream.char()
if data == "-":
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
elif data == "<":
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
self.state = self.scriptDataDoubleEscapedLessThanSignState
elif data == ">":
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"})
self.state = self.scriptDataState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.tokenQueue.append({"type": tokenTypes["Characters"],
"data": "\uFFFD"})
self.state = self.scriptDataDoubleEscapedState
elif data == EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-script-in-script"})
self.state = self.dataState
else:
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
self.state = self.scriptDataDoubleEscapedState
return True
def scriptDataDoubleEscapedLessThanSignState(self):
data = self.stream.char()
if data == "/":
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "/"})
self.temporaryBuffer = ""
self.state = self.scriptDataDoubleEscapeEndState
else:
self.stream.unget(data)
self.state = self.scriptDataDoubleEscapedState
return True
def scriptDataDoubleEscapeEndState(self):
data = self.stream.char()
if data in (spaceCharacters | frozenset(("/", ">"))):
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
if self.temporaryBuffer.lower() == "script":
self.state = self.scriptDataEscapedState
else:
self.state = self.scriptDataDoubleEscapedState
elif data in asciiLetters:
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
self.temporaryBuffer += data
else:
self.stream.unget(data)
self.state = self.scriptDataDoubleEscapedState
return True
def beforeAttributeNameState(self):
data = self.stream.char()
if data in spaceCharacters:
self.stream.charsUntil(spaceCharacters, True)
elif data in asciiLetters:
self.currentToken["data"].append([data, ""])
self.state = self.attributeNameState
elif data == ">":
self.emitCurrentToken()
elif data == "/":
self.state = self.selfClosingStartTagState
elif data in ("'", '"', "=", "<"):
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"invalid-character-in-attribute-name"})
self.currentToken["data"].append([data, ""])
self.state = self.attributeNameState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["data"].append(["\uFFFD", ""])
self.state = self.attributeNameState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"expected-attribute-name-but-got-eof"})
self.state = self.dataState
else:
self.currentToken["data"].append([data, ""])
self.state = self.attributeNameState
return True
def attributeNameState(self):
data = self.stream.char()
leavingThisState = True
emitToken = False
if data == "=":
self.state = self.beforeAttributeValueState
elif data in asciiLetters:
self.currentToken["data"][-1][0] += data +\
self.stream.charsUntil(asciiLetters, True)
leavingThisState = False
elif data == ">":
# XXX If we emit here the attributes are converted to a dict
# without being checked and when the code below runs we error
# because data is a dict not a list
emitToken = True
elif data in spaceCharacters:
self.state = self.afterAttributeNameState
elif data == "/":
self.state = self.selfClosingStartTagState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["data"][-1][0] += "\uFFFD"
leavingThisState = False
elif data in ("'", '"', "<"):
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data":
"invalid-character-in-attribute-name"})
self.currentToken["data"][-1][0] += data
leavingThisState = False
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "eof-in-attribute-name"})
self.state = self.dataState
else:
self.currentToken["data"][-1][0] += data
leavingThisState = False
if leavingThisState:
# Attributes are not dropped at this stage. That happens when the
# start tag token is emitted so values can still be safely appended
# to attributes, but we do want to report the parse error in time.
self.currentToken["data"][-1][0] = (
self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
for name, _ in self.currentToken["data"][:-1]:
if self.currentToken["data"][-1][0] == name:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"duplicate-attribute"})
break
# XXX Fix for above XXX
if emitToken:
self.emitCurrentToken()
return True
def afterAttributeNameState(self):
data = self.stream.char()
if data in spaceCharacters:
self.stream.charsUntil(spaceCharacters, True)
elif data == "=":
self.state = self.beforeAttributeValueState
elif data == ">":
self.emitCurrentToken()
elif data in asciiLetters:
self.currentToken["data"].append([data, ""])
self.state = self.attributeNameState
elif data == "/":
self.state = self.selfClosingStartTagState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["data"].append(["\uFFFD", ""])
self.state = self.attributeNameState
elif data in ("'", '"', "<"):
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"invalid-character-after-attribute-name"})
self.currentToken["data"].append([data, ""])
self.state = self.attributeNameState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"expected-end-of-tag-but-got-eof"})
self.state = self.dataState
else:
self.currentToken["data"].append([data, ""])
self.state = self.attributeNameState
return True
def beforeAttributeValueState(self):
data = self.stream.char()
if data in spaceCharacters:
self.stream.charsUntil(spaceCharacters, True)
elif data == "\"":
self.state = self.attributeValueDoubleQuotedState
elif data == "&":
self.state = self.attributeValueUnQuotedState
self.stream.unget(data)
elif data == "'":
self.state = self.attributeValueSingleQuotedState
elif data == ">":
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"expected-attribute-value-but-got-right-bracket"})
self.emitCurrentToken()
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["data"][-1][1] += "\uFFFD"
self.state = self.attributeValueUnQuotedState
elif data in ("=", "<", "`"):
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"equals-in-unquoted-attribute-value"})
self.currentToken["data"][-1][1] += data
self.state = self.attributeValueUnQuotedState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"expected-attribute-value-but-got-eof"})
self.state = self.dataState
else:
self.currentToken["data"][-1][1] += data
self.state = self.attributeValueUnQuotedState
return True
def attributeValueDoubleQuotedState(self):
data = self.stream.char()
if data == "\"":
self.state = self.afterAttributeValueState
elif data == "&":
self.processEntityInAttribute('"')
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["data"][-1][1] += "\uFFFD"
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-attribute-value-double-quote"})
self.state = self.dataState
else:
self.currentToken["data"][-1][1] += data +\
self.stream.charsUntil(("\"", "&", "\u0000"))
return True
def attributeValueSingleQuotedState(self):
data = self.stream.char()
if data == "'":
self.state = self.afterAttributeValueState
elif data == "&":
self.processEntityInAttribute("'")
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["data"][-1][1] += "\uFFFD"
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-attribute-value-single-quote"})
self.state = self.dataState
else:
self.currentToken["data"][-1][1] += data +\
self.stream.charsUntil(("'", "&", "\u0000"))
return True
def attributeValueUnQuotedState(self):
data = self.stream.char()
if data in spaceCharacters:
self.state = self.beforeAttributeNameState
elif data == "&":
self.processEntityInAttribute(">")
elif data == ">":
self.emitCurrentToken()
elif data in ('"', "'", "=", "<", "`"):
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-character-in-unquoted-attribute-value"})
self.currentToken["data"][-1][1] += data
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["data"][-1][1] += "\uFFFD"
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-attribute-value-no-quotes"})
self.state = self.dataState
else:
self.currentToken["data"][-1][1] += data + self.stream.charsUntil(
frozenset(("&", ">", '"', "'", "=", "<", "`", "\u0000")) | spaceCharacters)
return True
def afterAttributeValueState(self):
data = self.stream.char()
if data in spaceCharacters:
self.state = self.beforeAttributeNameState
elif data == ">":
self.emitCurrentToken()
elif data == "/":
self.state = self.selfClosingStartTagState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-EOF-after-attribute-value"})
self.stream.unget(data)
self.state = self.dataState
else:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-character-after-attribute-value"})
self.stream.unget(data)
self.state = self.beforeAttributeNameState
return True
def selfClosingStartTagState(self):
data = self.stream.char()
if data == ">":
self.currentToken["selfClosing"] = True
self.emitCurrentToken()
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data":
"unexpected-EOF-after-solidus-in-tag"})
self.stream.unget(data)
self.state = self.dataState
else:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-character-after-solidus-in-tag"})
self.stream.unget(data)
self.state = self.beforeAttributeNameState
return True
def bogusCommentState(self):
# Make a new comment token and give it as value all the characters
# until the first > or EOF (charsUntil checks for EOF automatically)
# and emit it.
data = self.stream.charsUntil(">")
data = data.replace("\u0000", "\uFFFD")
self.tokenQueue.append(
{"type": tokenTypes["Comment"], "data": data})
# Eat the character directly after the bogus comment which is either a
# ">" or an EOF.
self.stream.char()
self.state = self.dataState
return True
def markupDeclarationOpenState(self):
charStack = [self.stream.char()]
if charStack[-1] == "-":
charStack.append(self.stream.char())
if charStack[-1] == "-":
self.currentToken = {"type": tokenTypes["Comment"], "data": ""}
self.state = self.commentStartState
return True
elif charStack[-1] in ('d', 'D'):
matched = True
for expected in (('o', 'O'), ('c', 'C'), ('t', 'T'),
('y', 'Y'), ('p', 'P'), ('e', 'E')):
charStack.append(self.stream.char())
if charStack[-1] not in expected:
matched = False
break
if matched:
self.currentToken = {"type": tokenTypes["Doctype"],
"name": "",
"publicId": None, "systemId": None,
"correct": True}
self.state = self.doctypeState
return True
elif (charStack[-1] == "[" and
self.parser is not None and
self.parser.tree.openElements and
self.parser.tree.openElements[-1].namespace != self.parser.tree.defaultNamespace):
matched = True
for expected in ["C", "D", "A", "T", "A", "["]:
charStack.append(self.stream.char())
if charStack[-1] != expected:
matched = False
break
if matched:
self.state = self.cdataSectionState
return True
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"expected-dashes-or-doctype"})
while charStack:
self.stream.unget(charStack.pop())
self.state = self.bogusCommentState
return True
def commentStartState(self):
data = self.stream.char()
if data == "-":
self.state = self.commentStartDashState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["data"] += "\uFFFD"
elif data == ">":
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"incorrect-comment"})
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-comment"})
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
self.currentToken["data"] += data
self.state = self.commentState
return True
def commentStartDashState(self):
data = self.stream.char()
if data == "-":
self.state = self.commentEndState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["data"] += "-\uFFFD"
elif data == ">":
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"incorrect-comment"})
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-comment"})
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
self.currentToken["data"] += "-" + data
self.state = self.commentState
return True
def commentState(self):
data = self.stream.char()
if data == "-":
self.state = self.commentEndDashState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["data"] += "\uFFFD"
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "eof-in-comment"})
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
self.currentToken["data"] += data + \
self.stream.charsUntil(("-", "\u0000"))
return True
def commentEndDashState(self):
data = self.stream.char()
if data == "-":
self.state = self.commentEndState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["data"] += "-\uFFFD"
self.state = self.commentState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-comment-end-dash"})
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
self.currentToken["data"] += "-" + data
self.state = self.commentState
return True
def commentEndState(self):
data = self.stream.char()
if data == ">":
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["data"] += "--\uFFFD"
self.state = self.commentState
elif data == "!":
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-bang-after-double-dash-in-comment"})
self.state = self.commentEndBangState
elif data == "-":
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-dash-after-double-dash-in-comment"})
self.currentToken["data"] += data
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-comment-double-dash"})
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
# XXX
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-char-in-comment"})
self.currentToken["data"] += "--" + data
self.state = self.commentState
return True
def commentEndBangState(self):
data = self.stream.char()
if data == ">":
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
elif data == "-":
self.currentToken["data"] += "--!"
self.state = self.commentEndDashState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["data"] += "--!\uFFFD"
self.state = self.commentState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-comment-end-bang-state"})
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
self.currentToken["data"] += "--!" + data
self.state = self.commentState
return True
def doctypeState(self):
data = self.stream.char()
if data in spaceCharacters:
self.state = self.beforeDoctypeNameState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"expected-doctype-name-but-got-eof"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"need-space-after-doctype"})
self.stream.unget(data)
self.state = self.beforeDoctypeNameState
return True
def beforeDoctypeNameState(self):
data = self.stream.char()
if data in spaceCharacters:
pass
elif data == ">":
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"expected-doctype-name-but-got-right-bracket"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["name"] = "\uFFFD"
self.state = self.doctypeNameState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"expected-doctype-name-but-got-eof"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
self.currentToken["name"] = data
self.state = self.doctypeNameState
return True
def doctypeNameState(self):
data = self.stream.char()
if data in spaceCharacters:
self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
self.state = self.afterDoctypeNameState
elif data == ">":
self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["name"] += "\uFFFD"
self.state = self.doctypeNameState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-doctype-name"})
self.currentToken["correct"] = False
self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
self.currentToken["name"] += data
return True
def afterDoctypeNameState(self):
data = self.stream.char()
if data in spaceCharacters:
pass
elif data == ">":
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
elif data is EOF:
self.currentToken["correct"] = False
self.stream.unget(data)
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-doctype"})
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
if data in ("p", "P"):
matched = True
for expected in (("u", "U"), ("b", "B"), ("l", "L"),
("i", "I"), ("c", "C")):
data = self.stream.char()
if data not in expected:
matched = False
break
if matched:
self.state = self.afterDoctypePublicKeywordState
return True
elif data in ("s", "S"):
matched = True
for expected in (("y", "Y"), ("s", "S"), ("t", "T"),
("e", "E"), ("m", "M")):
data = self.stream.char()
if data not in expected:
matched = False
break
if matched:
self.state = self.afterDoctypeSystemKeywordState
return True
# All the characters read before the current 'data' will be
# [a-zA-Z], so they're garbage in the bogus doctype and can be
# discarded; only the latest character might be '>' or EOF
# and needs to be ungetted
self.stream.unget(data)
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"expected-space-or-right-bracket-in-doctype", "datavars":
{"data": data}})
self.currentToken["correct"] = False
self.state = self.bogusDoctypeState
return True
def afterDoctypePublicKeywordState(self):
data = self.stream.char()
if data in spaceCharacters:
self.state = self.beforeDoctypePublicIdentifierState
elif data in ("'", '"'):
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-char-in-doctype"})
self.stream.unget(data)
self.state = self.beforeDoctypePublicIdentifierState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-doctype"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
self.stream.unget(data)
self.state = self.beforeDoctypePublicIdentifierState
return True
def beforeDoctypePublicIdentifierState(self):
data = self.stream.char()
if data in spaceCharacters:
pass
elif data == "\"":
self.currentToken["publicId"] = ""
self.state = self.doctypePublicIdentifierDoubleQuotedState
elif data == "'":
self.currentToken["publicId"] = ""
self.state = self.doctypePublicIdentifierSingleQuotedState
elif data == ">":
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-end-of-doctype"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-doctype"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-char-in-doctype"})
self.currentToken["correct"] = False
self.state = self.bogusDoctypeState
return True
def doctypePublicIdentifierDoubleQuotedState(self):
data = self.stream.char()
if data == "\"":
self.state = self.afterDoctypePublicIdentifierState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["publicId"] += "\uFFFD"
elif data == ">":
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-end-of-doctype"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-doctype"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
self.currentToken["publicId"] += data
return True
def doctypePublicIdentifierSingleQuotedState(self):
data = self.stream.char()
if data == "'":
self.state = self.afterDoctypePublicIdentifierState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["publicId"] += "\uFFFD"
elif data == ">":
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-end-of-doctype"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-doctype"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
self.currentToken["publicId"] += data
return True
def afterDoctypePublicIdentifierState(self):
data = self.stream.char()
if data in spaceCharacters:
self.state = self.betweenDoctypePublicAndSystemIdentifiersState
elif data == ">":
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
elif data == '"':
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-char-in-doctype"})
self.currentToken["systemId"] = ""
self.state = self.doctypeSystemIdentifierDoubleQuotedState
elif data == "'":
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-char-in-doctype"})
self.currentToken["systemId"] = ""
self.state = self.doctypeSystemIdentifierSingleQuotedState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-doctype"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-char-in-doctype"})
self.currentToken["correct"] = False
self.state = self.bogusDoctypeState
return True
def betweenDoctypePublicAndSystemIdentifiersState(self):
data = self.stream.char()
if data in spaceCharacters:
pass
elif data == ">":
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
elif data == '"':
self.currentToken["systemId"] = ""
self.state = self.doctypeSystemIdentifierDoubleQuotedState
elif data == "'":
self.currentToken["systemId"] = ""
self.state = self.doctypeSystemIdentifierSingleQuotedState
elif data == EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-doctype"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-char-in-doctype"})
self.currentToken["correct"] = False
self.state = self.bogusDoctypeState
return True
def afterDoctypeSystemKeywordState(self):
data = self.stream.char()
if data in spaceCharacters:
self.state = self.beforeDoctypeSystemIdentifierState
elif data in ("'", '"'):
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-char-in-doctype"})
self.stream.unget(data)
self.state = self.beforeDoctypeSystemIdentifierState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-doctype"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
self.stream.unget(data)
self.state = self.beforeDoctypeSystemIdentifierState
return True
def beforeDoctypeSystemIdentifierState(self):
data = self.stream.char()
if data in spaceCharacters:
pass
elif data == "\"":
self.currentToken["systemId"] = ""
self.state = self.doctypeSystemIdentifierDoubleQuotedState
elif data == "'":
self.currentToken["systemId"] = ""
self.state = self.doctypeSystemIdentifierSingleQuotedState
elif data == ">":
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-char-in-doctype"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-doctype"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-char-in-doctype"})
self.currentToken["correct"] = False
self.state = self.bogusDoctypeState
return True
def doctypeSystemIdentifierDoubleQuotedState(self):
data = self.stream.char()
if data == "\"":
self.state = self.afterDoctypeSystemIdentifierState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["systemId"] += "\uFFFD"
elif data == ">":
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-end-of-doctype"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-doctype"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
self.currentToken["systemId"] += data
return True
def doctypeSystemIdentifierSingleQuotedState(self):
data = self.stream.char()
if data == "'":
self.state = self.afterDoctypeSystemIdentifierState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["systemId"] += "\uFFFD"
elif data == ">":
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-end-of-doctype"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-doctype"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
self.currentToken["systemId"] += data
return True
def afterDoctypeSystemIdentifierState(self):
data = self.stream.char()
if data in spaceCharacters:
pass
elif data == ">":
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-doctype"})
self.currentToken["correct"] = False
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"unexpected-char-in-doctype"})
self.state = self.bogusDoctypeState
return True
def bogusDoctypeState(self):
data = self.stream.char()
if data == ">":
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
elif data is EOF:
# XXX EMIT
self.stream.unget(data)
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
pass
return True
def cdataSectionState(self):
data = []
while True:
data.append(self.stream.charsUntil("]"))
data.append(self.stream.charsUntil(">"))
char = self.stream.char()
if char == EOF:
break
else:
assert char == ">"
if data[-1][-2:] == "]]":
data[-1] = data[-1][:-2]
break
else:
data.append(char)
data = "".join(data) # pylint:disable=redefined-variable-type
# Deal with null here rather than in the parser
nullCount = data.count("\u0000")
if nullCount > 0:
for _ in range(nullCount):
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
data = data.replace("\u0000", "\uFFFD")
if data:
self.tokenQueue.append({"type": tokenTypes["Characters"],
"data": data})
self.state = self.dataState
return True
from __future__ import absolute_import, division, unicode_literals
from .py import Trie as PyTrie
Trie = PyTrie
# pylint:disable=wrong-import-position
try:
from .datrie import Trie as DATrie
except ImportError:
pass
else:
Trie = DATrie
# pylint:enable=wrong-import-position
from __future__ import absolute_import, division, unicode_literals
from collections import Mapping
class Trie(Mapping):
"""Abstract base class for tries"""
def keys(self, prefix=None):
# pylint:disable=arguments-differ
keys = super(Trie, self).keys()
if prefix is None:
return set(keys)
# Python 2.6: no set comprehensions
return set([x for x in keys if x.startswith(prefix)])
def has_keys_with_prefix(self, prefix):
for key in self.keys():
if key.startswith(prefix):
return True
return False
def longest_prefix(self, prefix):
if prefix in self:
return prefix
for i in range(1, len(prefix) + 1):
if prefix[:-i] in self:
return prefix[:-i]
raise KeyError(prefix)
def longest_prefix_item(self, prefix):
lprefix = self.longest_prefix(prefix)
return (lprefix, self[lprefix])
from __future__ import absolute_import, division, unicode_literals
from datrie import Trie as DATrie
from pip._vendor.six import text_type
from ._base import Trie as ABCTrie
class Trie(ABCTrie):
def __init__(self, data):
chars = set()
for key in data.keys():
if not isinstance(key, text_type):
raise TypeError("All keys must be strings")
for char in key:
chars.add(char)
self._data = DATrie("".join(chars))
for key, value in data.items():
self._data[key] = value
def __contains__(self, key):
return key in self._data
def __len__(self):
return len(self._data)
def __iter__(self):
raise NotImplementedError()
def __getitem__(self, key):
return self._data[key]
def keys(self, prefix=None):
return self._data.keys(prefix)
def has_keys_with_prefix(self, prefix):
return self._data.has_keys_with_prefix(prefix)
def longest_prefix(self, prefix):
return self._data.longest_prefix(prefix)
def longest_prefix_item(self, prefix):
return self._data.longest_prefix_item(prefix)
from __future__ import absolute_import, division, unicode_literals
from pip._vendor.six import text_type
from bisect import bisect_left
from ._base import Trie as ABCTrie
class Trie(ABCTrie):
def __init__(self, data):
if not all(isinstance(x, text_type) for x in data.keys()):
raise TypeError("All keys must be strings")
self._data = data
self._keys = sorted(data.keys())
self._cachestr = ""
self._cachepoints = (0, len(data))
def __contains__(self, key):
return key in self._data
def __len__(self):
return len(self._data)
def __iter__(self):
return iter(self._data)
def __getitem__(self, key):
return self._data[key]
def keys(self, prefix=None):
if prefix is None or prefix == "" or not self._keys:
return set(self._keys)
if prefix.startswith(self._cachestr):
lo, hi = self._cachepoints
start = i = bisect_left(self._keys, prefix, lo, hi)
else:
start = i = bisect_left(self._keys, prefix)
keys = set()
if start == len(self._keys):
return keys
while self._keys[i].startswith(prefix):
keys.add(self._keys[i])
i += 1
self._cachestr = prefix
self._cachepoints = (start, i)
return keys
def has_keys_with_prefix(self, prefix):
if prefix in self._data:
return True
if prefix.startswith(self._cachestr):
lo, hi = self._cachepoints
i = bisect_left(self._keys, prefix, lo, hi)
else:
i = bisect_left(self._keys, prefix)
if i == len(self._keys):
return False
return self._keys[i].startswith(prefix)
from __future__ import absolute_import, division, unicode_literals
import sys
from types import ModuleType
from pip._vendor.six import text_type
try:
import xml.etree.cElementTree as default_etree
except ImportError:
import xml.etree.ElementTree as default_etree
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
"surrogatePairToCodepoint", "moduleFactoryFactory",
"supports_lone_surrogates", "PY27"]
PY27 = sys.version_info[0] == 2 and sys.version_info[1] >= 7
# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
# caught by the below test. In general this would be any platform
# using UTF-16 as its encoding of unicode strings, such as
# Jython. This is because UTF-16 itself is based on the use of such
# surrogates, and there is no mechanism to further escape such
# escapes.
try:
_x = eval('"\\uD800"') # pylint:disable=eval-used
if not isinstance(_x, text_type):
# We need this with u"" because of http://bugs.jython.org/issue2039
_x = eval('u"\\uD800"') # pylint:disable=eval-used
assert isinstance(_x, text_type)
except: # pylint:disable=bare-except
supports_lone_surrogates = False
else:
supports_lone_surrogates = True
class MethodDispatcher(dict):
"""Dict with 2 special properties:
On initiation, keys that are lists, sets or tuples are converted to
multiple keys so accessing any one of the items in the original
list-like object returns the matching value
md = MethodDispatcher({("foo", "bar"):"baz"})
md["foo"] == "baz"
A default value which can be set through the default attribute.
"""
def __init__(self, items=()):
# Using _dictEntries instead of directly assigning to self is about
# twice as fast. Please do careful performance testing before changing
# anything here.
_dictEntries = []
for name, value in items:
if isinstance(name, (list, tuple, frozenset, set)):
for item in name:
_dictEntries.append((item, value))
else:
_dictEntries.append((name, value))
dict.__init__(self, _dictEntries)
assert len(self) == len(_dictEntries)
self.default = None
def __getitem__(self, key):
return dict.get(self, key, self.default)
# Some utility functions to deal with weirdness around UCS2 vs UCS4
# python builds
def isSurrogatePair(data):
return (len(data) == 2 and
ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and
ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF)
def surrogatePairToCodepoint(data):
char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 +
(ord(data[1]) - 0xDC00))
return char_val
# Module Factory Factory (no, this isn't Java, I know)
# Here to stop this being duplicated all over the place.
def moduleFactoryFactory(factory):
moduleCache = {}
def moduleFactory(baseModule, *args, **kwargs):
if isinstance(ModuleType.__name__, type("")):
name = "_%s_factory" % baseModule.__name__
else:
name = b"_%s_factory" % baseModule.__name__
kwargs_tuple = tuple(kwargs.items())
try:
return moduleCache[name][args][kwargs_tuple]
except KeyError:
mod = ModuleType(name)
objs = factory(baseModule, *args, **kwargs)
mod.__dict__.update(objs)
if "name" not in moduleCache:
moduleCache[name] = {}
if "args" not in moduleCache[name]:
moduleCache[name][args] = {}
if "kwargs" not in moduleCache[name][args]:
moduleCache[name][args][kwargs_tuple] = {}
moduleCache[name][args][kwargs_tuple] = mod
return mod
return moduleFactory
def memoize(func):
cache = {}
def wrapped(*args, **kwargs):
key = (tuple(args), tuple(kwargs.items()))
if key not in cache:
cache[key] = func(*args, **kwargs)
return cache[key]
return wrapped
from __future__ import absolute_import, division, unicode_literals
import string
EOF = None
E = {
"null-character":
"Null character in input stream, replaced with U+FFFD.",
"invalid-codepoint":
"Invalid codepoint in stream.",
"incorrectly-placed-solidus":
"Solidus (/) incorrectly placed in tag.",
"incorrect-cr-newline-entity":
"Incorrect CR newline entity, replaced with LF.",
"illegal-windows-1252-entity":
"Entity used with illegal number (windows-1252 reference).",
"cant-convert-numeric-entity":
"Numeric entity couldn't be converted to character "
"(codepoint U+%(charAsInt)08x).",
"illegal-codepoint-for-numeric-entity":
"Numeric entity represents an illegal codepoint: "
"U+%(charAsInt)08x.",
"numeric-entity-without-semicolon":
"Numeric entity didn't end with ';'.",
"expected-numeric-entity-but-got-eof":
"Numeric entity expected. Got end of file instead.",
"expected-numeric-entity":
"Numeric entity expected but none found.",
"named-entity-without-semicolon":
"Named entity didn't end with ';'.",
"expected-named-entity":
"Named entity expected. Got none.",
"attributes-in-end-tag":
"End tag contains unexpected attributes.",
'self-closing-flag-on-end-tag':
"End tag contains unexpected self-closing flag.",
"expected-tag-name-but-got-right-bracket":
"Expected tag name. Got '>' instead.",
"expected-tag-name-but-got-question-mark":
"Expected tag name. Got '?' instead. (HTML doesn't "
"support processing instructions.)",
"expected-tag-name":
"Expected tag name. Got something else instead",
"expected-closing-tag-but-got-right-bracket":
"Expected closing tag. Got '>' instead. Ignoring '</>'.",
"expected-closing-tag-but-got-eof":
"Expected closing tag. Unexpected end of file.",
"expected-closing-tag-but-got-char":
"Expected closing tag. Unexpected character '%(data)s' found.",
"eof-in-tag-name":
"Unexpected end of file in the tag name.",
"expected-attribute-name-but-got-eof":
"Unexpected end of file. Expected attribute name instead.",
"eof-in-attribute-name":
"Unexpected end of file in attribute name.",
"invalid-character-in-attribute-name":
"Invalid character in attribute name",
"duplicate-attribute":
"Dropped duplicate attribute on tag.",
"expected-end-of-tag-name-but-got-eof":
"Unexpected end of file. Expected = or end of tag.",
"expected-attribute-value-but-got-eof":
"Unexpected end of file. Expected attribute value.",
"expected-attribute-value-but-got-right-bracket":
"Expected attribute value. Got '>' instead.",
'equals-in-unquoted-attribute-value':
"Unexpected = in unquoted attribute",
'unexpected-character-in-unquoted-attribute-value':
"Unexpected character in unquoted attribute",
"invalid-character-after-attribute-name":
"Unexpected character after attribute name.",
"unexpected-character-after-attribute-value":
"Unexpected character after attribute value.",
"eof-in-attribute-value-double-quote":
"Unexpected end of file in attribute value (\").",
"eof-in-attribute-value-single-quote":
"Unexpected end of file in attribute value (').",
"eof-in-attribute-value-no-quotes":
"Unexpected end of file in attribute value.",
"unexpected-EOF-after-solidus-in-tag":
"Unexpected end of file in tag. Expected >",
"unexpected-character-after-solidus-in-tag":
"Unexpected character after / in tag. Expected >",
"expected-dashes-or-doctype":
"Expected '--' or 'DOCTYPE'. Not found.",
"unexpected-bang-after-double-dash-in-comment":
"Unexpected ! after -- in comment",
"unexpected-space-after-double-dash-in-comment":
"Unexpected space after -- in comment",
"incorrect-comment":
"Incorrect comment.",
"eof-in-comment":
"Unexpected end of file in comment.",
"eof-in-comment-end-dash":
"Unexpected end of file in comment (-)",
"unexpected-dash-after-double-dash-in-comment":
"Unexpected '-' after '--' found in comment.",
"eof-in-comment-double-dash":
"Unexpected end of file in comment (--).",
"eof-in-comment-end-space-state":
"Unexpected end of file in comment.",
"eof-in-comment-end-bang-state":
"Unexpected end of file in comment.",
"unexpected-char-in-comment":
"Unexpected character in comment found.",
"need-space-after-doctype":
"No space after literal string 'DOCTYPE'.",
"expected-doctype-name-but-got-right-bracket":
"Unexpected > character. Expected DOCTYPE name.",
"expected-doctype-name-but-got-eof":
"Unexpected end of file. Expected DOCTYPE name.",
"eof-in-doctype-name":
"Unexpected end of file in DOCTYPE name.",
"eof-in-doctype":
"Unexpected end of file in DOCTYPE.",
"expected-space-or-right-bracket-in-doctype":
"Expected space or '>'. Got '%(data)s'",
"unexpected-end-of-doctype":
"Unexpected end of DOCTYPE.",
"unexpected-char-in-doctype":
"Unexpected character in DOCTYPE.",
"eof-in-innerhtml":
"XXX innerHTML EOF",
"unexpected-doctype":
"Unexpected DOCTYPE. Ignored.",
"non-html-root":
"html needs to be the first start tag.",
"expected-doctype-but-got-eof":
"Unexpected End of file. Expected DOCTYPE.",
"unknown-doctype":
"Erroneous DOCTYPE.",
"expected-doctype-but-got-chars":
"Unexpected non-space characters. Expected DOCTYPE.",
"expected-doctype-but-got-start-tag":
"Unexpected start tag (%(name)s). Expected DOCTYPE.",
"expected-doctype-but-got-end-tag":
"Unexpected end tag (%(name)s). Expected DOCTYPE.",
"end-tag-after-implied-root":
"Unexpected end tag (%(name)s) after the (implied) root element.",
"expected-named-closing-tag-but-got-eof":
"Unexpected end of file. Expected end tag (%(name)s).",
"two-heads-are-not-better-than-one":
"Unexpected start tag head in existing head. Ignored.",
"unexpected-end-tag":
"Unexpected end tag (%(name)s). Ignored.",
"unexpected-start-tag-out-of-my-head":
"Unexpected start tag (%(name)s) that can be in head. Moved.",
"unexpected-start-tag":
"Unexpected start tag (%(name)s).",
"missing-end-tag":
"Missing end tag (%(name)s).",
"missing-end-tags":
"Missing end tags (%(name)s).",
"unexpected-start-tag-implies-end-tag":
"Unexpected start tag (%(startName)s) "
"implies end tag (%(endName)s).",
"unexpected-start-tag-treated-as":
"Unexpected start tag (%(originalName)s). Treated as %(newName)s.",
"deprecated-tag":
"Unexpected start tag %(name)s. Don't use it!",
"unexpected-start-tag-ignored":
"Unexpected start tag %(name)s. Ignored.",
"expected-one-end-tag-but-got-another":
"Unexpected end tag (%(gotName)s). "
"Missing end tag (%(expectedName)s).",
"end-tag-too-early":
"End tag (%(name)s) seen too early. Expected other end tag.",
"end-tag-too-early-named":
"Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).",
"end-tag-too-early-ignored":
"End tag (%(name)s) seen too early. Ignored.",
"adoption-agency-1.1":
"End tag (%(name)s) violates step 1, "
"paragraph 1 of the adoption agency algorithm.",
"adoption-agency-1.2":
"End tag (%(name)s) violates step 1, "
"paragraph 2 of the adoption agency algorithm.",
"adoption-agency-1.3":
"End tag (%(name)s) violates step 1, "
"paragraph 3 of the adoption agency algorithm.",
"adoption-agency-4.4":
"End tag (%(name)s) violates step 4, "
"paragraph 4 of the adoption agency algorithm.",
"unexpected-end-tag-treated-as":
"Unexpected end tag (%(originalName)s). Treated as %(newName)s.",
"no-end-tag":
"This element (%(name)s) has no end tag.",
"unexpected-implied-end-tag-in-table":
"Unexpected implied end tag (%(name)s) in the table phase.",
"unexpected-implied-end-tag-in-table-body":
"Unexpected implied end tag (%(name)s) in the table body phase.",
"unexpected-char-implies-table-voodoo":
"Unexpected non-space characters in "
"table context caused voodoo mode.",
"unexpected-hidden-input-in-table":
"Unexpected input with type hidden in table context.",
"unexpected-form-in-table":
"Unexpected form in table context.",
"unexpected-start-tag-implies-table-voodoo":
"Unexpected start tag (%(name)s) in "
"table context caused voodoo mode.",
"unexpected-end-tag-implies-table-voodoo":
"Unexpected end tag (%(name)s) in "
"table context caused voodoo mode.",
"unexpected-cell-in-table-body":
"Unexpected table cell start tag (%(name)s) "
"in the table body phase.",
"unexpected-cell-end-tag":
"Got table cell end tag (%(name)s) "
"while required end tags are missing.",
"unexpected-end-tag-in-table-body":
"Unexpected end tag (%(name)s) in the table body phase. Ignored.",
"unexpected-implied-end-tag-in-table-row":
"Unexpected implied end tag (%(name)s) in the table row phase.",
"unexpected-end-tag-in-table-row":
"Unexpected end tag (%(name)s) in the table row phase. Ignored.",
"unexpected-select-in-select":
"Unexpected select start tag in the select phase "
"treated as select end tag.",
"unexpected-input-in-select":
"Unexpected input start tag in the select phase.",
"unexpected-start-tag-in-select":
"Unexpected start tag token (%(name)s in the select phase. "
"Ignored.",
"unexpected-end-tag-in-select":
"Unexpected end tag (%(name)s) in the select phase. Ignored.",
"unexpected-table-element-start-tag-in-select-in-table":
"Unexpected table element start tag (%(name)s) in the select in table phase.",
"unexpected-table-element-end-tag-in-select-in-table":
"Unexpected table element end tag (%(name)s) in the select in table phase.",
"unexpected-char-after-body":
"Unexpected non-space characters in the after body phase.",
"unexpected-start-tag-after-body":
"Unexpected start tag token (%(name)s)"
" in the after body phase.",
"unexpected-end-tag-after-body":
"Unexpected end tag token (%(name)s)"
" in the after body phase.",
"unexpected-char-in-frameset":
"Unexpected characters in the frameset phase. Characters ignored.",
"unexpected-start-tag-in-frameset":
"Unexpected start tag token (%(name)s)"
" in the frameset phase. Ignored.",
"unexpected-frameset-in-frameset-innerhtml":
"Unexpected end tag token (frameset) "
"in the frameset phase (innerHTML).",
"unexpected-end-tag-in-frameset":
"Unexpected end tag token (%(name)s)"
" in the frameset phase. Ignored.",
"unexpected-char-after-frameset":
"Unexpected non-space characters in the "
"after frameset phase. Ignored.",
"unexpected-start-tag-after-frameset":
"Unexpected start tag (%(name)s)"
" in the after frameset phase. Ignored.",
"unexpected-end-tag-after-frameset":
"Unexpected end tag (%(name)s)"
" in the after frameset phase. Ignored.",
"unexpected-end-tag-after-body-innerhtml":
"Unexpected end tag after body(innerHtml)",
"expected-eof-but-got-char":
"Unexpected non-space characters. Expected end of file.",
"expected-eof-but-got-start-tag":
"Unexpected start tag (%(name)s)"
". Expected end of file.",
"expected-eof-but-got-end-tag":
"Unexpected end tag (%(name)s)"
". Expected end of file.",
"eof-in-table":
"Unexpected end of file. Expected table content.",
"eof-in-select":
"Unexpected end of file. Expected select content.",
"eof-in-frameset":
"Unexpected end of file. Expected frameset content.",
"eof-in-script-in-script":
"Unexpected end of file. Expected script content.",
"eof-in-foreign-lands":
"Unexpected end of file. Expected foreign content",
"non-void-element-with-trailing-solidus":
"Trailing solidus not allowed on element %(name)s",
"unexpected-html-element-in-foreign-content":
"Element %(name)s not allowed in a non-html context",
"unexpected-end-tag-before-html":
"Unexpected end tag (%(name)s) before html.",
"unexpected-inhead-noscript-tag":
"Element %(name)s not allowed in a inhead-noscript context",
"eof-in-head-noscript":
"Unexpected end of file. Expected inhead-noscript content",
"char-in-head-noscript":
"Unexpected non-space character. Expected inhead-noscript content",
"XXX-undefined-error":
"Undefined error (this sucks and should be fixed)",
}
namespaces = {
"html": "http://www.w3.org/1999/xhtml",
"mathml": "http://www.w3.org/1998/Math/MathML",
"svg": "http://www.w3.org/2000/svg",
"xlink": "http://www.w3.org/1999/xlink",
"xml": "http://www.w3.org/XML/1998/namespace",
"xmlns": "http://www.w3.org/2000/xmlns/"
}
scopingElements = frozenset([
(namespaces["html"], "applet"),
(namespaces["html"], "caption"),
(namespaces["html"], "html"),
(namespaces["html"], "marquee"),
(namespaces["html"], "object"),
(namespaces["html"], "table"),
(namespaces["html"], "td"),
(namespaces["html"], "th"),
(namespaces["mathml"], "mi"),
(namespaces["mathml"], "mo"),
(namespaces["mathml"], "mn"),
(namespaces["mathml"], "ms"),
(namespaces["mathml"], "mtext"),
(namespaces["mathml"], "annotation-xml"),
(namespaces["svg"], "foreignObject"),
(namespaces["svg"], "desc"),
(namespaces["svg"], "title"),
])
formattingElements = frozenset([
(namespaces["html"], "a"),
(namespaces["html"], "b"),
(namespaces["html"], "big"),
(namespaces["html"], "code"),
(namespaces["html"], "em"),
(namespaces["html"], "font"),
(namespaces["html"], "i"),
(namespaces["html"], "nobr"),
(namespaces["html"], "s"),
(namespaces["html"], "small"),
(namespaces["html"], "strike"),
(namespaces["html"], "strong"),
(namespaces["html"], "tt"),
(namespaces["html"], "u")
])
specialElements = frozenset([
(namespaces["html"], "address"),
(namespaces["html"], "applet"),
(namespaces["html"], "area"),
(namespaces["html"], "article"),
(namespaces["html"], "aside"),
(namespaces["html"], "base"),
(namespaces["html"], "basefont"),
(namespaces["html"], "bgsound"),
(namespaces["html"], "blockquote"),
(namespaces["html"], "body"),
(namespaces["html"], "br"),
(namespaces["html"], "button"),
(namespaces["html"], "caption"),
(namespaces["html"], "center"),
(namespaces["html"], "col"),
(namespaces["html"], "colgroup"),
(namespaces["html"], "command"),
(namespaces["html"], "dd"),
(namespaces["html"], "details"),
(namespaces["html"], "dir"),
(namespaces["html"], "div"),
(namespaces["html"], "dl"),
(namespaces["html"], "dt"),
(namespaces["html"], "embed"),
(namespaces["html"], "fieldset"),
(namespaces["html"], "figure"),
(namespaces["html"], "footer"),
(namespaces["html"], "form"),
(namespaces["html"], "frame"),
(namespaces["html"], "frameset"),
(namespaces["html"], "h1"),
(namespaces["html"], "h2"),
(namespaces["html"], "h3"),
(namespaces["html"], "h4"),
(namespaces["html"], "h5"),
(namespaces["html"], "h6"),
(namespaces["html"], "head"),
(namespaces["html"], "header"),
(namespaces["html"], "hr"),
(namespaces["html"], "html"),
(namespaces["html"], "iframe"),
# Note that image is commented out in the spec as "this isn't an
# element that can end up on the stack, so it doesn't matter,"
(namespaces["html"], "image"),
(namespaces["html"], "img"),
(namespaces["html"], "input"),
(namespaces["html"], "isindex"),
(namespaces["html"], "li"),
(namespaces["html"], "link"),
(namespaces["html"], "listing"),
(namespaces["html"], "marquee"),
(namespaces["html"], "menu"),
(namespaces["html"], "meta"),
(namespaces["html"], "nav"),
(namespaces["html"], "noembed"),
(namespaces["html"], "noframes"),
(namespaces["html"], "noscript"),
(namespaces["html"], "object"),
(namespaces["html"], "ol"),
(namespaces["html"], "p"),
(namespaces["html"], "param"),
(namespaces["html"], "plaintext"),
(namespaces["html"], "pre"),
(namespaces["html"], "script"),
(namespaces["html"], "section"),
(namespaces["html"], "select"),
(namespaces["html"], "style"),
(namespaces["html"], "table"),
(namespaces["html"], "tbody"),
(namespaces["html"], "td"),
(namespaces["html"], "textarea"),
(namespaces["html"], "tfoot"),
(namespaces["html"], "th"),
(namespaces["html"], "thead"),
(namespaces["html"], "title"),
(namespaces["html"], "tr"),
(namespaces["html"], "ul"),
(namespaces["html"], "wbr"),
(namespaces["html"], "xmp"),
(namespaces["svg"], "foreignObject")
])
htmlIntegrationPointElements = frozenset([
(namespaces["mathml"], "annotaion-xml"),
(namespaces["svg"], "foreignObject"),
(namespaces["svg"], "desc"),
(namespaces["svg"], "title")
])
mathmlTextIntegrationPointElements = frozenset([
(namespaces["mathml"], "mi"),
(namespaces["mathml"], "mo"),
(namespaces["mathml"], "mn"),
(namespaces["mathml"], "ms"),
(namespaces["mathml"], "mtext")
])
adjustSVGAttributes = {
"attributename": "attributeName",
"attributetype": "attributeType",
"basefrequency": "baseFrequency",
"baseprofile": "baseProfile",
"calcmode": "calcMode",
"clippathunits": "clipPathUnits",
"contentscripttype": "contentScriptType",
"contentstyletype": "contentStyleType",
"diffuseconstant": "diffuseConstant",
"edgemode": "edgeMode",
"externalresourcesrequired": "externalResourcesRequired",
"filterres": "filterRes",
"filterunits": "filterUnits",
"glyphref": "glyphRef",
"gradienttransform": "gradientTransform",
"gradientunits": "gradientUnits",
"kernelmatrix": "kernelMatrix",
"kernelunitlength": "kernelUnitLength",
"keypoints": "keyPoints",
"keysplines": "keySplines",
"keytimes": "keyTimes",
"lengthadjust": "lengthAdjust",
"limitingconeangle": "limitingConeAngle",
"markerheight": "markerHeight",
"markerunits": "markerUnits",
"markerwidth": "markerWidth",
"maskcontentunits": "maskContentUnits",
"maskunits": "maskUnits",
"numoctaves": "numOctaves",
"pathlength": "pathLength",
"patterncontentunits": "patternContentUnits",
"patterntransform": "patternTransform",
"patternunits": "patternUnits",
"pointsatx": "pointsAtX",
"pointsaty": "pointsAtY",
"pointsatz": "pointsAtZ",
"preservealpha": "preserveAlpha",
"preserveaspectratio": "preserveAspectRatio",
"primitiveunits": "primitiveUnits",
"refx": "refX",
"refy": "refY",
"repeatcount": "repeatCount",
"repeatdur": "repeatDur",
"requiredextensions": "requiredExtensions",
"requiredfeatures": "requiredFeatures",
"specularconstant": "specularConstant",
"specularexponent": "specularExponent",
"spreadmethod": "spreadMethod",
"startoffset": "startOffset",
"stddeviation": "stdDeviation",
"stitchtiles": "stitchTiles",
"surfacescale": "surfaceScale",
"systemlanguage": "systemLanguage",
"tablevalues": "tableValues",
"targetx": "targetX",
"targety": "targetY",
"textlength": "textLength",
"viewbox": "viewBox",
"viewtarget": "viewTarget",
"xchannelselector": "xChannelSelector",
"ychannelselector": "yChannelSelector",
"zoomandpan": "zoomAndPan"
}
adjustMathMLAttributes = {"definitionurl": "definitionURL"}
adjustForeignAttributes = {
"xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
"xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]),
"xlink:href": ("xlink", "href", namespaces["xlink"]),
"xlink:role": ("xlink", "role", namespaces["xlink"]),
"xlink:show": ("xlink", "show", namespaces["xlink"]),
"xlink:title": ("xlink", "title", namespaces["xlink"]),
"xlink:type": ("xlink", "type", namespaces["xlink"]),
"xml:base": ("xml", "base", namespaces["xml"]),
"xml:lang": ("xml", "lang", namespaces["xml"]),
"xml:space": ("xml", "space", namespaces["xml"]),
"xmlns": (None, "xmlns", namespaces["xmlns"]),
"xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
}
unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in
adjustForeignAttributes.items()])
spaceCharacters = frozenset([
"\t",
"\n",
"\u000C",
" ",
"\r"
])
tableInsertModeElements = frozenset([
"table",
"tbody",
"tfoot",
"thead",
"tr"
])
asciiLowercase = frozenset(string.ascii_lowercase)
asciiUppercase = frozenset(string.ascii_uppercase)
asciiLetters = frozenset(string.ascii_letters)
digits = frozenset(string.digits)
hexDigits = frozenset(string.hexdigits)
asciiUpper2Lower = dict([(ord(c), ord(c.lower()))
for c in string.ascii_uppercase])
# Heading elements need to be ordered
headingElements = (
"h1",
"h2",
"h3",
"h4",
"h5",
"h6"
)
voidElements = frozenset([
"base",
"command",
"event-source",
"link",
"meta",
"hr",
"br",
"img",
"embed",
"param",
"area",
"col",
"input",
"source",
"track"
])
cdataElements = frozenset(['title', 'textarea'])
rcdataElements = frozenset([
'style',
'script',
'xmp',
'iframe',
'noembed',
'noframes',
'noscript'
])
booleanAttributes = {
"": frozenset(["irrelevant"]),
"style": frozenset(["scoped"]),
"img": frozenset(["ismap"]),
"audio": frozenset(["autoplay", "controls"]),
"video": frozenset(["autoplay", "controls"]),
"script": frozenset(["defer", "async"]),
"details": frozenset(["open"]),
"datagrid": frozenset(["multiple", "disabled"]),
"command": frozenset(["hidden", "disabled", "checked", "default"]),
"hr": frozenset(["noshade"]),
"menu": frozenset(["autosubmit"]),
"fieldset": frozenset(["disabled", "readonly"]),
"option": frozenset(["disabled", "readonly", "selected"]),
"optgroup": frozenset(["disabled", "readonly"]),
"button": frozenset(["disabled", "autofocus"]),
"input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]),
"select": frozenset(["disabled", "readonly", "autofocus", "multiple"]),
"output": frozenset(["disabled", "readonly"]),
}
# entitiesWindows1252 has to be _ordered_ and needs to have an index. It
# therefore can't be a frozenset.
entitiesWindows1252 = (
8364, # 0x80 0x20AC EURO SIGN
65533, # 0x81 UNDEFINED
8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK
402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK
8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK
8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS
8224, # 0x86 0x2020 DAGGER
8225, # 0x87 0x2021 DOUBLE DAGGER
710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
8240, # 0x89 0x2030 PER MILLE SIGN
352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON
8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE
65533, # 0x8D UNDEFINED
381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON
65533, # 0x8F UNDEFINED
65533, # 0x90 UNDEFINED
8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK
8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK
8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK
8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK
8226, # 0x95 0x2022 BULLET
8211, # 0x96 0x2013 EN DASH
8212, # 0x97 0x2014 EM DASH
732, # 0x98 0x02DC SMALL TILDE
8482, # 0x99 0x2122 TRADE MARK SIGN
353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON
8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE
65533, # 0x9D UNDEFINED
382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON
376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
)
xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;'])
entities = {
"AElig": "\xc6",
"AElig;": "\xc6",
"AMP": "&",
"AMP;": "&",
"Aacute": "\xc1",
"Aacute;": "\xc1",
"Abreve;": "\u0102",
"Acirc": "\xc2",
"Acirc;": "\xc2",
"Acy;": "\u0410",
"Afr;": "\U0001d504",
"Agrave": "\xc0",
"Agrave;": "\xc0",
"Alpha;": "\u0391",
"Amacr;": "\u0100",
"And;": "\u2a53",
"Aogon;": "\u0104",
"Aopf;": "\U0001d538",
"ApplyFunction;": "\u2061",
"Aring": "\xc5",
"Aring;": "\xc5",
"Ascr;": "\U0001d49c",
"Assign;": "\u2254",
"Atilde": "\xc3",
"Atilde;": "\xc3",
"Auml": "\xc4",
"Auml;": "\xc4",
"Backslash;": "\u2216",
"Barv;": "\u2ae7",
"Barwed;": "\u2306",
"Bcy;": "\u0411",
"Because;": "\u2235",
"Bernoullis;": "\u212c",
"Beta;": "\u0392",
"Bfr;": "\U0001d505",
"Bopf;": "\U0001d539",
"Breve;": "\u02d8",
"Bscr;": "\u212c",
"Bumpeq;": "\u224e",
"CHcy;": "\u0427",
"COPY": "\xa9",
"COPY;": "\xa9",
"Cacute;": "\u0106",
"Cap;": "\u22d2",
"CapitalDifferentialD;": "\u2145",
"Cayleys;": "\u212d",
"Ccaron;": "\u010c",
"Ccedil": "\xc7",
"Ccedil;": "\xc7",
"Ccirc;": "\u0108",
"Cconint;": "\u2230",
"Cdot;": "\u010a",
"Cedilla;": "\xb8",
"CenterDot;": "\xb7",
"Cfr;": "\u212d",
"Chi;": "\u03a7",
"CircleDot;": "\u2299",
"CircleMinus;": "\u2296",
"CirclePlus;": "\u2295",
"CircleTimes;": "\u2297",
"ClockwiseContourIntegral;": "\u2232",
"CloseCurlyDoubleQuote;": "\u201d",
"CloseCurlyQuote;": "\u2019",
"Colon;": "\u2237",
"Colone;": "\u2a74",
"Congruent;": "\u2261",
"Conint;": "\u222f",
"ContourIntegral;": "\u222e",
"Copf;": "\u2102",
"Coproduct;": "\u2210",
"CounterClockwiseContourIntegral;": "\u2233",
"Cross;": "\u2a2f",
"Cscr;": "\U0001d49e",
"Cup;": "\u22d3",
"CupCap;": "\u224d",
"DD;": "\u2145",
"DDotrahd;": "\u2911",
"DJcy;": "\u0402",
"DScy;": "\u0405",
"DZcy;": "\u040f",
"Dagger;": "\u2021",
"Darr;": "\u21a1",
"Dashv;": "\u2ae4",
"Dcaron;": "\u010e",
"Dcy;": "\u0414",
"Del;": "\u2207",
"Delta;": "\u0394",
"Dfr;": "\U0001d507",
"DiacriticalAcute;": "\xb4",
"DiacriticalDot;": "\u02d9",
"DiacriticalDoubleAcute;": "\u02dd",
"DiacriticalGrave;": "`",
"DiacriticalTilde;": "\u02dc",
"Diamond;": "\u22c4",
"DifferentialD;": "\u2146",
"Dopf;": "\U0001d53b",
"Dot;": "\xa8",
"DotDot;": "\u20dc",
"DotEqual;": "\u2250",
"DoubleContourIntegral;": "\u222f",
"DoubleDot;": "\xa8",
"DoubleDownArrow;": "\u21d3",
"DoubleLeftArrow;": "\u21d0",
"DoubleLeftRightArrow;": "\u21d4",
"DoubleLeftTee;": "\u2ae4",
"DoubleLongLeftArrow;": "\u27f8",
"DoubleLongLeftRightArrow;": "\u27fa",
"DoubleLongRightArrow;": "\u27f9",
"DoubleRightArrow;": "\u21d2",
"DoubleRightTee;": "\u22a8",
"DoubleUpArrow;": "\u21d1",
"DoubleUpDownArrow;": "\u21d5",
"DoubleVerticalBar;": "\u2225",
"DownArrow;": "\u2193",
"DownArrowBar;": "\u2913",
"DownArrowUpArrow;": "\u21f5",
"DownBreve;": "\u0311",
"DownLeftRightVector;": "\u2950",
"DownLeftTeeVector;": "\u295e",
"DownLeftVector;": "\u21bd",
"DownLeftVectorBar;": "\u2956",
"DownRightTeeVector;": "\u295f",
"DownRightVector;": "\u21c1",
"DownRightVectorBar;": "\u2957",
"DownTee;": "\u22a4",
"DownTeeArrow;": "\u21a7",
"Downarrow;": "\u21d3",
"Dscr;": "\U0001d49f",
"Dstrok;": "\u0110",
"ENG;": "\u014a",
"ETH": "\xd0",
"ETH;": "\xd0",
"Eacute": "\xc9",
"Eacute;": "\xc9",
"Ecaron;": "\u011a",
"Ecirc": "\xca",
"Ecirc;": "\xca",
"Ecy;": "\u042d",
"Edot;": "\u0116",
"Efr;": "\U0001d508",
"Egrave": "\xc8",
"Egrave;": "\xc8",
"Element;": "\u2208",
"Emacr;": "\u0112",
"EmptySmallSquare;": "\u25fb",
"EmptyVerySmallSquare;": "\u25ab",
"Eogon;": "\u0118",
"Eopf;": "\U0001d53c",
"Epsilon;": "\u0395",
"Equal;": "\u2a75",
"EqualTilde;": "\u2242",
"Equilibrium;": "\u21cc",
"Escr;": "\u2130",
"Esim;": "\u2a73",
"Eta;": "\u0397",
"Euml": "\xcb",
"Euml;": "\xcb",
"Exists;": "\u2203",
"ExponentialE;": "\u2147",
"Fcy;": "\u0424",
"Ffr;": "\U0001d509",
"FilledSmallSquare;": "\u25fc",
"FilledVerySmallSquare;": "\u25aa",
"Fopf;": "\U0001d53d",
"ForAll;": "\u2200",
"Fouriertrf;": "\u2131",
"Fscr;": "\u2131",
"GJcy;": "\u0403",
"GT": ">",
"GT;": ">",
"Gamma;": "\u0393",
"Gammad;": "\u03dc",
"Gbreve;": "\u011e",
"Gcedil;": "\u0122",
"Gcirc;": "\u011c",
"Gcy;": "\u0413",
"Gdot;": "\u0120",
"Gfr;": "\U0001d50a",
"Gg;": "\u22d9",
"Gopf;": "\U0001d53e",
"GreaterEqual;": "\u2265",
"GreaterEqualLess;": "\u22db",
"GreaterFullEqual;": "\u2267",
"GreaterGreater;": "\u2aa2",
"GreaterLess;": "\u2277",
"GreaterSlantEqual;": "\u2a7e",
"GreaterTilde;": "\u2273",
"Gscr;": "\U0001d4a2",
"Gt;": "\u226b",
"HARDcy;": "\u042a",
"Hacek;": "\u02c7",
"Hat;": "^",
"Hcirc;": "\u0124",
"Hfr;": "\u210c",
"HilbertSpace;": "\u210b",
"Hopf;": "\u210d",
"HorizontalLine;": "\u2500",
"Hscr;": "\u210b",
"Hstrok;": "\u0126",
"HumpDownHump;": "\u224e",
"HumpEqual;": "\u224f",
"IEcy;": "\u0415",
"IJlig;": "\u0132",
"IOcy;": "\u0401",
"Iacute": "\xcd",
"Iacute;": "\xcd",
"Icirc": "\xce",
"Icirc;": "\xce",
"Icy;": "\u0418",
"Idot;": "\u0130",
"Ifr;": "\u2111",
"Igrave": "\xcc",
"Igrave;": "\xcc",
"Im;": "\u2111",
"Imacr;": "\u012a",
"ImaginaryI;": "\u2148",
"Implies;": "\u21d2",
"Int;": "\u222c",
"Integral;": "\u222b",
"Intersection;": "\u22c2",
"InvisibleComma;": "\u2063",
"InvisibleTimes;": "\u2062",
"Iogon;": "\u012e",
"Iopf;": "\U0001d540",
"Iota;": "\u0399",
"Iscr;": "\u2110",
"Itilde;": "\u0128",
"Iukcy;": "\u0406",
"Iuml": "\xcf",
"Iuml;": "\xcf",
"Jcirc;": "\u0134",
"Jcy;": "\u0419",
"Jfr;": "\U0001d50d",
"Jopf;": "\U0001d541",
"Jscr;": "\U0001d4a5",
"Jsercy;": "\u0408",
"Jukcy;": "\u0404",
"KHcy;": "\u0425",
"KJcy;": "\u040c",
"Kappa;": "\u039a",
"Kcedil;": "\u0136",
"Kcy;": "\u041a",
"Kfr;": "\U0001d50e",
"Kopf;": "\U0001d542",
"Kscr;": "\U0001d4a6",
"LJcy;": "\u0409",
"LT": "<",
"LT;": "<",
"Lacute;": "\u0139",
"Lambda;": "\u039b",
"Lang;": "\u27ea",
"Laplacetrf;": "\u2112",
"Larr;": "\u219e",
"Lcaron;": "\u013d",
"Lcedil;": "\u013b",
"Lcy;": "\u041b",
"LeftAngleBracket;": "\u27e8",
"LeftArrow;": "\u2190",
"LeftArrowBar;": "\u21e4",
"LeftArrowRightArrow;": "\u21c6",
"LeftCeiling;": "\u2308",
"LeftDoubleBracket;": "\u27e6",
"LeftDownTeeVector;": "\u2961",
"LeftDownVector;": "\u21c3",
"LeftDownVectorBar;": "\u2959",
"LeftFloor;": "\u230a",
"LeftRightArrow;": "\u2194",
"LeftRightVector;": "\u294e",
"LeftTee;": "\u22a3",
"LeftTeeArrow;": "\u21a4",
"LeftTeeVector;": "\u295a",
"LeftTriangle;": "\u22b2",
"LeftTriangleBar;": "\u29cf",
"LeftTriangleEqual;": "\u22b4",
"LeftUpDownVector;": "\u2951",
"LeftUpTeeVector;": "\u2960",
"LeftUpVector;": "\u21bf",
"LeftUpVectorBar;": "\u2958",
"LeftVector;": "\u21bc",
"LeftVectorBar;": "\u2952",
"Leftarrow;": "\u21d0",
"Leftrightarrow;": "\u21d4",
"LessEqualGreater;": "\u22da",
"LessFullEqual;": "\u2266",
"LessGreater;": "\u2276",
"LessLess;": "\u2aa1",
"LessSlantEqual;": "\u2a7d",
"LessTilde;": "\u2272",
"Lfr;": "\U0001d50f",
"Ll;": "\u22d8",
"Lleftarrow;": "\u21da",
"Lmidot;": "\u013f",
"LongLeftArrow;": "\u27f5",
"LongLeftRightArrow;": "\u27f7",
"LongRightArrow;": "\u27f6",
"Longleftarrow;": "\u27f8",
"Longleftrightarrow;": "\u27fa",
"Longrightarrow;": "\u27f9",
"Lopf;": "\U0001d543",
"LowerLeftArrow;": "\u2199",
"LowerRightArrow;": "\u2198",
"Lscr;": "\u2112",
"Lsh;": "\u21b0",
"Lstrok;": "\u0141",
"Lt;": "\u226a",
"Map;": "\u2905",
"Mcy;": "\u041c",
"MediumSpace;": "\u205f",
"Mellintrf;": "\u2133",
"Mfr;": "\U0001d510",
"MinusPlus;": "\u2213",
"Mopf;": "\U0001d544",
"Mscr;": "\u2133",
"Mu;": "\u039c",
"NJcy;": "\u040a",
"Nacute;": "\u0143",
"Ncaron;": "\u0147",
"Ncedil;": "\u0145",
"Ncy;": "\u041d",
"NegativeMediumSpace;": "\u200b",
"NegativeThickSpace;": "\u200b",
"NegativeThinSpace;": "\u200b",
"NegativeVeryThinSpace;": "\u200b",
"NestedGreaterGreater;": "\u226b",
"NestedLessLess;": "\u226a",
"NewLine;": "\n",
"Nfr;": "\U0001d511",
"NoBreak;": "\u2060",
"NonBreakingSpace;": "\xa0",
"Nopf;": "\u2115",
"Not;": "\u2aec",
"NotCongruent;": "\u2262",
"NotCupCap;": "\u226d",
"NotDoubleVerticalBar;": "\u2226",
"NotElement;": "\u2209",
"NotEqual;": "\u2260",
"NotEqualTilde;": "\u2242\u0338",
"NotExists;": "\u2204",
"NotGreater;": "\u226f",
"NotGreaterEqual;": "\u2271",
"NotGreaterFullEqual;": "\u2267\u0338",
"NotGreaterGreater;": "\u226b\u0338",
"NotGreaterLess;": "\u2279",
"NotGreaterSlantEqual;": "\u2a7e\u0338",
"NotGreaterTilde;": "\u2275",
"NotHumpDownHump;": "\u224e\u0338",
"NotHumpEqual;": "\u224f\u0338",
"NotLeftTriangle;": "\u22ea",
"NotLeftTriangleBar;": "\u29cf\u0338",
"NotLeftTriangleEqual;": "\u22ec",
"NotLess;": "\u226e",
"NotLessEqual;": "\u2270",
"NotLessGreater;": "\u2278",
"NotLessLess;": "\u226a\u0338",
"NotLessSlantEqual;": "\u2a7d\u0338",
"NotLessTilde;": "\u2274",
"NotNestedGreaterGreater;": "\u2aa2\u0338",
"NotNestedLessLess;": "\u2aa1\u0338",
"NotPrecedes;": "\u2280",
"NotPrecedesEqual;": "\u2aaf\u0338",
"NotPrecedesSlantEqual;": "\u22e0",
"NotReverseElement;": "\u220c",
"NotRightTriangle;": "\u22eb",
"NotRightTriangleBar;": "\u29d0\u0338",
"NotRightTriangleEqual;": "\u22ed",
"NotSquareSubset;": "\u228f\u0338",
"NotSquareSubsetEqual;": "\u22e2",
"NotSquareSuperset;": "\u2290\u0338",
"NotSquareSupersetEqual;": "\u22e3",
"NotSubset;": "\u2282\u20d2",
"NotSubsetEqual;": "\u2288",
"NotSucceeds;": "\u2281",
"NotSucceedsEqual;": "\u2ab0\u0338",
"NotSucceedsSlantEqual;": "\u22e1",
"NotSucceedsTilde;": "\u227f\u0338",
"NotSuperset;": "\u2283\u20d2",
"NotSupersetEqual;": "\u2289",
"NotTilde;": "\u2241",
"NotTildeEqual;": "\u2244",
"NotTildeFullEqual;": "\u2247",
"NotTildeTilde;": "\u2249",
"NotVerticalBar;": "\u2224",
"Nscr;": "\U0001d4a9",
"Ntilde": "\xd1",
"Ntilde;": "\xd1",
"Nu;": "\u039d",
"OElig;": "\u0152",
"Oacute": "\xd3",
"Oacute;": "\xd3",
"Ocirc": "\xd4",
"Ocirc;": "\xd4",
"Ocy;": "\u041e",
"Odblac;": "\u0150",
"Ofr;": "\U0001d512",
"Ograve": "\xd2",
"Ograve;": "\xd2",
"Omacr;": "\u014c",
"Omega;": "\u03a9",
"Omicron;": "\u039f",
"Oopf;": "\U0001d546",
"OpenCurlyDoubleQuote;": "\u201c",
"OpenCurlyQuote;": "\u2018",
"Or;": "\u2a54",
"Oscr;": "\U0001d4aa",
"Oslash": "\xd8",
"Oslash;": "\xd8",
"Otilde": "\xd5",
"Otilde;": "\xd5",
"Otimes;": "\u2a37",
"Ouml": "\xd6",
"Ouml;": "\xd6",
"OverBar;": "\u203e",
"OverBrace;": "\u23de",
"OverBracket;": "\u23b4",
"OverParenthesis;": "\u23dc",
"PartialD;": "\u2202",
"Pcy;": "\u041f",
"Pfr;": "\U0001d513",
"Phi;": "\u03a6",
"Pi;": "\u03a0",
"PlusMinus;": "\xb1",
"Poincareplane;": "\u210c",
"Popf;": "\u2119",
"Pr;": "\u2abb",
"Precedes;": "\u227a",
"PrecedesEqual;": "\u2aaf",
"PrecedesSlantEqual;": "\u227c",
"PrecedesTilde;": "\u227e",
"Prime;": "\u2033",
"Product;": "\u220f",
"Proportion;": "\u2237",
"Proportional;": "\u221d",
"Pscr;": "\U0001d4ab",
"Psi;": "\u03a8",
"QUOT": "\"",
"QUOT;": "\"",
"Qfr;": "\U0001d514",
"Qopf;": "\u211a",
"Qscr;": "\U0001d4ac",
"RBarr;": "\u2910",
"REG": "\xae",
"REG;": "\xae",
"Racute;": "\u0154",
"Rang;": "\u27eb",
"Rarr;": "\u21a0",
"Rarrtl;": "\u2916",
"Rcaron;": "\u0158",
"Rcedil;": "\u0156",
"Rcy;": "\u0420",
"Re;": "\u211c",
"ReverseElement;": "\u220b",
"ReverseEquilibrium;": "\u21cb",
"ReverseUpEquilibrium;": "\u296f",
"Rfr;": "\u211c",
"Rho;": "\u03a1",
"RightAngleBracket;": "\u27e9",
"RightArrow;": "\u2192",
"RightArrowBar;": "\u21e5",
"RightArrowLeftArrow;": "\u21c4",
"RightCeiling;": "\u2309",
"RightDoubleBracket;": "\u27e7",
"RightDownTeeVector;": "\u295d",
"RightDownVector;": "\u21c2",
"RightDownVectorBar;": "\u2955",
"RightFloor;": "\u230b",
"RightTee;": "\u22a2",
"RightTeeArrow;": "\u21a6",
"RightTeeVector;": "\u295b",
"RightTriangle;": "\u22b3",
"RightTriangleBar;": "\u29d0",
"RightTriangleEqual;": "\u22b5",
"RightUpDownVector;": "\u294f",
"RightUpTeeVector;": "\u295c",
"RightUpVector;": "\u21be",
"RightUpVectorBar;": "\u2954",
"RightVector;": "\u21c0",
"RightVectorBar;": "\u2953",
"Rightarrow;": "\u21d2",
"Ropf;": "\u211d",
"RoundImplies;": "\u2970",
"Rrightarrow;": "\u21db",
"Rscr;": "\u211b",
"Rsh;": "\u21b1",
"RuleDelayed;": "\u29f4",
"SHCHcy;": "\u0429",
"SHcy;": "\u0428",
"SOFTcy;": "\u042c",
"Sacute;": "\u015a",
"Sc;": "\u2abc",
"Scaron;": "\u0160",
"Scedil;": "\u015e",
"Scirc;": "\u015c",
"Scy;": "\u0421",
"Sfr;": "\U0001d516",
"ShortDownArrow;": "\u2193",
"ShortLeftArrow;": "\u2190",
"ShortRightArrow;": "\u2192",
"ShortUpArrow;": "\u2191",
"Sigma;": "\u03a3",
"SmallCircle;": "\u2218",
"Sopf;": "\U0001d54a",
"Sqrt;": "\u221a",
"Square;": "\u25a1",
"SquareIntersection;": "\u2293",
"SquareSubset;": "\u228f",
"SquareSubsetEqual;": "\u2291",
"SquareSuperset;": "\u2290",
"SquareSupersetEqual;": "\u2292",
"SquareUnion;": "\u2294",
"Sscr;": "\U0001d4ae",
"Star;": "\u22c6",
"Sub;": "\u22d0",
"Subset;": "\u22d0",
"SubsetEqual;": "\u2286",
"Succeeds;": "\u227b",
"SucceedsEqual;": "\u2ab0",
"SucceedsSlantEqual;": "\u227d",
"SucceedsTilde;": "\u227f",
"SuchThat;": "\u220b",
"Sum;": "\u2211",
"Sup;": "\u22d1",
"Superset;": "\u2283",
"SupersetEqual;": "\u2287",
"Supset;": "\u22d1",
"THORN": "\xde",
"THORN;": "\xde",
"TRADE;": "\u2122",
"TSHcy;": "\u040b",
"TScy;": "\u0426",
"Tab;": "\t",
"Tau;": "\u03a4",
"Tcaron;": "\u0164",
"Tcedil;": "\u0162",
"Tcy;": "\u0422",
"Tfr;": "\U0001d517",
"Therefore;": "\u2234",
"Theta;": "\u0398",
"ThickSpace;": "\u205f\u200a",
"ThinSpace;": "\u2009",
"Tilde;": "\u223c",
"TildeEqual;": "\u2243",
"TildeFullEqual;": "\u2245",
"TildeTilde;": "\u2248",
"Topf;": "\U0001d54b",
"TripleDot;": "\u20db",
"Tscr;": "\U0001d4af",
"Tstrok;": "\u0166",
"Uacute": "\xda",
"Uacute;": "\xda",
"Uarr;": "\u219f",
"Uarrocir;": "\u2949",
"Ubrcy;": "\u040e",
"Ubreve;": "\u016c",
"Ucirc": "\xdb",
"Ucirc;": "\xdb",
"Ucy;": "\u0423",
"Udblac;": "\u0170",
"Ufr;": "\U0001d518",
"Ugrave": "\xd9",
"Ugrave;": "\xd9",
"Umacr;": "\u016a",
"UnderBar;": "_",
"UnderBrace;": "\u23df",
"UnderBracket;": "\u23b5",
"UnderParenthesis;": "\u23dd",
"Union;": "\u22c3",
"UnionPlus;": "\u228e",
"Uogon;": "\u0172",
"Uopf;": "\U0001d54c",
"UpArrow;": "\u2191",
"UpArrowBar;": "\u2912",
"UpArrowDownArrow;": "\u21c5",
"UpDownArrow;": "\u2195",
"UpEquilibrium;": "\u296e",
"UpTee;": "\u22a5",
"UpTeeArrow;": "\u21a5",
"Uparrow;": "\u21d1",
"Updownarrow;": "\u21d5",
"UpperLeftArrow;": "\u2196",
"UpperRightArrow;": "\u2197",
"Upsi;": "\u03d2",
"Upsilon;": "\u03a5",
"Uring;": "\u016e",
"Uscr;": "\U0001d4b0",
"Utilde;": "\u0168",
"Uuml": "\xdc",
"Uuml;": "\xdc",
"VDash;": "\u22ab",
"Vbar;": "\u2aeb",
"Vcy;": "\u0412",
"Vdash;": "\u22a9",
"Vdashl;": "\u2ae6",
"Vee;": "\u22c1",
"Verbar;": "\u2016",
"Vert;": "\u2016",
"VerticalBar;": "\u2223",
"VerticalLine;": "|",
"VerticalSeparator;": "\u2758",
"VerticalTilde;": "\u2240",
"VeryThinSpace;": "\u200a",
"Vfr;": "\U0001d519",
"Vopf;": "\U0001d54d",
"Vscr;": "\U0001d4b1",
"Vvdash;": "\u22aa",
"Wcirc;": "\u0174",
"Wedge;": "\u22c0",
"Wfr;": "\U0001d51a",
"Wopf;": "\U0001d54e",
"Wscr;": "\U0001d4b2",
"Xfr;": "\U0001d51b",
"Xi;": "\u039e",
"Xopf;": "\U0001d54f",
"Xscr;": "\U0001d4b3",
"YAcy;": "\u042f",
"YIcy;": "\u0407",
"YUcy;": "\u042e",
"Yacute": "\xdd",
"Yacute;": "\xdd",
"Ycirc;": "\u0176",
"Ycy;": "\u042b",
"Yfr;": "\U0001d51c",
"Yopf;": "\U0001d550",
"Yscr;": "\U0001d4b4",
"Yuml;": "\u0178",
"ZHcy;": "\u0416",
"Zacute;": "\u0179",
"Zcaron;": "\u017d",
"Zcy;": "\u0417",
"Zdot;": "\u017b",
"ZeroWidthSpace;": "\u200b",
"Zeta;": "\u0396",
"Zfr;": "\u2128",
"Zopf;": "\u2124",
"Zscr;": "\U0001d4b5",
"aacute": "\xe1",
"aacute;": "\xe1",
"abreve;": "\u0103",
"ac;": "\u223e",
"acE;": "\u223e\u0333",
"acd;": "\u223f",
"acirc": "\xe2",
"acirc;": "\xe2",
"acute": "\xb4",
"acute;": "\xb4",
"acy;": "\u0430",
"aelig": "\xe6",
"aelig;": "\xe6",
"af;": "\u2061",
"afr;": "\U0001d51e",
"agrave": "\xe0",
"agrave;": "\xe0",
"alefsym;": "\u2135",
"aleph;": "\u2135",
"alpha;": "\u03b1",
"amacr;": "\u0101",
"amalg;": "\u2a3f",
"amp": "&",
"amp;": "&",
"and;": "\u2227",
"andand;": "\u2a55",
"andd;": "\u2a5c",
"andslope;": "\u2a58",
"andv;": "\u2a5a",
"ang;": "\u2220",
"ange;": "\u29a4",
"angle;": "\u2220",
"angmsd;": "\u2221",
"angmsdaa;": "\u29a8",
"angmsdab;": "\u29a9",
"angmsdac;": "\u29aa",
"angmsdad;": "\u29ab",
"angmsdae;": "\u29ac",
"angmsdaf;": "\u29ad",
"angmsdag;": "\u29ae",
"angmsdah;": "\u29af",
"angrt;": "\u221f",
"angrtvb;": "\u22be",
"angrtvbd;": "\u299d",
"angsph;": "\u2222",
"angst;": "\xc5",
"angzarr;": "\u237c",
"aogon;": "\u0105",
"aopf;": "\U0001d552",
"ap;": "\u2248",
"apE;": "\u2a70",
"apacir;": "\u2a6f",
"ape;": "\u224a",
"apid;": "\u224b",
"apos;": "'",
"approx;": "\u2248",
"approxeq;": "\u224a",
"aring": "\xe5",
"aring;": "\xe5",
"ascr;": "\U0001d4b6",
"ast;": "*",
"asymp;": "\u2248",
"asympeq;": "\u224d",
"atilde": "\xe3",
"atilde;": "\xe3",
"auml": "\xe4",
"auml;": "\xe4",
"awconint;": "\u2233",
"awint;": "\u2a11",
"bNot;": "\u2aed",
"backcong;": "\u224c",
"backepsilon;": "\u03f6",
"backprime;": "\u2035",
"backsim;": "\u223d",
"backsimeq;": "\u22cd",
"barvee;": "\u22bd",
"barwed;": "\u2305",
"barwedge;": "\u2305",
"bbrk;": "\u23b5",
"bbrktbrk;": "\u23b6",
"bcong;": "\u224c",
"bcy;": "\u0431",
"bdquo;": "\u201e",
"becaus;": "\u2235",
"because;": "\u2235",
"bemptyv;": "\u29b0",
"bepsi;": "\u03f6",
"bernou;": "\u212c",
"beta;": "\u03b2",
"beth;": "\u2136",
"between;": "\u226c",
"bfr;": "\U0001d51f",
"bigcap;": "\u22c2",
"bigcirc;": "\u25ef",
"bigcup;": "\u22c3",
"bigodot;": "\u2a00",
"bigoplus;": "\u2a01",
"bigotimes;": "\u2a02",
"bigsqcup;": "\u2a06",
"bigstar;": "\u2605",
"bigtriangledown;": "\u25bd",
"bigtriangleup;": "\u25b3",
"biguplus;": "\u2a04",
"bigvee;": "\u22c1",
"bigwedge;": "\u22c0",
"bkarow;": "\u290d",
"blacklozenge;": "\u29eb",
"blacksquare;": "\u25aa",
"blacktriangle;": "\u25b4",
"blacktriangledown;": "\u25be",
"blacktriangleleft;": "\u25c2",
"blacktriangleright;": "\u25b8",
"blank;": "\u2423",
"blk12;": "\u2592",
"blk14;": "\u2591",
"blk34;": "\u2593",
"block;": "\u2588",
"bne;": "=\u20e5",
"bnequiv;": "\u2261\u20e5",
"bnot;": "\u2310",
"bopf;": "\U0001d553",
"bot;": "\u22a5",
"bottom;": "\u22a5",
"bowtie;": "\u22c8",
"boxDL;": "\u2557",
"boxDR;": "\u2554",
"boxDl;": "\u2556",
"boxDr;": "\u2553",
"boxH;": "\u2550",
"boxHD;": "\u2566",
"boxHU;": "\u2569",
"boxHd;": "\u2564",
"boxHu;": "\u2567",
"boxUL;": "\u255d",
"boxUR;": "\u255a",
"boxUl;": "\u255c",
"boxUr;": "\u2559",
"boxV;": "\u2551",
"boxVH;": "\u256c",
"boxVL;": "\u2563",
"boxVR;": "\u2560",
"boxVh;": "\u256b",
"boxVl;": "\u2562",
"boxVr;": "\u255f",
"boxbox;": "\u29c9",
"boxdL;": "\u2555",
"boxdR;": "\u2552",
"boxdl;": "\u2510",
"boxdr;": "\u250c",
"boxh;": "\u2500",
"boxhD;": "\u2565",
"boxhU;": "\u2568",
"boxhd;": "\u252c",
"boxhu;": "\u2534",
"boxminus;": "\u229f",
"boxplus;": "\u229e",
"boxtimes;": "\u22a0",
"boxuL;": "\u255b",
"boxuR;": "\u2558",
"boxul;": "\u2518",
"boxur;": "\u2514",
"boxv;": "\u2502",
"boxvH;": "\u256a",
"boxvL;": "\u2561",
"boxvR;": "\u255e",
"boxvh;": "\u253c",
"boxvl;": "\u2524",
"boxvr;": "\u251c",
"bprime;": "\u2035",
"breve;": "\u02d8",
"brvbar": "\xa6",
"brvbar;": "\xa6",
"bscr;": "\U0001d4b7",
"bsemi;": "\u204f",
"bsim;": "\u223d",
"bsime;": "\u22cd",
"bsol;": "\\",
"bsolb;": "\u29c5",
"bsolhsub;": "\u27c8",
"bull;": "\u2022",
"bullet;": "\u2022",
"bump;": "\u224e",
"bumpE;": "\u2aae",
"bumpe;": "\u224f",
"bumpeq;": "\u224f",
"cacute;": "\u0107",
"cap;": "\u2229",
"capand;": "\u2a44",
"capbrcup;": "\u2a49",
"capcap;": "\u2a4b",
"capcup;": "\u2a47",
"capdot;": "\u2a40",
"caps;": "\u2229\ufe00",
"caret;": "\u2041",
"caron;": "\u02c7",
"ccaps;": "\u2a4d",
"ccaron;": "\u010d",
"ccedil": "\xe7",
"ccedil;": "\xe7",
"ccirc;": "\u0109",
"ccups;": "\u2a4c",
"ccupssm;": "\u2a50",
"cdot;": "\u010b",
"cedil": "\xb8",
"cedil;": "\xb8",
"cemptyv;": "\u29b2",
"cent": "\xa2",
"cent;": "\xa2",
"centerdot;": "\xb7",
"cfr;": "\U0001d520",
"chcy;": "\u0447",
"check;": "\u2713",
"checkmark;": "\u2713",
"chi;": "\u03c7",
"cir;": "\u25cb",
"cirE;": "\u29c3",
"circ;": "\u02c6",
"circeq;": "\u2257",
"circlearrowleft;": "\u21ba",
"circlearrowright;": "\u21bb",
"circledR;": "\xae",
"circledS;": "\u24c8",
"circledast;": "\u229b",
"circledcirc;": "\u229a",
"circleddash;": "\u229d",
"cire;": "\u2257",
"cirfnint;": "\u2a10",
"cirmid;": "\u2aef",
"cirscir;": "\u29c2",
"clubs;": "\u2663",
"clubsuit;": "\u2663",
"colon;": ":",
"colone;": "\u2254",
"coloneq;": "\u2254",
"comma;": ",",
"commat;": "@",
"comp;": "\u2201",
"compfn;": "\u2218",
"complement;": "\u2201",
"complexes;": "\u2102",
"cong;": "\u2245",
"congdot;": "\u2a6d",
"conint;": "\u222e",
"copf;": "\U0001d554",
"coprod;": "\u2210",
"copy": "\xa9",
"copy;": "\xa9",
"copysr;": "\u2117",
"crarr;": "\u21b5",
"cross;": "\u2717",
"cscr;": "\U0001d4b8",
"csub;": "\u2acf",
"csube;": "\u2ad1",
"csup;": "\u2ad0",
"csupe;": "\u2ad2",
"ctdot;": "\u22ef",
"cudarrl;": "\u2938",
"cudarrr;": "\u2935",
"cuepr;": "\u22de",
"cuesc;": "\u22df",
"cularr;": "\u21b6",
"cularrp;": "\u293d",
"cup;": "\u222a",
"cupbrcap;": "\u2a48",
"cupcap;": "\u2a46",
"cupcup;": "\u2a4a",
"cupdot;": "\u228d",
"cupor;": "\u2a45",
"cups;": "\u222a\ufe00",
"curarr;": "\u21b7",
"curarrm;": "\u293c",
"curlyeqprec;": "\u22de",
"curlyeqsucc;": "\u22df",
"curlyvee;": "\u22ce",
"curlywedge;": "\u22cf",
"curren": "\xa4",
"curren;": "\xa4",
"curvearrowleft;": "\u21b6",
"curvearrowright;": "\u21b7",
"cuvee;": "\u22ce",
"cuwed;": "\u22cf",
"cwconint;": "\u2232",
"cwint;": "\u2231",
"cylcty;": "\u232d",
"dArr;": "\u21d3",
"dHar;": "\u2965",
"dagger;": "\u2020",
"daleth;": "\u2138",
"darr;": "\u2193",
"dash;": "\u2010",
"dashv;": "\u22a3",
"dbkarow;": "\u290f",
"dblac;": "\u02dd",
"dcaron;": "\u010f",
"dcy;": "\u0434",
"dd;": "\u2146",
"ddagger;": "\u2021",
"ddarr;": "\u21ca",
"ddotseq;": "\u2a77",
"deg": "\xb0",
"deg;": "\xb0",
"delta;": "\u03b4",
"demptyv;": "\u29b1",
"dfisht;": "\u297f",
"dfr;": "\U0001d521",
"dharl;": "\u21c3",
"dharr;": "\u21c2",
"diam;": "\u22c4",
"diamond;": "\u22c4",
"diamondsuit;": "\u2666",
"diams;": "\u2666",
"die;": "\xa8",
"digamma;": "\u03dd",
"disin;": "\u22f2",
"div;": "\xf7",
"divide": "\xf7",
"divide;": "\xf7",
"divideontimes;": "\u22c7",
"divonx;": "\u22c7",
"djcy;": "\u0452",
"dlcorn;": "\u231e",
"dlcrop;": "\u230d",
"dollar;": "$",
"dopf;": "\U0001d555",
"dot;": "\u02d9",
"doteq;": "\u2250",
"doteqdot;": "\u2251",
"dotminus;": "\u2238",
"dotplus;": "\u2214",
"dotsquare;": "\u22a1",
"doublebarwedge;": "\u2306",
"downarrow;": "\u2193",
"downdownarrows;": "\u21ca",
"downharpoonleft;": "\u21c3",
"downharpoonright;": "\u21c2",
"drbkarow;": "\u2910",
"drcorn;": "\u231f",
"drcrop;": "\u230c",
"dscr;": "\U0001d4b9",
"dscy;": "\u0455",
"dsol;": "\u29f6",
"dstrok;": "\u0111",
"dtdot;": "\u22f1",
"dtri;": "\u25bf",
"dtrif;": "\u25be",
"duarr;": "\u21f5",
"duhar;": "\u296f",
"dwangle;": "\u29a6",
"dzcy;": "\u045f",
"dzigrarr;": "\u27ff",
"eDDot;": "\u2a77",
"eDot;": "\u2251",
"eacute": "\xe9",
"eacute;": "\xe9",
"easter;": "\u2a6e",
"ecaron;": "\u011b",
"ecir;": "\u2256",
"ecirc": "\xea",
"ecirc;": "\xea",
"ecolon;": "\u2255",
"ecy;": "\u044d",
"edot;": "\u0117",
"ee;": "\u2147",
"efDot;": "\u2252",
"efr;": "\U0001d522",
"eg;": "\u2a9a",
"egrave": "\xe8",
"egrave;": "\xe8",
"egs;": "\u2a96",
"egsdot;": "\u2a98",
"el;": "\u2a99",
"elinters;": "\u23e7",
"ell;": "\u2113",
"els;": "\u2a95",
"elsdot;": "\u2a97",
"emacr;": "\u0113",
"empty;": "\u2205",
"emptyset;": "\u2205",
"emptyv;": "\u2205",
"emsp13;": "\u2004",
"emsp14;": "\u2005",
"emsp;": "\u2003",
"eng;": "\u014b",
"ensp;": "\u2002",
"eogon;": "\u0119",
"eopf;": "\U0001d556",
"epar;": "\u22d5",
"eparsl;": "\u29e3",
"eplus;": "\u2a71",
"epsi;": "\u03b5",
"epsilon;": "\u03b5",
"epsiv;": "\u03f5",
"eqcirc;": "\u2256",
"eqcolon;": "\u2255",
"eqsim;": "\u2242",
"eqslantgtr;": "\u2a96",
"eqslantless;": "\u2a95",
"equals;": "=",
"equest;": "\u225f",
"equiv;": "\u2261",
"equivDD;": "\u2a78",
"eqvparsl;": "\u29e5",
"erDot;": "\u2253",
"erarr;": "\u2971",
"escr;": "\u212f",
"esdot;": "\u2250",
"esim;": "\u2242",
"eta;": "\u03b7",
"eth": "\xf0",
"eth;": "\xf0",
"euml": "\xeb",
"euml;": "\xeb",
"euro;": "\u20ac",
"excl;": "!",
"exist;": "\u2203",
"expectation;": "\u2130",
"exponentiale;": "\u2147",
"fallingdotseq;": "\u2252",
"fcy;": "\u0444",
"female;": "\u2640",
"ffilig;": "\ufb03",
"fflig;": "\ufb00",
"ffllig;": "\ufb04",
"ffr;": "\U0001d523",
"filig;": "\ufb01",
"fjlig;": "fj",
"flat;": "\u266d",
"fllig;": "\ufb02",
"fltns;": "\u25b1",
"fnof;": "\u0192",
"fopf;": "\U0001d557",
"forall;": "\u2200",
"fork;": "\u22d4",
"forkv;": "\u2ad9",
"fpartint;": "\u2a0d",
"frac12": "\xbd",
"frac12;": "\xbd",
"frac13;": "\u2153",
"frac14": "\xbc",
"frac14;": "\xbc",
"frac15;": "\u2155",
"frac16;": "\u2159",
"frac18;": "\u215b",
"frac23;": "\u2154",
"frac25;": "\u2156",
"frac34": "\xbe",
"frac34;": "\xbe",
"frac35;": "\u2157",
"frac38;": "\u215c",
"frac45;": "\u2158",
"frac56;": "\u215a",
"frac58;": "\u215d",
"frac78;": "\u215e",
"frasl;": "\u2044",
"frown;": "\u2322",
"fscr;": "\U0001d4bb",
"gE;": "\u2267",
"gEl;": "\u2a8c",
"gacute;": "\u01f5",
"gamma;": "\u03b3",
"gammad;": "\u03dd",
"gap;": "\u2a86",
"gbreve;": "\u011f",
"gcirc;": "\u011d",
"gcy;": "\u0433",
"gdot;": "\u0121",
"ge;": "\u2265",
"gel;": "\u22db",
"geq;": "\u2265",
"geqq;": "\u2267",
"geqslant;": "\u2a7e",
"ges;": "\u2a7e",
"gescc;": "\u2aa9",
"gesdot;": "\u2a80",
"gesdoto;": "\u2a82",
"gesdotol;": "\u2a84",
"gesl;": "\u22db\ufe00",
"gesles;": "\u2a94",
"gfr;": "\U0001d524",
"gg;": "\u226b",
"ggg;": "\u22d9",
"gimel;": "\u2137",
"gjcy;": "\u0453",
"gl;": "\u2277",
"glE;": "\u2a92",
"gla;": "\u2aa5",
"glj;": "\u2aa4",
"gnE;": "\u2269",
"gnap;": "\u2a8a",
"gnapprox;": "\u2a8a",
"gne;": "\u2a88",
"gneq;": "\u2a88",
"gneqq;": "\u2269",
"gnsim;": "\u22e7",
"gopf;": "\U0001d558",
"grave;": "`",
"gscr;": "\u210a",
"gsim;": "\u2273",
"gsime;": "\u2a8e",
"gsiml;": "\u2a90",
"gt": ">",
"gt;": ">",
"gtcc;": "\u2aa7",
"gtcir;": "\u2a7a",
"gtdot;": "\u22d7",
"gtlPar;": "\u2995",
"gtquest;": "\u2a7c",
"gtrapprox;": "\u2a86",
"gtrarr;": "\u2978",
"gtrdot;": "\u22d7",
"gtreqless;": "\u22db",
"gtreqqless;": "\u2a8c",
"gtrless;": "\u2277",
"gtrsim;": "\u2273",
"gvertneqq;": "\u2269\ufe00",
"gvnE;": "\u2269\ufe00",
"hArr;": "\u21d4",
"hairsp;": "\u200a",
"half;": "\xbd",
"hamilt;": "\u210b",
"hardcy;": "\u044a",
"harr;": "\u2194",
"harrcir;": "\u2948",
"harrw;": "\u21ad",
"hbar;": "\u210f",
"hcirc;": "\u0125",
"hearts;": "\u2665",
"heartsuit;": "\u2665",
"hellip;": "\u2026",
"hercon;": "\u22b9",
"hfr;": "\U0001d525",
"hksearow;": "\u2925",
"hkswarow;": "\u2926",
"hoarr;": "\u21ff",
"homtht;": "\u223b",
"hookleftarrow;": "\u21a9",
"hookrightarrow;": "\u21aa",
"hopf;": "\U0001d559",
"horbar;": "\u2015",
"hscr;": "\U0001d4bd",
"hslash;": "\u210f",
"hstrok;": "\u0127",
"hybull;": "\u2043",
"hyphen;": "\u2010",
"iacute": "\xed",
"iacute;": "\xed",
"ic;": "\u2063",
"icirc": "\xee",
"icirc;": "\xee",
"icy;": "\u0438",
"iecy;": "\u0435",
"iexcl": "\xa1",
"iexcl;": "\xa1",
"iff;": "\u21d4",
"ifr;": "\U0001d526",
"igrave": "\xec",
"igrave;": "\xec",
"ii;": "\u2148",
"iiiint;": "\u2a0c",
"iiint;": "\u222d",
"iinfin;": "\u29dc",
"iiota;": "\u2129",
"ijlig;": "\u0133",
"imacr;": "\u012b",
"image;": "\u2111",
"imagline;": "\u2110",
"imagpart;": "\u2111",
"imath;": "\u0131",
"imof;": "\u22b7",
"imped;": "\u01b5",
"in;": "\u2208",
"incare;": "\u2105",
"infin;": "\u221e",
"infintie;": "\u29dd",
"inodot;": "\u0131",
"int;": "\u222b",
"intcal;": "\u22ba",
"integers;": "\u2124",
"intercal;": "\u22ba",
"intlarhk;": "\u2a17",
"intprod;": "\u2a3c",
"iocy;": "\u0451",
"iogon;": "\u012f",
"iopf;": "\U0001d55a",
"iota;": "\u03b9",
"iprod;": "\u2a3c",
"iquest": "\xbf",
"iquest;": "\xbf",
"iscr;": "\U0001d4be",
"isin;": "\u2208",
"isinE;": "\u22f9",
"isindot;": "\u22f5",
"isins;": "\u22f4",
"isinsv;": "\u22f3",
"isinv;": "\u2208",
"it;": "\u2062",
"itilde;": "\u0129",
"iukcy;": "\u0456",
"iuml": "\xef",
"iuml;": "\xef",
"jcirc;": "\u0135",
"jcy;": "\u0439",
"jfr;": "\U0001d527",
"jmath;": "\u0237",
"jopf;": "\U0001d55b",
"jscr;": "\U0001d4bf",
"jsercy;": "\u0458",
"jukcy;": "\u0454",
"kappa;": "\u03ba",
"kappav;": "\u03f0",
"kcedil;": "\u0137",
"kcy;": "\u043a",
"kfr;": "\U0001d528",
"kgreen;": "\u0138",
"khcy;": "\u0445",
"kjcy;": "\u045c",
"kopf;": "\U0001d55c",
"kscr;": "\U0001d4c0",
"lAarr;": "\u21da",
"lArr;": "\u21d0",
"lAtail;": "\u291b",
"lBarr;": "\u290e",
"lE;": "\u2266",
"lEg;": "\u2a8b",
"lHar;": "\u2962",
"lacute;": "\u013a",
"laemptyv;": "\u29b4",
"lagran;": "\u2112",
"lambda;": "\u03bb",
"lang;": "\u27e8",
"langd;": "\u2991",
"langle;": "\u27e8",
"lap;": "\u2a85",
"laquo": "\xab",
"laquo;": "\xab",
"larr;": "\u2190",
"larrb;": "\u21e4",
"larrbfs;": "\u291f",
"larrfs;": "\u291d",
"larrhk;": "\u21a9",
"larrlp;": "\u21ab",
"larrpl;": "\u2939",
"larrsim;": "\u2973",
"larrtl;": "\u21a2",
"lat;": "\u2aab",
"latail;": "\u2919",
"late;": "\u2aad",
"lates;": "\u2aad\ufe00",
"lbarr;": "\u290c",
"lbbrk;": "\u2772",
"lbrace;": "{",
"lbrack;": "[",
"lbrke;": "\u298b",
"lbrksld;": "\u298f",
"lbrkslu;": "\u298d",
"lcaron;": "\u013e",
"lcedil;": "\u013c",
"lceil;": "\u2308",
"lcub;": "{",
"lcy;": "\u043b",
"ldca;": "\u2936",
"ldquo;": "\u201c",
"ldquor;": "\u201e",
"ldrdhar;": "\u2967",
"ldrushar;": "\u294b",
"ldsh;": "\u21b2",
"le;": "\u2264",
"leftarrow;": "\u2190",
"leftarrowtail;": "\u21a2",
"leftharpoondown;": "\u21bd",
"leftharpoonup;": "\u21bc",
"leftleftarrows;": "\u21c7",
"leftrightarrow;": "\u2194",
"leftrightarrows;": "\u21c6",
"leftrightharpoons;": "\u21cb",
"leftrightsquigarrow;": "\u21ad",
"leftthreetimes;": "\u22cb",
"leg;": "\u22da",
"leq;": "\u2264",
"leqq;": "\u2266",
"leqslant;": "\u2a7d",
"les;": "\u2a7d",
"lescc;": "\u2aa8",
"lesdot;": "\u2a7f",
"lesdoto;": "\u2a81",
"lesdotor;": "\u2a83",
"lesg;": "\u22da\ufe00",
"lesges;": "\u2a93",
"lessapprox;": "\u2a85",
"lessdot;": "\u22d6",
"lesseqgtr;": "\u22da",
"lesseqqgtr;": "\u2a8b",
"lessgtr;": "\u2276",
"lesssim;": "\u2272",
"lfisht;": "\u297c",
"lfloor;": "\u230a",
"lfr;": "\U0001d529",
"lg;": "\u2276",
"lgE;": "\u2a91",
"lhard;": "\u21bd",
"lharu;": "\u21bc",
"lharul;": "\u296a",
"lhblk;": "\u2584",
"ljcy;": "\u0459",
"ll;": "\u226a",
"llarr;": "\u21c7",
"llcorner;": "\u231e",
"llhard;": "\u296b",
"lltri;": "\u25fa",
"lmidot;": "\u0140",
"lmoust;": "\u23b0",
"lmoustache;": "\u23b0",
"lnE;": "\u2268",
"lnap;": "\u2a89",
"lnapprox;": "\u2a89",
"lne;": "\u2a87",
"lneq;": "\u2a87",
"lneqq;": "\u2268",
"lnsim;": "\u22e6",
"loang;": "\u27ec",
"loarr;": "\u21fd",
"lobrk;": "\u27e6",
"longleftarrow;": "\u27f5",
"longleftrightarrow;": "\u27f7",
"longmapsto;": "\u27fc",
"longrightarrow;": "\u27f6",
"looparrowleft;": "\u21ab",
"looparrowright;": "\u21ac",
"lopar;": "\u2985",
"lopf;": "\U0001d55d",
"loplus;": "\u2a2d",
"lotimes;": "\u2a34",
"lowast;": "\u2217",
"lowbar;": "_",
"loz;": "\u25ca",
"lozenge;": "\u25ca",
"lozf;": "\u29eb",
"lpar;": "(",
"lparlt;": "\u2993",
"lrarr;": "\u21c6",
"lrcorner;": "\u231f",
"lrhar;": "\u21cb",
"lrhard;": "\u296d",
"lrm;": "\u200e",
"lrtri;": "\u22bf",
"lsaquo;": "\u2039",
"lscr;": "\U0001d4c1",
"lsh;": "\u21b0",
"lsim;": "\u2272",
"lsime;": "\u2a8d",
"lsimg;": "\u2a8f",
"lsqb;": "[",
"lsquo;": "\u2018",
"lsquor;": "\u201a",
"lstrok;": "\u0142",
"lt": "<",
"lt;": "<",
"ltcc;": "\u2aa6",
"ltcir;": "\u2a79",
"ltdot;": "\u22d6",
"lthree;": "\u22cb",
"ltimes;": "\u22c9",
"ltlarr;": "\u2976",
"ltquest;": "\u2a7b",
"ltrPar;": "\u2996",
"ltri;": "\u25c3",
"ltrie;": "\u22b4",
"ltrif;": "\u25c2",
"lurdshar;": "\u294a",
"luruhar;": "\u2966",
"lvertneqq;": "\u2268\ufe00",
"lvnE;": "\u2268\ufe00",
"mDDot;": "\u223a",
"macr": "\xaf",
"macr;": "\xaf",
"male;": "\u2642",
"malt;": "\u2720",
"maltese;": "\u2720",
"map;": "\u21a6",
"mapsto;": "\u21a6",
"mapstodown;": "\u21a7",
"mapstoleft;": "\u21a4",
"mapstoup;": "\u21a5",
"marker;": "\u25ae",
"mcomma;": "\u2a29",
"mcy;": "\u043c",
"mdash;": "\u2014",
"measuredangle;": "\u2221",
"mfr;": "\U0001d52a",
"mho;": "\u2127",
"micro": "\xb5",
"micro;": "\xb5",
"mid;": "\u2223",
"midast;": "*",
"midcir;": "\u2af0",
"middot": "\xb7",
"middot;": "\xb7",
"minus;": "\u2212",
"minusb;": "\u229f",
"minusd;": "\u2238",
"minusdu;": "\u2a2a",
"mlcp;": "\u2adb",
"mldr;": "\u2026",
"mnplus;": "\u2213",
"models;": "\u22a7",
"mopf;": "\U0001d55e",
"mp;": "\u2213",
"mscr;": "\U0001d4c2",
"mstpos;": "\u223e",
"mu;": "\u03bc",
"multimap;": "\u22b8",
"mumap;": "\u22b8",
"nGg;": "\u22d9\u0338",
"nGt;": "\u226b\u20d2",
"nGtv;": "\u226b\u0338",
"nLeftarrow;": "\u21cd",
"nLeftrightarrow;": "\u21ce",
"nLl;": "\u22d8\u0338",
"nLt;": "\u226a\u20d2",
"nLtv;": "\u226a\u0338",
"nRightarrow;": "\u21cf",
"nVDash;": "\u22af",
"nVdash;": "\u22ae",
"nabla;": "\u2207",
"nacute;": "\u0144",
"nang;": "\u2220\u20d2",
"nap;": "\u2249",
"napE;": "\u2a70\u0338",
"napid;": "\u224b\u0338",
"napos;": "\u0149",
"napprox;": "\u2249",
"natur;": "\u266e",
"natural;": "\u266e",
"naturals;": "\u2115",
"nbsp": "\xa0",
"nbsp;": "\xa0",
"nbump;": "\u224e\u0338",
"nbumpe;": "\u224f\u0338",
"ncap;": "\u2a43",
"ncaron;": "\u0148",
"ncedil;": "\u0146",
"ncong;": "\u2247",
"ncongdot;": "\u2a6d\u0338",
"ncup;": "\u2a42",
"ncy;": "\u043d",
"ndash;": "\u2013",
"ne;": "\u2260",
"neArr;": "\u21d7",
"nearhk;": "\u2924",
"nearr;": "\u2197",
"nearrow;": "\u2197",
"nedot;": "\u2250\u0338",
"nequiv;": "\u2262",
"nesear;": "\u2928",
"nesim;": "\u2242\u0338",
"nexist;": "\u2204",
"nexists;": "\u2204",
"nfr;": "\U0001d52b",
"ngE;": "\u2267\u0338",
"nge;": "\u2271",
"ngeq;": "\u2271",
"ngeqq;": "\u2267\u0338",
"ngeqslant;": "\u2a7e\u0338",
"nges;": "\u2a7e\u0338",
"ngsim;": "\u2275",
"ngt;": "\u226f",
"ngtr;": "\u226f",
"nhArr;": "\u21ce",
"nharr;": "\u21ae",
"nhpar;": "\u2af2",
"ni;": "\u220b",
"nis;": "\u22fc",
"nisd;": "\u22fa",
"niv;": "\u220b",
"njcy;": "\u045a",
"nlArr;": "\u21cd",
"nlE;": "\u2266\u0338",
"nlarr;": "\u219a",
"nldr;": "\u2025",
"nle;": "\u2270",
"nleftarrow;": "\u219a",
"nleftrightarrow;": "\u21ae",
"nleq;": "\u2270",
"nleqq;": "\u2266\u0338",
"nleqslant;": "\u2a7d\u0338",
"nles;": "\u2a7d\u0338",
"nless;": "\u226e",
"nlsim;": "\u2274",
"nlt;": "\u226e",
"nltri;": "\u22ea",
"nltrie;": "\u22ec",
"nmid;": "\u2224",
"nopf;": "\U0001d55f",
"not": "\xac",
"not;": "\xac",
"notin;": "\u2209",
"notinE;": "\u22f9\u0338",
"notindot;": "\u22f5\u0338",
"notinva;": "\u2209",
"notinvb;": "\u22f7",
"notinvc;": "\u22f6",
"notni;": "\u220c",
"notniva;": "\u220c",
"notnivb;": "\u22fe",
"notnivc;": "\u22fd",
"npar;": "\u2226",
"nparallel;": "\u2226",
"nparsl;": "\u2afd\u20e5",
"npart;": "\u2202\u0338",
"npolint;": "\u2a14",
"npr;": "\u2280",
"nprcue;": "\u22e0",
"npre;": "\u2aaf\u0338",
"nprec;": "\u2280",
"npreceq;": "\u2aaf\u0338",
"nrArr;": "\u21cf",
"nrarr;": "\u219b",
"nrarrc;": "\u2933\u0338",
"nrarrw;": "\u219d\u0338",
"nrightarrow;": "\u219b",
"nrtri;": "\u22eb",
"nrtrie;": "\u22ed",
"nsc;": "\u2281",
"nsccue;": "\u22e1",
"nsce;": "\u2ab0\u0338",
"nscr;": "\U0001d4c3",
"nshortmid;": "\u2224",
"nshortparallel;": "\u2226",
"nsim;": "\u2241",
"nsime;": "\u2244",
"nsimeq;": "\u2244",
"nsmid;": "\u2224",
"nspar;": "\u2226",
"nsqsube;": "\u22e2",
"nsqsupe;": "\u22e3",
"nsub;": "\u2284",
"nsubE;": "\u2ac5\u0338",
"nsube;": "\u2288",
"nsubset;": "\u2282\u20d2",
"nsubseteq;": "\u2288",
"nsubseteqq;": "\u2ac5\u0338",
"nsucc;": "\u2281",
"nsucceq;": "\u2ab0\u0338",
"nsup;": "\u2285",
"nsupE;": "\u2ac6\u0338",
"nsupe;": "\u2289",
"nsupset;": "\u2283\u20d2",
"nsupseteq;": "\u2289",
"nsupseteqq;": "\u2ac6\u0338",
"ntgl;": "\u2279",
"ntilde": "\xf1",
"ntilde;": "\xf1",
"ntlg;": "\u2278",
"ntriangleleft;": "\u22ea",
"ntrianglelefteq;": "\u22ec",
"ntriangleright;": "\u22eb",
"ntrianglerighteq;": "\u22ed",
"nu;": "\u03bd",
"num;": "#",
"numero;": "\u2116",
"numsp;": "\u2007",
"nvDash;": "\u22ad",
"nvHarr;": "\u2904",
"nvap;": "\u224d\u20d2",
"nvdash;": "\u22ac",
"nvge;": "\u2265\u20d2",
"nvgt;": ">\u20d2",
"nvinfin;": "\u29de",
"nvlArr;": "\u2902",
"nvle;": "\u2264\u20d2",
"nvlt;": "<\u20d2",
"nvltrie;": "\u22b4\u20d2",
"nvrArr;": "\u2903",
"nvrtrie;": "\u22b5\u20d2",
"nvsim;": "\u223c\u20d2",
"nwArr;": "\u21d6",
"nwarhk;": "\u2923",
"nwarr;": "\u2196",
"nwarrow;": "\u2196",
"nwnear;": "\u2927",
"oS;": "\u24c8",
"oacute": "\xf3",
"oacute;": "\xf3",
"oast;": "\u229b",
"ocir;": "\u229a",
"ocirc": "\xf4",
"ocirc;": "\xf4",
"ocy;": "\u043e",
"odash;": "\u229d",
"odblac;": "\u0151",
"odiv;": "\u2a38",
"odot;": "\u2299",
"odsold;": "\u29bc",
"oelig;": "\u0153",
"ofcir;": "\u29bf",
"ofr;": "\U0001d52c",
"ogon;": "\u02db",
"ograve": "\xf2",
"ograve;": "\xf2",
"ogt;": "\u29c1",
"ohbar;": "\u29b5",
"ohm;": "\u03a9",
"oint;": "\u222e",
"olarr;": "\u21ba",
"olcir;": "\u29be",
"olcross;": "\u29bb",
"oline;": "\u203e",
"olt;": "\u29c0",
"omacr;": "\u014d",
"omega;": "\u03c9",
"omicron;": "\u03bf",
"omid;": "\u29b6",
"ominus;": "\u2296",
"oopf;": "\U0001d560",
"opar;": "\u29b7",
"operp;": "\u29b9",
"oplus;": "\u2295",
"or;": "\u2228",
"orarr;": "\u21bb",
"ord;": "\u2a5d",
"order;": "\u2134",
"orderof;": "\u2134",
"ordf": "\xaa",
"ordf;": "\xaa",
"ordm": "\xba",
"ordm;": "\xba",
"origof;": "\u22b6",
"oror;": "\u2a56",
"orslope;": "\u2a57",
"orv;": "\u2a5b",
"oscr;": "\u2134",
"oslash": "\xf8",
"oslash;": "\xf8",
"osol;": "\u2298",
"otilde": "\xf5",
"otilde;": "\xf5",
"otimes;": "\u2297",
"otimesas;": "\u2a36",
"ouml": "\xf6",
"ouml;": "\xf6",
"ovbar;": "\u233d",
"par;": "\u2225",
"para": "\xb6",
"para;": "\xb6",
"parallel;": "\u2225",
"parsim;": "\u2af3",
"parsl;": "\u2afd",
"part;": "\u2202",
"pcy;": "\u043f",
"percnt;": "%",
"period;": ".",
"permil;": "\u2030",
"perp;": "\u22a5",
"pertenk;": "\u2031",
"pfr;": "\U0001d52d",
"phi;": "\u03c6",
"phiv;": "\u03d5",
"phmmat;": "\u2133",
"phone;": "\u260e",
"pi;": "\u03c0",
"pitchfork;": "\u22d4",
"piv;": "\u03d6",
"planck;": "\u210f",
"planckh;": "\u210e",
"plankv;": "\u210f",
"plus;": "+",
"plusacir;": "\u2a23",
"plusb;": "\u229e",
"pluscir;": "\u2a22",
"plusdo;": "\u2214",
"plusdu;": "\u2a25",
"pluse;": "\u2a72",
"plusmn": "\xb1",
"plusmn;": "\xb1",
"plussim;": "\u2a26",
"plustwo;": "\u2a27",
"pm;": "\xb1",
"pointint;": "\u2a15",
"popf;": "\U0001d561",
"pound": "\xa3",
"pound;": "\xa3",
"pr;": "\u227a",
"prE;": "\u2ab3",
"prap;": "\u2ab7",
"prcue;": "\u227c",
"pre;": "\u2aaf",
"prec;": "\u227a",
"precapprox;": "\u2ab7",
"preccurlyeq;": "\u227c",
"preceq;": "\u2aaf",
"precnapprox;": "\u2ab9",
"precneqq;": "\u2ab5",
"precnsim;": "\u22e8",
"precsim;": "\u227e",
"prime;": "\u2032",
"primes;": "\u2119",
"prnE;": "\u2ab5",
"prnap;": "\u2ab9",
"prnsim;": "\u22e8",
"prod;": "\u220f",
"profalar;": "\u232e",
"profline;": "\u2312",
"profsurf;": "\u2313",
"prop;": "\u221d",
"propto;": "\u221d",
"prsim;": "\u227e",
"prurel;": "\u22b0",
"pscr;": "\U0001d4c5",
"psi;": "\u03c8",
"puncsp;": "\u2008",
"qfr;": "\U0001d52e",
"qint;": "\u2a0c",
"qopf;": "\U0001d562",
"qprime;": "\u2057",
"qscr;": "\U0001d4c6",
"quaternions;": "\u210d",
"quatint;": "\u2a16",
"quest;": "?",
"questeq;": "\u225f",
"quot": "\"",
"quot;": "\"",
"rAarr;": "\u21db",
"rArr;": "\u21d2",
"rAtail;": "\u291c",
"rBarr;": "\u290f",
"rHar;": "\u2964",
"race;": "\u223d\u0331",
"racute;": "\u0155",
"radic;": "\u221a",
"raemptyv;": "\u29b3",
"rang;": "\u27e9",
"rangd;": "\u2992",
"range;": "\u29a5",
"rangle;": "\u27e9",
"raquo": "\xbb",
"raquo;": "\xbb",
"rarr;": "\u2192",
"rarrap;": "\u2975",
"rarrb;": "\u21e5",
"rarrbfs;": "\u2920",
"rarrc;": "\u2933",
"rarrfs;": "\u291e",
"rarrhk;": "\u21aa",
"rarrlp;": "\u21ac",
"rarrpl;": "\u2945",
"rarrsim;": "\u2974",
"rarrtl;": "\u21a3",
"rarrw;": "\u219d",
"ratail;": "\u291a",
"ratio;": "\u2236",
"rationals;": "\u211a",
"rbarr;": "\u290d",
"rbbrk;": "\u2773",
"rbrace;": "}",
"rbrack;": "]",
"rbrke;": "\u298c",
"rbrksld;": "\u298e",
"rbrkslu;": "\u2990",
"rcaron;": "\u0159",
"rcedil;": "\u0157",
"rceil;": "\u2309",
"rcub;": "}",
"rcy;": "\u0440",
"rdca;": "\u2937",
"rdldhar;": "\u2969",
"rdquo;": "\u201d",
"rdquor;": "\u201d",
"rdsh;": "\u21b3",
"real;": "\u211c",
"realine;": "\u211b",
"realpart;": "\u211c",
"reals;": "\u211d",
"rect;": "\u25ad",
"reg": "\xae",
"reg;": "\xae",
"rfisht;": "\u297d",
"rfloor;": "\u230b",
"rfr;": "\U0001d52f",
"rhard;": "\u21c1",
"rharu;": "\u21c0",
"rharul;": "\u296c",
"rho;": "\u03c1",
"rhov;": "\u03f1",
"rightarrow;": "\u2192",
"rightarrowtail;": "\u21a3",
"rightharpoondown;": "\u21c1",
"rightharpoonup;": "\u21c0",
"rightleftarrows;": "\u21c4",
"rightleftharpoons;": "\u21cc",
"rightrightarrows;": "\u21c9",
"rightsquigarrow;": "\u219d",
"rightthreetimes;": "\u22cc",
"ring;": "\u02da",
"risingdotseq;": "\u2253",
"rlarr;": "\u21c4",
"rlhar;": "\u21cc",
"rlm;": "\u200f",
"rmoust;": "\u23b1",
"rmoustache;": "\u23b1",
"rnmid;": "\u2aee",
"roang;": "\u27ed",
"roarr;": "\u21fe",
"robrk;": "\u27e7",
"ropar;": "\u2986",
"ropf;": "\U0001d563",
"roplus;": "\u2a2e",
"rotimes;": "\u2a35",
"rpar;": ")",
"rpargt;": "\u2994",
"rppolint;": "\u2a12",
"rrarr;": "\u21c9",
"rsaquo;": "\u203a",
"rscr;": "\U0001d4c7",
"rsh;": "\u21b1",
"rsqb;": "]",
"rsquo;": "\u2019",
"rsquor;": "\u2019",
"rthree;": "\u22cc",
"rtimes;": "\u22ca",
"rtri;": "\u25b9",
"rtrie;": "\u22b5",
"rtrif;": "\u25b8",
"rtriltri;": "\u29ce",
"ruluhar;": "\u2968",
"rx;": "\u211e",
"sacute;": "\u015b",
"sbquo;": "\u201a",
"sc;": "\u227b",
"scE;": "\u2ab4",
"scap;": "\u2ab8",
"scaron;": "\u0161",
"sccue;": "\u227d",
"sce;": "\u2ab0",
"scedil;": "\u015f",
"scirc;": "\u015d",
"scnE;": "\u2ab6",
"scnap;": "\u2aba",
"scnsim;": "\u22e9",
"scpolint;": "\u2a13",
"scsim;": "\u227f",
"scy;": "\u0441",
"sdot;": "\u22c5",
"sdotb;": "\u22a1",
"sdote;": "\u2a66",
"seArr;": "\u21d8",
"searhk;": "\u2925",
"searr;": "\u2198",
"searrow;": "\u2198",
"sect": "\xa7",
"sect;": "\xa7",
"semi;": ";",
"seswar;": "\u2929",
"setminus;": "\u2216",
"setmn;": "\u2216",
"sext;": "\u2736",
"sfr;": "\U0001d530",
"sfrown;": "\u2322",
"sharp;": "\u266f",
"shchcy;": "\u0449",
"shcy;": "\u0448",
"shortmid;": "\u2223",
"shortparallel;": "\u2225",
"shy": "\xad",
"shy;": "\xad",
"sigma;": "\u03c3",
"sigmaf;": "\u03c2",
"sigmav;": "\u03c2",
"sim;": "\u223c",
"simdot;": "\u2a6a",
"sime;": "\u2243",
"simeq;": "\u2243",
"simg;": "\u2a9e",
"simgE;": "\u2aa0",
"siml;": "\u2a9d",
"simlE;": "\u2a9f",
"simne;": "\u2246",
"simplus;": "\u2a24",
"simrarr;": "\u2972",
"slarr;": "\u2190",
"smallsetminus;": "\u2216",
"smashp;": "\u2a33",
"smeparsl;": "\u29e4",
"smid;": "\u2223",
"smile;": "\u2323",
"smt;": "\u2aaa",
"smte;": "\u2aac",
"smtes;": "\u2aac\ufe00",
"softcy;": "\u044c",
"sol;": "/",
"solb;": "\u29c4",
"solbar;": "\u233f",
"sopf;": "\U0001d564",
"spades;": "\u2660",
"spadesuit;": "\u2660",
"spar;": "\u2225",
"sqcap;": "\u2293",
"sqcaps;": "\u2293\ufe00",
"sqcup;": "\u2294",
"sqcups;": "\u2294\ufe00",
"sqsub;": "\u228f",
"sqsube;": "\u2291",
"sqsubset;": "\u228f",
"sqsubseteq;": "\u2291",
"sqsup;": "\u2290",
"sqsupe;": "\u2292",
"sqsupset;": "\u2290",
"sqsupseteq;": "\u2292",
"squ;": "\u25a1",
"square;": "\u25a1",
"squarf;": "\u25aa",
"squf;": "\u25aa",
"srarr;": "\u2192",
"sscr;": "\U0001d4c8",
"ssetmn;": "\u2216",
"ssmile;": "\u2323",
"sstarf;": "\u22c6",
"star;": "\u2606",
"starf;": "\u2605",
"straightepsilon;": "\u03f5",
"straightphi;": "\u03d5",
"strns;": "\xaf",
"sub;": "\u2282",
"subE;": "\u2ac5",
"subdot;": "\u2abd",
"sube;": "\u2286",
"subedot;": "\u2ac3",
"submult;": "\u2ac1",
"subnE;": "\u2acb",
"subne;": "\u228a",
"subplus;": "\u2abf",
"subrarr;": "\u2979",
"subset;": "\u2282",
"subseteq;": "\u2286",
"subseteqq;": "\u2ac5",
"subsetneq;": "\u228a",
"subsetneqq;": "\u2acb",
"subsim;": "\u2ac7",
"subsub;": "\u2ad5",
"subsup;": "\u2ad3",
"succ;": "\u227b",
"succapprox;": "\u2ab8",
"succcurlyeq;": "\u227d",
"succeq;": "\u2ab0",
"succnapprox;": "\u2aba",
"succneqq;": "\u2ab6",
"succnsim;": "\u22e9",
"succsim;": "\u227f",
"sum;": "\u2211",
"sung;": "\u266a",
"sup1": "\xb9",
"sup1;": "\xb9",
"sup2": "\xb2",
"sup2;": "\xb2",
"sup3": "\xb3",
"sup3;": "\xb3",
"sup;": "\u2283",
"supE;": "\u2ac6",
"supdot;": "\u2abe",
"supdsub;": "\u2ad8",
"supe;": "\u2287",
"supedot;": "\u2ac4",
"suphsol;": "\u27c9",
"suphsub;": "\u2ad7",
"suplarr;": "\u297b",
"supmult;": "\u2ac2",
"supnE;": "\u2acc",
"supne;": "\u228b",
"supplus;": "\u2ac0",
"supset;": "\u2283",
"supseteq;": "\u2287",
"supseteqq;": "\u2ac6",
"supsetneq;": "\u228b",
"supsetneqq;": "\u2acc",
"supsim;": "\u2ac8",
"supsub;": "\u2ad4",
"supsup;": "\u2ad6",
"swArr;": "\u21d9",
"swarhk;": "\u2926",
"swarr;": "\u2199",
"swarrow;": "\u2199",
"swnwar;": "\u292a",
"szlig": "\xdf",
"szlig;": "\xdf",
"target;": "\u2316",
"tau;": "\u03c4",
"tbrk;": "\u23b4",
"tcaron;": "\u0165",
"tcedil;": "\u0163",
"tcy;": "\u0442",
"tdot;": "\u20db",
"telrec;": "\u2315",
"tfr;": "\U0001d531",
"there4;": "\u2234",
"therefore;": "\u2234",
"theta;": "\u03b8",
"thetasym;": "\u03d1",
"thetav;": "\u03d1",
"thickapprox;": "\u2248",
"thicksim;": "\u223c",
"thinsp;": "\u2009",
"thkap;": "\u2248",
"thksim;": "\u223c",
"thorn": "\xfe",
"thorn;": "\xfe",
"tilde;": "\u02dc",
"times": "\xd7",
"times;": "\xd7",
"timesb;": "\u22a0",
"timesbar;": "\u2a31",
"timesd;": "\u2a30",
"tint;": "\u222d",
"toea;": "\u2928",
"top;": "\u22a4",
"topbot;": "\u2336",
"topcir;": "\u2af1",
"topf;": "\U0001d565",
"topfork;": "\u2ada",
"tosa;": "\u2929",
"tprime;": "\u2034",
"trade;": "\u2122",
"triangle;": "\u25b5",
"triangledown;": "\u25bf",
"triangleleft;": "\u25c3",
"trianglelefteq;": "\u22b4",
"triangleq;": "\u225c",
"triangleright;": "\u25b9",
"trianglerighteq;": "\u22b5",
"tridot;": "\u25ec",
"trie;": "\u225c",
"triminus;": "\u2a3a",
"triplus;": "\u2a39",
"trisb;": "\u29cd",
"tritime;": "\u2a3b",
"trpezium;": "\u23e2",
"tscr;": "\U0001d4c9",
"tscy;": "\u0446",
"tshcy;": "\u045b",
"tstrok;": "\u0167",
"twixt;": "\u226c",
"twoheadleftarrow;": "\u219e",
"twoheadrightarrow;": "\u21a0",
"uArr;": "\u21d1",
"uHar;": "\u2963",
"uacute": "\xfa",
"uacute;": "\xfa",
"uarr;": "\u2191",
"ubrcy;": "\u045e",
"ubreve;": "\u016d",
"ucirc": "\xfb",
"ucirc;": "\xfb",
"ucy;": "\u0443",
"udarr;": "\u21c5",
"udblac;": "\u0171",
"udhar;": "\u296e",
"ufisht;": "\u297e",
"ufr;": "\U0001d532",
"ugrave": "\xf9",
"ugrave;": "\xf9",
"uharl;": "\u21bf",
"uharr;": "\u21be",
"uhblk;": "\u2580",
"ulcorn;": "\u231c",
"ulcorner;": "\u231c",
"ulcrop;": "\u230f",
"ultri;": "\u25f8",
"umacr;": "\u016b",
"uml": "\xa8",
"uml;": "\xa8",
"uogon;": "\u0173",
"uopf;": "\U0001d566",
"uparrow;": "\u2191",
"updownarrow;": "\u2195",
"upharpoonleft;": "\u21bf",
"upharpoonright;": "\u21be",
"uplus;": "\u228e",
"upsi;": "\u03c5",
"upsih;": "\u03d2",
"upsilon;": "\u03c5",
"upuparrows;": "\u21c8",
"urcorn;": "\u231d",
"urcorner;": "\u231d",
"urcrop;": "\u230e",
"uring;": "\u016f",
"urtri;": "\u25f9",
"uscr;": "\U0001d4ca",
"utdot;": "\u22f0",
"utilde;": "\u0169",
"utri;": "\u25b5",
"utrif;": "\u25b4",
"uuarr;": "\u21c8",
"uuml": "\xfc",
"uuml;": "\xfc",
"uwangle;": "\u29a7",
"vArr;": "\u21d5",
"vBar;": "\u2ae8",
"vBarv;": "\u2ae9",
"vDash;": "\u22a8",
"vangrt;": "\u299c",
"varepsilon;": "\u03f5",
"varkappa;": "\u03f0",
"varnothing;": "\u2205",
"varphi;": "\u03d5",
"varpi;": "\u03d6",
"varpropto;": "\u221d",
"varr;": "\u2195",
"varrho;": "\u03f1",
"varsigma;": "\u03c2",
"varsubsetneq;": "\u228a\ufe00",
"varsubsetneqq;": "\u2acb\ufe00",
"varsupsetneq;": "\u228b\ufe00",
"varsupsetneqq;": "\u2acc\ufe00",
"vartheta;": "\u03d1",
"vartriangleleft;": "\u22b2",
"vartriangleright;": "\u22b3",
"vcy;": "\u0432",
"vdash;": "\u22a2",
"vee;": "\u2228",
"veebar;": "\u22bb",
"veeeq;": "\u225a",
"vellip;": "\u22ee",
"verbar;": "|",
"vert;": "|",
"vfr;": "\U0001d533",
"vltri;": "\u22b2",
"vnsub;": "\u2282\u20d2",
"vnsup;": "\u2283\u20d2",
"vopf;": "\U0001d567",
"vprop;": "\u221d",
"vrtri;": "\u22b3",
"vscr;": "\U0001d4cb",
"vsubnE;": "\u2acb\ufe00",
"vsubne;": "\u228a\ufe00",
"vsupnE;": "\u2acc\ufe00",
"vsupne;": "\u228b\ufe00",
"vzigzag;": "\u299a",
"wcirc;": "\u0175",
"wedbar;": "\u2a5f",
"wedge;": "\u2227",
"wedgeq;": "\u2259",
"weierp;": "\u2118",
"wfr;": "\U0001d534",
"wopf;": "\U0001d568",
"wp;": "\u2118",
"wr;": "\u2240",
"wreath;": "\u2240",
"wscr;": "\U0001d4cc",
"xcap;": "\u22c2",
"xcirc;": "\u25ef",
"xcup;": "\u22c3",
"xdtri;": "\u25bd",
"xfr;": "\U0001d535",
"xhArr;": "\u27fa",
"xharr;": "\u27f7",
"xi;": "\u03be",
"xlArr;": "\u27f8",
"xlarr;": "\u27f5",
"xmap;": "\u27fc",
"xnis;": "\u22fb",
"xodot;": "\u2a00",
"xopf;": "\U0001d569",
"xoplus;": "\u2a01",
"xotime;": "\u2a02",
"xrArr;": "\u27f9",
"xrarr;": "\u27f6",
"xscr;": "\U0001d4cd",
"xsqcup;": "\u2a06",
"xuplus;": "\u2a04",
"xutri;": "\u25b3",
"xvee;": "\u22c1",
"xwedge;": "\u22c0",
"yacute": "\xfd",
"yacute;": "\xfd",
"yacy;": "\u044f",
"ycirc;": "\u0177",
"ycy;": "\u044b",
"yen": "\xa5",
"yen;": "\xa5",
"yfr;": "\U0001d536",
"yicy;": "\u0457",
"yopf;": "\U0001d56a",
"yscr;": "\U0001d4ce",
"yucy;": "\u044e",
"yuml": "\xff",
"yuml;": "\xff",
"zacute;": "\u017a",
"zcaron;": "\u017e",
"zcy;": "\u0437",
"zdot;": "\u017c",
"zeetrf;": "\u2128",
"zeta;": "\u03b6",
"zfr;": "\U0001d537",
"zhcy;": "\u0436",
"zigrarr;": "\u21dd",
"zopf;": "\U0001d56b",
"zscr;": "\U0001d4cf",
"zwj;": "\u200d",
"zwnj;": "\u200c",
}
replacementCharacters = {
0x0: "\uFFFD",
0x0d: "\u000D",
0x80: "\u20AC",
0x81: "\u0081",
0x82: "\u201A",
0x83: "\u0192",
0x84: "\u201E",
0x85: "\u2026",
0x86: "\u2020",
0x87: "\u2021",
0x88: "\u02C6",
0x89: "\u2030",
0x8A: "\u0160",
0x8B: "\u2039",
0x8C: "\u0152",
0x8D: "\u008D",
0x8E: "\u017D",
0x8F: "\u008F",
0x90: "\u0090",
0x91: "\u2018",
0x92: "\u2019",
0x93: "\u201C",
0x94: "\u201D",
0x95: "\u2022",
0x96: "\u2013",
0x97: "\u2014",
0x98: "\u02DC",
0x99: "\u2122",
0x9A: "\u0161",
0x9B: "\u203A",
0x9C: "\u0153",
0x9D: "\u009D",
0x9E: "\u017E",
0x9F: "\u0178",
}
tokenTypes = {
"Doctype": 0,
"Characters": 1,
"SpaceCharacters": 2,
"StartTag": 3,
"EndTag": 4,
"EmptyTag": 5,
"Comment": 6,
"ParseError": 7
}
tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"],
tokenTypes["EmptyTag"]])
prefixes = dict([(v, k) for k, v in namespaces.items()])
prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
class DataLossWarning(UserWarning):
pass
class ReparseException(Exception):
pass
from __future__ import absolute_import, division, unicode_literals
from . import base
try:
from collections import OrderedDict
except ImportError:
from ordereddict import OrderedDict
class Filter(base.Filter):
def __iter__(self):
for token in base.Filter.__iter__(self):
if token["type"] in ("StartTag", "EmptyTag"):
attrs = OrderedDict()
for name, value in sorted(token["data"].items(),
key=lambda x: x[0]):
attrs[name] = value
token["data"] = attrs
yield token
from __future__ import absolute_import, division, unicode_literals
class Filter(object):
def __init__(self, source):
self.source = source
def __iter__(self):
return iter(self.source)
def __getattr__(self, name):
return getattr(self.source, name)
from __future__ import absolute_import, division, unicode_literals
from . import base
class Filter(base.Filter):
def __init__(self, source, encoding):
base.Filter.__init__(self, source)
self.encoding = encoding
def __iter__(self):
state = "pre_head"
meta_found = (self.encoding is None)
pending = []
for token in base.Filter.__iter__(self):
type = token["type"]
if type == "StartTag":
if token["name"].lower() == "head":
state = "in_head"
elif type == "EmptyTag":
if token["name"].lower() == "meta":
# replace charset with actual encoding
has_http_equiv_content_type = False
for (namespace, name), value in token["data"].items():
if namespace is not None:
continue
elif name.lower() == 'charset':
token["data"][(namespace, name)] = self.encoding
meta_found = True
break
elif name == 'http-equiv' and value.lower() == 'content-type':
has_http_equiv_content_type = True
else:
if has_http_equiv_content_type and (None, "content") in token["data"]:
token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding
meta_found = True
elif token["name"].lower() == "head" and not meta_found:
# insert meta into empty head
yield {"type": "StartTag", "name": "head",
"data": token["data"]}
yield {"type": "EmptyTag", "name": "meta",
"data": {(None, "charset"): self.encoding}}
yield {"type": "EndTag", "name": "head"}
meta_found = True
continue
elif type == "EndTag":
if token["name"].lower() == "head" and pending:
# insert meta into head (if necessary) and flush pending queue
yield pending.pop(0)
if not meta_found:
yield {"type": "EmptyTag", "name": "meta",
"data": {(None, "charset"): self.encoding}}
while pending:
yield pending.pop(0)
meta_found = True
state = "post_head"
if state == "in_head":
pending.append(token)
else:
yield token
from __future__ import absolute_import, division, unicode_literals
from pip._vendor.six import text_type
from . import base
from ..constants import namespaces, voidElements
from ..constants import spaceCharacters
spaceCharacters = "".join(spaceCharacters)
class Filter(base.Filter):
def __init__(self, source, require_matching_tags=True):
super(Filter, self).__init__(source)
self.require_matching_tags = require_matching_tags
def __iter__(self):
open_elements = []
for token in base.Filter.__iter__(self):
type = token["type"]
if type in ("StartTag", "EmptyTag"):
namespace = token["namespace"]
name = token["name"]
assert namespace is None or isinstance(namespace, text_type)
assert namespace != ""
assert isinstance(name, text_type)
assert name != ""
assert isinstance(token["data"], dict)
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
assert type == "EmptyTag"
else:
assert type == "StartTag"
if type == "StartTag" and self.require_matching_tags:
open_elements.append((namespace, name))
for (namespace, name), value in token["data"].items():
assert namespace is None or isinstance(namespace, text_type)
assert namespace != ""
assert isinstance(name, text_type)
assert name != ""
assert isinstance(value, text_type)
elif type == "EndTag":
namespace = token["namespace"]
name = token["name"]
assert namespace is None or isinstance(namespace, text_type)
assert namespace != ""
assert isinstance(name, text_type)
assert name != ""
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
elif self.require_matching_tags:
start = open_elements.pop()
assert start == (namespace, name)
elif type == "Comment":
data = token["data"]
assert isinstance(data, text_type)
elif type in ("Characters", "SpaceCharacters"):
data = token["data"]
assert isinstance(data, text_type)
assert data != ""
if type == "SpaceCharacters":
assert data.strip(spaceCharacters) == ""
elif type == "Doctype":
name = token["name"]
assert name is None or isinstance(name, text_type)
assert token["publicId"] is None or isinstance(name, text_type)
assert token["systemId"] is None or isinstance(name, text_type)
elif type == "Entity":
assert isinstance(token["name"], text_type)
elif type == "SerializerError":
assert isinstance(token["data"], text_type)
else:
assert False, "Unknown token type: %(type)s" % {"type": type}
yield token
from __future__ import absolute_import, division, unicode_literals
from . import base
class Filter(base.Filter):
def slider(self):
previous1 = previous2 = None
for token in self.source:
if previous1 is not None:
yield previous2, previous1, token
previous2 = previous1
previous1 = token
if previous1 is not None:
yield previous2, previous1, None
def __iter__(self):
for previous, token, next in self.slider():
type = token["type"]
if type == "StartTag":
if (token["data"] or
not self.is_optional_start(token["name"], previous, next)):
yield token
elif type == "EndTag":
if not self.is_optional_end(token["name"], next):
yield token
else:
yield token
def is_optional_start(self, tagname, previous, next):
type = next and next["type"] or None
if tagname in 'html':
# An html element's start tag may be omitted if the first thing
# inside the html element is not a space character or a comment.
return type not in ("Comment", "SpaceCharacters")
elif tagname == 'head':
# A head element's start tag may be omitted if the first thing
# inside the head element is an element.
# XXX: we also omit the start tag if the head element is empty
if type in ("StartTag", "EmptyTag"):
return True
elif type == "EndTag":
return next["name"] == "head"
elif tagname == 'body':
# A body element's start tag may be omitted if the first thing
# inside the body element is not a space character or a comment,
# except if the first thing inside the body element is a script
# or style element and the node immediately preceding the body
# element is a head element whose end tag has been omitted.
if type in ("Comment", "SpaceCharacters"):
return False
elif type == "StartTag":
# XXX: we do not look at the preceding event, so we never omit
# the body element's start tag if it's followed by a script or
# a style element.
return next["name"] not in ('script', 'style')
else:
return True
elif tagname == 'colgroup':
# A colgroup element's start tag may be omitted if the first thing
# inside the colgroup element is a col element, and if the element
# is not immediately preceded by another colgroup element whose
# end tag has been omitted.
if type in ("StartTag", "EmptyTag"):
# XXX: we do not look at the preceding event, so instead we never
# omit the colgroup element's end tag when it is immediately
# followed by another colgroup element. See is_optional_end.
return next["name"] == "col"
else:
return False
elif tagname == 'tbody':
# A tbody element's start tag may be omitted if the first thing
# inside the tbody element is a tr element, and if the element is
# not immediately preceded by a tbody, thead, or tfoot element
# whose end tag has been omitted.
if type == "StartTag":
# omit the thead and tfoot elements' end tag when they are
# immediately followed by a tbody element. See is_optional_end.
if previous and previous['type'] == 'EndTag' and \
previous['name'] in ('tbody', 'thead', 'tfoot'):
return False
return next["name"] == 'tr'
else:
return False
return False
def is_optional_end(self, tagname, next):
type = next and next["type"] or None
if tagname in ('html', 'head', 'body'):
# An html element's end tag may be omitted if the html element
# is not immediately followed by a space character or a comment.
return type not in ("Comment", "SpaceCharacters")
elif tagname in ('li', 'optgroup', 'tr'):
# A li element's end tag may be omitted if the li element is
# immediately followed by another li element or if there is
# no more content in the parent element.
# An optgroup element's end tag may be omitted if the optgroup
# element is immediately followed by another optgroup element,
# or if there is no more content in the parent element.
# A tr element's end tag may be omitted if the tr element is
# immediately followed by another tr element, or if there is
# no more content in the parent element.
if type == "StartTag":
return next["name"] == tagname
else:
return type == "EndTag" or type is None
elif tagname in ('dt', 'dd'):
# A dt element's end tag may be omitted if the dt element is
# immediately followed by another dt element or a dd element.
# A dd element's end tag may be omitted if the dd element is
# immediately followed by another dd element or a dt element,
# or if there is no more content in the parent element.
if type == "StartTag":
return next["name"] in ('dt', 'dd')
elif tagname == 'dd':
return type == "EndTag" or type is None
else:
return False
elif tagname == 'p':
# A p element's end tag may be omitted if the p element is
# immediately followed by an address, article, aside,
# blockquote, datagrid, dialog, dir, div, dl, fieldset,
# footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu,
# nav, ol, p, pre, section, table, or ul, element, or if
# there is no more content in the parent element.
if type in ("StartTag", "EmptyTag"):
return next["name"] in ('address', 'article', 'aside',
'blockquote', 'datagrid', 'dialog',
'dir', 'div', 'dl', 'fieldset', 'footer',
'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
'header', 'hr', 'menu', 'nav', 'ol',
'p', 'pre', 'section', 'table', 'ul')
else:
return type == "EndTag" or type is None
elif tagname == 'option':
# An option element's end tag may be omitted if the option
# element is immediately followed by another option element,
# or if it is immediately followed by an <code>optgroup</code>
# element, or if there is no more content in the parent
# element.
if type == "StartTag":
return next["name"] in ('option', 'optgroup')
else:
return type == "EndTag" or type is None
elif tagname in ('rt', 'rp'):
# An rt element's end tag may be omitted if the rt element is
# immediately followed by an rt or rp element, or if there is
# no more content in the parent element.
# An rp element's end tag may be omitted if the rp element is
# immediately followed by an rt or rp element, or if there is
# no more content in the parent element.
if type == "StartTag":
return next["name"] in ('rt', 'rp')
else:
return type == "EndTag" or type is None
elif tagname == 'colgroup':
# A colgroup element's end tag may be omitted if the colgroup
# element is not immediately followed by a space character or
# a comment.
if type in ("Comment", "SpaceCharacters"):
return False
elif type == "StartTag":
# XXX: we also look for an immediately following colgroup
# element. See is_optional_start.
return next["name"] != 'colgroup'
else:
return True
elif tagname in ('thead', 'tbody'):
# A thead element's end tag may be omitted if the thead element
# is immediately followed by a tbody or tfoot element.
# A tbody element's end tag may be omitted if the tbody element
# is immediately followed by a tbody or tfoot element, or if
# there is no more content in the parent element.
# A tfoot element's end tag may be omitted if the tfoot element
# is immediately followed by a tbody element, or if there is no
# more content in the parent element.
# XXX: we never omit the end tag when the following element is
# a tbody. See is_optional_start.
if type == "StartTag":
return next["name"] in ['tbody', 'tfoot']
elif tagname == 'tbody':
return type == "EndTag" or type is None
else:
return False
elif tagname == 'tfoot':
# A tfoot element's end tag may be omitted if the tfoot element
# is immediately followed by a tbody element, or if there is no
# more content in the parent element.
# XXX: we never omit the end tag when the following element is
# a tbody. See is_optional_start.
if type == "StartTag":
return next["name"] == 'tbody'
else:
return type == "EndTag" or type is None
elif tagname in ('td', 'th'):
# A td element's end tag may be omitted if the td element is
# immediately followed by a td or th element, or if there is
# no more content in the parent element.
# A th element's end tag may be omitted if the th element is
# immediately followed by a td or th element, or if there is
# no more content in the parent element.
if type == "StartTag":
return next["name"] in ('td', 'th')
else:
return type == "EndTag" or type is None
return False
This file has been truncated, but you can view the full file.
from __future__ import absolute_import, division, unicode_literals
import re
from xml.sax.saxutils import escape, unescape
from pip._vendor.six.moves import urllib_parse as urlparse
from . import base
from ..constants import namespaces, prefixes
__all__ = ["Filter"]
allowed_elements = frozenset((
(namespaces['html'], 'a'),
(namespaces['html'], 'abbr'),
(namespaces['html'], 'acronym'),
(namespaces['html'], 'address'),
(namespaces['html'], 'area'),
(namespaces['html'], 'article'),
(namespaces['html'], 'aside'),
(namespaces['html'], 'audio'),
(namespaces['html'], 'b'),
(namespaces['html'], 'big'),
(namespaces['html'], 'blockquote'),
(namespaces['html'], 'br'),
(namespaces['html'], 'button'),
(namespaces['html'], 'canvas'),
(namespaces['html'], 'caption'),
(namespaces['html'], 'center'),
(namespaces['html'], 'cite'),
(namespaces['html'], 'code'),
(namespaces['html'], 'col'),
(namespaces['html'], 'colgroup'),
(namespaces['html'], 'command'),
(namespaces['html'], 'datagrid'),
(namespaces['html'], 'datalist'),
(namespaces['html'], 'dd'),
(namespaces['html'], 'del'),
(namespaces['html'], 'details'),
(namespaces['html'], 'dfn'),
(namespaces['html'], 'dialog'),
(namespaces['html'], 'dir'),
(namespaces['html'], 'div'),
(namespaces['html'], 'dl'),
(namespaces['html'], 'dt'),
(namespaces['html'], 'em'),
(namespaces['html'], 'event-source'),
(namespaces['html'], 'fieldset'),
(namespaces['html'], 'figcaption'),
(namespaces['html'], 'figure'),
(namespaces['html'], 'footer'),
(namespaces['html'], 'font'),
(namespaces['html'], 'form'),
(namespaces['html'], 'header'),
(namespaces['html'], 'h1'),
(namespaces['html'], 'h2'),
(namespaces['html'], 'h3'),
(namespaces['html'], 'h4'),
(namespaces['html'], 'h5'),
(namespaces['html'], 'h6'),
(namespaces['html'], 'hr'),
(namespaces['html'], 'i'),
(namespaces['html'], 'img'),
(namespaces['html'], 'input'),
(namespaces['html'], 'ins'),
(namespaces['html'], 'keygen'),
(namespaces['html'], 'kbd'),
(namespaces['html'], 'label'),
(namespaces['html'], 'legend'),
(namespaces['html'], 'li'),
(namespaces['html'], 'm'),
(namespaces['html'], 'map'),
(namespaces['html'], 'menu'),
(namespaces['html'], 'meter'),
(namespaces['html'], 'multicol'),
(namespaces['html'], 'nav'),
(namespaces['html'], 'nextid'),
(namespaces['html'], 'ol'),
(namespaces['html'], 'output'),
(namespaces['html'], 'optgroup'),
(namespaces['html'], 'option'),
(namespaces['html'], 'p'),
(namespaces['html'], 'pre'),
(namespaces['html'], 'progress'),
(namespaces['html'], 'q'),
(namespaces['html'], 's'),
(namespaces['html'], 'samp'),
(namespaces['html'], 'section'),
(namespaces['html'], 'select'),
(namespaces['html'], 'small'),
(namespaces['html'], 'sound'),
(namespaces['html'], 'source'),
(namespaces['html'], 'spacer'),
(namespaces['html'], 'span'),
(namespaces['html'], 'strike'),
(namespaces['html'], 'strong'),
(namespaces['html'], 'sub'),
(namespaces['html'], 'sup'),
(namespaces['html'], 'table'),
(namespaces['html'], 'tbody'),
(namespaces['html'], 'td'),
(namespaces['html'], 'textarea'),
(namespaces['html'], 'time'),
(namespaces['html'], 'tfoot'),
(namespaces['html'], 'th'),
(namespaces['html'], 'thead'),
(namespaces['html'], 'tr'),
(namespaces['html'], 'tt'),
(namespaces['html'], 'u'),
(namespaces['html'], 'ul'),
(namespaces['html'], 'var'),
(namespaces['html'], 'video'),
(namespaces['mathml'], 'maction'),
(namespaces['mathml'], 'math'),
(namespaces['mathml'], 'merror'),
(namespaces['mathml'], 'mfrac'),
(namespaces['mathml'], 'mi'),
(namespaces['mathml'], 'mmultiscripts'),
(namespaces['mathml'], 'mn'),
(namespaces['mathml'], 'mo'),
(namespaces['mathml'], 'mover'),
(namespaces['mathml'], 'mpadded'),
(namespaces['mathml'], 'mphantom'),
(namespaces['mathml'], 'mprescripts'),
(namespaces['mathml'], 'mroot'),
(namespaces['mathml'], 'mrow'),
(namespaces['mathml'], 'mspace'),
(namespaces['mathml'], 'msqrt'),
(namespaces['mathml'], 'mstyle'),
(namespaces['mathml'], 'msub'),
(namespaces['mathml'], 'msubsup'),
(namespaces['mathml'], 'msup'),
(namespaces['mathml'], 'mtable'),
(namespaces['mathml'], 'mtd'),
(namespaces['mathml'], 'mtext'),
(namespaces['mathml'], 'mtr'),
(namespaces['mathml'], 'munder'),
(namespaces['mathml'], 'munderover'),
(namespaces['mathml'], 'none'),
(namespaces['svg'], 'a'),
(namespaces['svg'], 'animate'),
(namespaces['svg'], 'animateColor'),
(namespaces['svg'], 'animateMotion'),
(namespaces['svg'], 'animateTransform'),
(namespaces['svg'], 'clipPath'),
(namespaces['svg'], 'circle'),
(namespaces['svg'], 'defs'),
(namespaces['svg'], 'desc'),
(namespaces['svg'], 'ellipse'),
(namespaces['svg'], 'font-face'),
(namespaces['svg'], 'font-face-name'),
(namespaces['svg'], 'font-face-src'),
(namespaces['svg'], 'g'),
(namespaces['svg'], 'glyph'),
(namespaces['svg'], 'hkern'),
(namespaces['svg'], 'linearGradient'),
(namespaces['svg'], 'line'),
(namespaces['svg'], 'marker'),
(namespaces['svg'], 'metadata'),
(namespaces['svg'], 'missing-glyph'),
(namespaces['svg'], 'mpath'),
(namespaces['svg'], 'path'),
(namespaces['svg'], 'polygon'),
(namespaces['svg'], 'polyline'),
(namespaces['svg'], 'radialGradient'),
(namespaces['svg'], 'rect'),
(namespaces['svg'], 'set'),
(namespaces['svg'], 'stop'),
(namespaces['svg'], 'svg'),
(namespaces['svg'], 'switch'),
(namespaces['svg'], 'text'),
(namespaces['svg'], 'title'),
(namespaces['svg'], 'tspan'),
(namespaces['svg'], 'use'),
))
allowed_attributes = frozenset((
# HTML attributes
(None, 'abbr'),
(None, 'accept'),
(None, 'accept-charset'),
(None, 'accesskey'),
(None, 'action'),
(None, 'align'),
(None, 'alt'),
(None, 'autocomplete'),
(None, 'autofocus'),
(None, 'axis'),
(None, 'background'),
(None, 'balance'),
(None, 'bgcolor'),
(None, 'bgproperties'),
(None, 'border'),
(None, 'bordercolor'),
(None, 'bordercolordark'),
(None, 'bordercolorlight'),
(None, 'bottompadding'),
(None, 'cellpadding'),
(None, 'cellspacing'),
(None, 'ch'),
(None, 'challenge'),
(None, 'char'),
(None, 'charoff'),
(None, 'choff'),
(None, 'charset'),
(None, 'checked'),
(None, 'cite'),
(None, 'class'),
(None, 'clear'),
(None, 'color'),
(None, 'cols'),
(None, 'colspan'),
(None, 'compact'),
(None, 'contenteditable'),
(None, 'controls'),
(None, 'coords'),
(None, 'data'),
(None, 'datafld'),
(None, 'datapagesize'),
(None, 'datasrc'),
(None, 'datetime'),
(None, 'default'),
View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment