McSinyx/extras_provider.py

## extras_provider.py
"""A provider that handles packages with "extras".

Python package dependencies can include "extras", which are additional
dependencies that are installed "on demand". For instance, project X could
have an additional set of dependencies if PDF generation features are needed.
These can be defined for an extra "pdf" and requested on install as X[pdf].

The basic resolvelib algorithm cannot handle extras, as it builds a dependency
graph which needs to be static - the edges (dependencies) from a node
(candidate) must be fixed. Extras break this assumption.

To model projects with extras, we define a candidate as being a project with a
specific set of dependencies. This introduces a problem, as the resolver could
produce a solution that demands version 1.0 of X[foo] and version 2.0 of
X[bar]. This is impossible, as there is actually only one project X to be
installed. To address this, we inject an additional dependency for every
candidate with an extra - X[foo] version v depends on X version v. By doing
this, we constrain the solution to require a unique version of X.
"""

from resolvelib.providers import AbstractProvider


class ExtrasProvider(AbstractProvider):
    """A provider that handles extras.

    Subclasses implement :meth:`get_extras_for` and
    :meth:`get_base_requirement`.  This class then folds the extras into the
    identifier returned by :meth:`identify` and, in :meth:`get_dependencies`,
    makes every candidate with extras depend on its own project/version.
    """

    def get_extras_for(self, requirement_or_candidate):
        """Given a requirement or candidate, return its extras.

        The extras should be a hashable value, since :meth:`identify`
        embeds them in the identifier it returns.
        """
        raise NotImplementedError

    def get_base_requirement(self, candidate):
        """Given a candidate, return a requirement that specifies that
        project/version.

        :meth:`get_dependencies` adds this requirement to the dependencies
        of every candidate that has extras.
        """
        raise NotImplementedError

    def identify(self, requirement_or_candidate):
        """Return the identifier of a requirement or candidate.

        Without extras this is the parent class's identifier; with extras
        it is the pair ``(parent identifier, extras)``.
        """
        base = super(ExtrasProvider, self).identify(requirement_or_candidate)
        extras = self.get_extras_for(requirement_or_candidate)
        if extras:
            return (base, extras)
        else:
            return base

    def get_dependencies(self, candidate):
        """Return the dependencies of ``candidate`` as a new list.

        The parent class's dependencies come first.  When the candidate has
        extras, the requirement from :meth:`get_base_requirement` is added
        at the end so that all extras of a project resolve to one version.
        """
        # Copy the parent's result: appending in place would modify a list
        # this method does not own, and would fail for non-list iterables.
        deps = list(super(ExtrasProvider, self).get_dependencies(candidate))
        # Go through get_extras_for(), like identify() does, rather than
        # assuming the candidate exposes an ``extras`` attribute.
        if self.get_extras_for(candidate):
            deps.append(self.get_base_requirement(candidate))
        return deps

## oslo-utils-1.4.0.log
Downloading oslo-utils==1.4.0
Downloading pbr==0.11.1
Downloading babel==2.8.0
Downloading six==1.15.0
Downloading iso8601==0.1.12
Downloading oslo-i18n==4.0.1
Downloading netaddr==0.7.19
Downloading netifaces==0.10.9
Downloading pip==20.1.1
Downloading pytz==2020.1
Downloading oslo-i18n==4.0.0
Downloading oslo-i18n==3.25.1
Downloading oslo-i18n==3.25.0
Downloading oslo-i18n==3.24.0
Downloading oslo-i18n==3.23.1
Downloading oslo-i18n==3.23.0
Downloading oslo-i18n==3.22.1
Downloading oslo-i18n==3.22.0
Downloading oslo-i18n==3.21.0
Downloading oslo-i18n==3.20.0
Downloading oslo-i18n==3.19.0
Downloading oslo-i18n==3.18.0
Downloading oslo-i18n==3.17.2
Downloading oslo-i18n==3.17.1
Downloading oslo-i18n==3.17.0
Downloading oslo-i18n==3.16.0
Downloading oslo-i18n==3.15.3
Downloading oslo-i18n==3.15.2
Downloading oslo-i18n==3.15.1
Downloading oslo-i18n==3.15.0
Downloading oslo-i18n==3.14.0
Downloading oslo-i18n==3.13.0
Downloading oslo-i18n==3.12.0
Downloading oslo-i18n==3.11.0
Downloading oslo-i18n==3.10.0
Downloading oslo-i18n==3.9.0
Downloading oslo-i18n==3.8.0
Downloading oslo-i18n==3.7.0
Downloading oslo-i18n==3.6.0
Downloading oslo-i18n==3.5.0
Downloading oslo-i18n==3.4.0
Downloading oslo-i18n==3.3.0
Downloading oslo-i18n==3.2.0
Downloading oslo-i18n==3.1.0
Downloading oslo-i18n==3.0.0
Downloading oslo-i18n==2.7.0
Downloading oslo-i18n==2.6.0
Downloading oslo-i18n==2.5.0
Downloading oslo-i18n==2.4.0
Downloading oslo-i18n==2.3.0
Downloading oslo-i18n==2.2.0
Downloading oslo-i18n==2.1.0

--- Pinned Candidates ---
oslo-utils 1.4.0
iso8601 0.1.12
six 1.15.0
netaddr 0.7.19
pbr 0.11.1
babel 2.8.0
pytz 2020.1
oslo-i18n 2.1.0
pip 20.1.1
netifaces 0.10.9

--- Dependency Graph ---
None -> oslo-utils
pip -> None
oslo-utils -> pbr, iso8601, netaddr, oslo-i18n, six, netifaces, babel
pbr -> pip
iso8601 -> None
oslo-i18n -> six, babel, pbr
six -> None
netaddr -> None
netifaces -> None
babel -> pytz
pytz -> None

## rl-wheel-parallel.py
#!/usr/bin/env python3
from email.message import EmailMessage
from email.parser import BytesParser
from io import BytesIO
from multiprocessing.dummy import Pool
from operator import attrgetter, methodcaller
from platform import python_version
from sys import argv
from urllib.parse import urlparse
from zipfile import ZipFile

import requests
import html5lib
from packaging.specifiers import SpecifierSet
from packaging.version import Version, InvalidVersion
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name
from resolvelib import BaseReporter, Resolver

from extras_provider import ExtrasProvider

# Version of the running interpreter; get_from_pypi() checks it against the
# data-requires-python specifier of each file listed by the index.
PYTHON_VERSION = Version(python_version())


class Candidate:
    """A wheel of one project version whose metadata is fetched lazily.

    ``name`` is stored canonicalized.  The wheel at ``url`` is only
    downloaded the first time its metadata is needed; ``provider`` is the
    object whose ``download()`` is invoked just before that happens.
    """

    def __init__(self, name, version, url, extras, provider):
        self.name, self.version = canonicalize_name(name), version
        self.url, self.extras, self.provider = url, extras, provider  # XXX
        # Parsed METADATA headers; None until fetch_metadata() has run.
        self._metadata = None

    def __repr__(self):
        if not self.extras:
            return f'<{self.name}=={self.version}>'
        return f"<{self.name}[{','.join(self.extras)}]=={self.version}>"

    @property
    def metadata(self):
        """The parsed ``METADATA`` headers, downloaded on first access."""
        if self._metadata is None:
            # Let the provider fetch everything it has queued first ...
            self.provider.download()  # XXX
            # ... then fetch our own metadata in case this candidate was
            # not among the queued ones (a no-op if it was).
            self.fetch_metadata()
        return self._metadata

    def fetch_metadata(self):
        """Download the wheel and store the headers of its ``METADATA``.

        Does nothing when the metadata is already present.  If the archive
        has no ``*.dist-info/METADATA`` member, an empty message is stored.
        """
        if self._metadata is not None:
            return
        print(f'Downloading {self.name}=={self.version}')
        data = requests.get(self.url).content
        with ZipFile(BytesIO(data)) as wheel:
            for member in wheel.namelist():
                if member.endswith('.dist-info/METADATA'):
                    # Close the member explicitly instead of leaking it.
                    with wheel.open(member) as fp:
                        self._metadata = BytesParser().parse(
                            fp, headersonly=True)
                    return
        self._metadata = EmailMessage()

    @property
    def requires_python(self):
        """The ``Requires-Python`` header, or ``None`` when absent."""
        return self.metadata.get('Requires-Python')

    @property
    def dependencies(self):
        """Yield each applicable ``Requires-Dist`` entry exactly once.

        An entry applies when it has no marker, or when its marker holds
        for at least one of this candidate's extras (for the empty extra
        ``''`` when the candidate has none).
        """
        extras = self.extras if self.extras else ['']
        for line in self.metadata.get_all('Requires-Dist', []):
            requirement = Requirement(line)
            marker = requirement.marker
            # any() stops at the first matching extra, so a requirement
            # whose marker matches several extras is not yielded repeatedly.
            if marker is None or any(
                    marker.evaluate({'extra': extra}) for extra in extras):
                yield requirement


class Downloader:
    """Lists wheels from PyPI and batches their metadata downloads.

    ``soon_needed`` maps a candidate name to the candidate whose metadata
    should be fetched by the next :meth:`download`; ``cached_candidates``
    holds every candidate created by :meth:`make_candidate`.
    """

    def __init__(self, *args, **kwargs):
        # Arguments are accepted and ignored.
        self.soon_needed, self.cached_candidates = {}, {}

    def need_soon(self, candidate):
        """Queue ``candidate`` for the next :meth:`download`.

        Only one candidate per name is kept; a later call replaces it.
        """
        self.soon_needed[candidate.name] = candidate

    def download(self):  # XXX
        """Fetch the metadata of all queued candidates, then clear the queue.

        The fetches are spread over a pool of 10 threads.
        """
        if not self.soon_needed:
            # Nothing queued: do not start a thread pool for no work.
            return
        with Pool(10) as pool:
            pool.map(methodcaller('fetch_metadata'), self.soon_needed.values())
        self.soon_needed = {}

    def get_from_pypi(self, project, extras):
        """Yield one candidate per wheel version of ``project`` on PyPI.

        Files are skipped when their ``data-requires-python`` excludes the
        running interpreter, when they are not wheels, or when the name or
        version cannot be read from the file name.  When a version has
        several wheels, only the first one listed is yielded.
        """
        index_url = 'https://pypi.org/simple/{}'.format(project)
        data = requests.get(index_url).content
        doc = html5lib.parse(data, namespaceHTMLElements=False)
        seen = set()
        for link in doc.findall('.//a'):
            url = link.attrib['href']
            # Skip items that need a different Python version
            py_req = link.attrib.get('data-requires-python')
            if py_req:
                spec = SpecifierSet(py_req)
                if PYTHON_VERSION not in spec:
                    continue

            path = urlparse(url).path
            filename = path.rpartition('/')[-1]
            # We only handle wheels
            if not filename.endswith('.whl'):
                continue
            parts = filename[:-4].split("-")
            if len(parts) < 2:
                # Not "<name>-<version>-...": ignore instead of crashing.
                continue
            name, version = parts[:2]
            # TODO: Handle compatibility tags?
            try:
                version = Version(version)
            except InvalidVersion:
                # Ignore files with invalid versions
                continue
            # Several wheels of one version map to the same cached
            # candidate; hand it out only once per listing.
            if (name, version) in seen:
                continue
            seen.add((name, version))
            yield self.make_candidate(name, version, url, extras)  # XXX

    def make_candidate(self, name, version, url, extras):
        """Return the cached candidate for these arguments, creating it
        on first use.

        The cache is keyed by name, version and extras; ``url`` is only
        used when the candidate is created.
        """
        # Extras are part of the key: candidates are not modified after
        # creation, so one built for other extras must not be reused.
        key = (name, version, frozenset(extras or ()))
        if key not in self.cached_candidates:
            self.cached_candidates[key] = Candidate(
                name, version, url, extras, self)
        return self.cached_candidates[key]


class PyPIProvider(ExtrasProvider, Downloader):
    """Resolver provider that takes its candidates from PyPI wheels."""

    def identify(self, requirement_or_candidate):
        """Identify a requirement or candidate by its canonical name."""
        return canonicalize_name(requirement_or_candidate.name)

    def get_extras_for(self, requirement_or_candidate):
        """Return the extras as a sorted tuple."""
        # Extras is a set, which is not hashable
        ordered = sorted(requirement_or_candidate.extras)
        return tuple(ordered)

    def get_base_requirement(self, candidate):
        """Return a requirement pinning the candidate's name and version."""
        return Requirement(f'{candidate.name}=={candidate.version}')

    def get_preference(self, resolution, candidates, information):
        """Prefer by the number of candidates."""
        return len(candidates)

    def find_matches(self, requirements):
        """Return the wheels satisfying every requirement, newest first.

        The best match, if there is one, is queued for metadata download.
        """
        if not requirements:
            raise RuntimeError('resolver promises at least one requirement')
        # NOTE(review): the first requirement's extras are neither checked
        # here nor forwarded to the search (an empty set is passed below);
        # confirm that ignoring them is intended.
        for later in requirements[1:]:
            if later.extras:
                raise RuntimeError('extras not supported in this example')
        project = canonicalize_name(requirements[0].name)
        # Need to pass the extras to the search, so they
        # are added to the candidate at creation - we
        # treat candidates as immutable once created.
        matches = [
            found for found in self.get_from_pypi(project, set())
            if all(found.version in req.specifier for req in requirements)
        ]
        matches.sort(key=attrgetter('version'), reverse=True)
        try:
            self.need_soon(matches[0])  # XXX
        except IndexError:
            pass  # usually when only sdist provided
        return matches

    def is_satisfied_by(self, requirement, candidate):
        """Tell whether ``candidate`` has the requirement's name and a
        version within its specifier."""
        same_project = canonicalize_name(requirement.name) == candidate.name
        return same_project and candidate.version in requirement.specifier

    def get_dependencies(self, candidate):
        """Return the candidate's dependencies as a list."""
        return [dependency for dependency in candidate.dependencies]


def main():
    """Resolve the projects given on the command line and print the result.

    With no project argument a usage line is printed instead.  Otherwise
    the pinned candidates are printed, followed by the dependency graph.
    """
    specs = argv[1:]
    if not specs:
        print('Usage:', argv[0], '<PyPI project name(s)>')
        return

    # Build the (reusable) resolver from the provider defined in this
    # file and a default reporter, then run the resolution.
    resolver = Resolver(PyPIProvider(), BaseReporter())
    requirements = [Requirement(spec) for spec in specs]
    result = resolver.resolve(requirements)

    # Report every pinned project with its version ...
    print('\n--- Pinned Candidates ---')
    for name, candidate in result.mapping.items():
        print(f'{name} {candidate.version}')

    # ... and each graph node with its children (None when it has none).
    print('\n--- Dependency Graph ---')
    for name in result.graph:
        children = ', '.join(result.graph.iter_children(name))
        print(f"{name} -> {children or None}")


# Run the resolver only when this file is executed as a script.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Swallow Ctrl-C and just emit a newline instead of a traceback.
        print()
	"""A provider that handles packages with "extras".

	Python package dependencies can include "extras", which are additional
	dependencies that are installed "on demand". For instance, project X could
	have an additional set of dependencies if PDF generation features are needed.
	These can be defined for an extra "pdf" and requested on install as X[pdf].

	The basic resolvelib algorithm cannot handle extras, as it builds a dependency
	graph which needs to be static - the edges (dependencies) from a node
	(candidate) must be fixed. Extras break this assumption.

	To model projects with extras, we define a candidate as being a project with a
	specific set of dependencies. This introduces a problem, as the resolver could
	produce a solution that demands version 1.0 of X[foo] and version 2.0 of
	X[bar]. This is impossible, as there is actually only one project X to be
	installed. To address this, we inject an additional dependency for every
	candidate with an extra - X[foo] version v depends on X version v. By doing
	this, we constrain the solution to require a unique version of X.
	"""

	from resolvelib.providers import AbstractProvider


	class ExtrasProvider(AbstractProvider):
	    """A provider that handles extras.

	    Subclasses implement :meth:`get_extras_for` and
	    :meth:`get_base_requirement`.  This class then folds the extras into
	    the identifier returned by :meth:`identify` and, in
	    :meth:`get_dependencies`, makes every candidate with extras depend on
	    its own project/version.
	    """

	    def get_extras_for(self, requirement_or_candidate):
	        """Given a requirement or candidate, return its extras.

	        The extras should be a hashable value, since :meth:`identify`
	        embeds them in the identifier it returns.
	        """
	        raise NotImplementedError

	    def get_base_requirement(self, candidate):
	        """Given a candidate, return a requirement that specifies that
	        project/version.
	        """
	        raise NotImplementedError

	    def identify(self, requirement_or_candidate):
	        """Return the parent class's identifier, paired with the extras
	        as ``(identifier, extras)`` when there are any.
	        """
	        base = super(ExtrasProvider, self).identify(requirement_or_candidate)
	        extras = self.get_extras_for(requirement_or_candidate)
	        if extras:
	            return (base, extras)
	        else:
	            return base

	    def get_dependencies(self, candidate):
	        """Return the dependencies of ``candidate`` as a new list.

	        When the candidate has extras, the requirement from
	        :meth:`get_base_requirement` is added after the parent class's
	        dependencies.
	        """
	        # Copy the parent's result: appending in place would modify a
	        # list this method does not own.
	        deps = list(super(ExtrasProvider, self).get_dependencies(candidate))
	        if self.get_extras_for(candidate):
	            deps.append(self.get_base_requirement(candidate))
	        return deps
	Downloading oslo-utils==1.4.0
	Downloading pbr==0.11.1
	Downloading babel==2.8.0
	Downloading six==1.15.0
	Downloading iso8601==0.1.12
	Downloading oslo-i18n==4.0.1
	Downloading netaddr==0.7.19
	Downloading netifaces==0.10.9
	Downloading pip==20.1.1
	Downloading pytz==2020.1
	Downloading oslo-i18n==4.0.0
	Downloading oslo-i18n==3.25.1
	Downloading oslo-i18n==3.25.0
	Downloading oslo-i18n==3.24.0
	Downloading oslo-i18n==3.23.1
	Downloading oslo-i18n==3.23.0
	Downloading oslo-i18n==3.22.1
	Downloading oslo-i18n==3.22.0
	Downloading oslo-i18n==3.21.0
	Downloading oslo-i18n==3.20.0
	Downloading oslo-i18n==3.19.0
	Downloading oslo-i18n==3.18.0
	Downloading oslo-i18n==3.17.2
	Downloading oslo-i18n==3.17.1
	Downloading oslo-i18n==3.17.0
	Downloading oslo-i18n==3.16.0
	Downloading oslo-i18n==3.15.3
	Downloading oslo-i18n==3.15.2
	Downloading oslo-i18n==3.15.1
	Downloading oslo-i18n==3.15.0
	Downloading oslo-i18n==3.14.0
	Downloading oslo-i18n==3.13.0
	Downloading oslo-i18n==3.12.0
	Downloading oslo-i18n==3.11.0
	Downloading oslo-i18n==3.10.0
	Downloading oslo-i18n==3.9.0
	Downloading oslo-i18n==3.8.0
	Downloading oslo-i18n==3.7.0
	Downloading oslo-i18n==3.6.0
	Downloading oslo-i18n==3.5.0
	Downloading oslo-i18n==3.4.0
	Downloading oslo-i18n==3.3.0
	Downloading oslo-i18n==3.2.0
	Downloading oslo-i18n==3.1.0
	Downloading oslo-i18n==3.0.0
	Downloading oslo-i18n==2.7.0
	Downloading oslo-i18n==2.6.0
	Downloading oslo-i18n==2.5.0
	Downloading oslo-i18n==2.4.0
	Downloading oslo-i18n==2.3.0
	Downloading oslo-i18n==2.2.0
	Downloading oslo-i18n==2.1.0

	--- Pinned Candidates ---
	oslo-utils 1.4.0
	iso8601 0.1.12
	six 1.15.0
	netaddr 0.7.19
	pbr 0.11.1
	babel 2.8.0
	pytz 2020.1
	oslo-i18n 2.1.0
	pip 20.1.1
	netifaces 0.10.9

	--- Dependency Graph ---
	None -> oslo-utils
	pip -> None
	oslo-utils -> pbr, iso8601, netaddr, oslo-i18n, six, netifaces, babel
	pbr -> pip
	iso8601 -> None
	oslo-i18n -> six, babel, pbr
	six -> None
	netaddr -> None
	netifaces -> None
	babel -> pytz
	pytz -> None
	#!/usr/bin/env python3
	from email.message import EmailMessage
	from email.parser import BytesParser
	from io import BytesIO
	from multiprocessing.dummy import Pool
	from operator import attrgetter, methodcaller
	from platform import python_version
	from sys import argv
	from urllib.parse import urlparse
	from zipfile import ZipFile

	import requests
	import html5lib
	from packaging.specifiers import SpecifierSet
	from packaging.version import Version, InvalidVersion
	from packaging.requirements import Requirement
	from packaging.utils import canonicalize_name
	from resolvelib import BaseReporter, Resolver

	from extras_provider import ExtrasProvider

	# Version of the running interpreter; get_from_pypi() checks it against
	# the data-requires-python specifier of each file listed by the index.
	PYTHON_VERSION = Version(python_version())


	class Candidate:
	    """A wheel of one project version whose metadata is fetched lazily.

	    ``name`` is stored canonicalized.  The wheel at ``url`` is only
	    downloaded the first time its metadata is needed; ``provider`` is the
	    object whose ``download()`` is invoked just before that happens.
	    """

	    def __init__(self, name, version, url, extras, provider):
	        self.name, self.version = canonicalize_name(name), version
	        self.url, self.extras, self.provider = url, extras, provider  # XXX
	        # Parsed METADATA headers; None until fetch_metadata() has run.
	        self._metadata = None

	    def __repr__(self):
	        if not self.extras:
	            return f'<{self.name}=={self.version}>'
	        return f"<{self.name}[{','.join(self.extras)}]=={self.version}>"

	    @property
	    def metadata(self):
	        """The parsed ``METADATA`` headers, downloaded on first access."""
	        if self._metadata is None:
	            self.provider.download()  # XXX
	            # In case this candidate was not among the queued ones.
	            self.fetch_metadata()
	        return self._metadata

	    def fetch_metadata(self):
	        """Download the wheel and store the headers of its ``METADATA``.

	        Does nothing when the metadata is already present.  If the
	        archive has no ``*.dist-info/METADATA`` member, an empty message
	        is stored.
	        """
	        if self._metadata is not None:
	            return
	        print(f'Downloading {self.name}=={self.version}')
	        data = requests.get(self.url).content
	        with ZipFile(BytesIO(data)) as wheel:
	            for member in wheel.namelist():
	                if member.endswith('.dist-info/METADATA'):
	                    # Close the member explicitly instead of leaking it.
	                    with wheel.open(member) as fp:
	                        self._metadata = BytesParser().parse(
	                            fp, headersonly=True)
	                    return
	        self._metadata = EmailMessage()

	    @property
	    def requires_python(self):
	        """The ``Requires-Python`` header, or ``None`` when absent."""
	        return self.metadata.get('Requires-Python')

	    @property
	    def dependencies(self):
	        """Yield each applicable ``Requires-Dist`` entry exactly once.

	        An entry applies when it has no marker, or when its marker holds
	        for at least one of this candidate's extras (for the empty extra
	        ``''`` when the candidate has none).
	        """
	        extras = self.extras if self.extras else ['']
	        for line in self.metadata.get_all('Requires-Dist', []):
	            requirement = Requirement(line)
	            marker = requirement.marker
	            # any() stops at the first matching extra, so no duplicates.
	            if marker is None or any(
	                    marker.evaluate({'extra': extra}) for extra in extras):
	                yield requirement


	class Downloader:
	    """Lists wheels from PyPI and batches their metadata downloads.

	    ``soon_needed`` maps a candidate name to the candidate whose metadata
	    should be fetched by the next :meth:`download`; ``cached_candidates``
	    holds every candidate created by :meth:`make_candidate`.
	    """

	    def __init__(self, *args, **kwargs):
	        # Arguments are accepted and ignored.
	        self.soon_needed, self.cached_candidates = {}, {}

	    def need_soon(self, candidate):
	        """Queue ``candidate`` (one per name) for the next download."""
	        self.soon_needed[candidate.name] = candidate

	    def download(self):  # XXX
	        """Fetch the metadata of all queued candidates on a pool of 10
	        threads, then clear the queue.
	        """
	        if not self.soon_needed:
	            # Nothing queued: do not start a thread pool for no work.
	            return
	        with Pool(10) as pool:
	            pool.map(methodcaller('fetch_metadata'),
	                     self.soon_needed.values())
	        self.soon_needed = {}

	    def get_from_pypi(self, project, extras):
	        """Yield one candidate per wheel version of ``project`` on PyPI.

	        Files are skipped when their ``data-requires-python`` excludes
	        the running interpreter, when they are not wheels, or when the
	        name or version cannot be read from the file name.  When a
	        version has several wheels, only the first listed is yielded.
	        """
	        index_url = 'https://pypi.org/simple/{}'.format(project)
	        data = requests.get(index_url).content
	        doc = html5lib.parse(data, namespaceHTMLElements=False)
	        seen = set()
	        for link in doc.findall('.//a'):
	            url = link.attrib['href']
	            # Skip items that need a different Python version
	            py_req = link.attrib.get('data-requires-python')
	            if py_req:
	                spec = SpecifierSet(py_req)
	                if PYTHON_VERSION not in spec:
	                    continue

	            path = urlparse(url).path
	            filename = path.rpartition('/')[-1]
	            # We only handle wheels
	            if not filename.endswith('.whl'):
	                continue
	            parts = filename[:-4].split("-")
	            if len(parts) < 2:
	                # Not "<name>-<version>-...": ignore instead of crashing.
	                continue
	            name, version = parts[:2]
	            # TODO: Handle compatibility tags?
	            try:
	                version = Version(version)
	            except InvalidVersion:
	                # Ignore files with invalid versions
	                continue
	            # Several wheels of one version map to the same cached
	            # candidate; hand it out only once per listing.
	            if (name, version) in seen:
	                continue
	            seen.add((name, version))
	            yield self.make_candidate(name, version, url, extras)  # XXX

	    def make_candidate(self, name, version, url, extras):
	        """Return the cached candidate for these arguments, creating it
	        on first use.  ``url`` is only used at creation.
	        """
	        # Extras are part of the key: candidates are not modified after
	        # creation, so one built for other extras must not be reused.
	        key = (name, version, frozenset(extras or ()))
	        if key not in self.cached_candidates:
	            self.cached_candidates[key] = Candidate(
	                name, version, url, extras, self)
	        return self.cached_candidates[key]


	class PyPIProvider(ExtrasProvider, Downloader):
	    """Resolver provider that takes its candidates from PyPI wheels."""

	    def identify(self, requirement_or_candidate):
	        """Identify a requirement or candidate by its canonical name."""
	        return canonicalize_name(requirement_or_candidate.name)

	    def get_extras_for(self, requirement_or_candidate):
	        """Return the extras as a sorted tuple."""
	        # Extras is a set, which is not hashable
	        return tuple(sorted(requirement_or_candidate.extras))

	    def get_base_requirement(self, candidate):
	        """Return a requirement pinning the candidate's name/version."""
	        return Requirement('{0.name}=={0.version}'.format(candidate))

	    def get_preference(self, resolution, candidates, information):
	        """Prefer by the number of candidates."""
	        return len(candidates)

	    def find_matches(self, requirements):
	        """Return the wheels satisfying every requirement, newest first.

	        The best match, if there is one, is queued for metadata download.
	        """
	        if not requirements:
	            raise RuntimeError('resolver promises at least one requirement')
	        if any(r.extras for r in requirements[1:]):
	            raise RuntimeError('extras not supported in this example')
	        name, candidates = canonicalize_name(requirements[0].name), []
	        for c in self.get_from_pypi(name, set()):
	            # Need to pass the extras to the search, so they
	            # are added to the candidate at creation - we
	            # treat candidates as immutable once created.
	            if all(c.version in r.specifier for r in requirements):
	                candidates.append(c)
	        candidates.sort(key=attrgetter('version'), reverse=True)
	        try:
	            self.need_soon(candidates[0])  # XXX
	        except IndexError:
	            pass  # usually when only sdist provided
	        return candidates

	    def is_satisfied_by(self, requirement, candidate):
	        """Tell whether ``candidate`` has the requirement's name and a
	        version within its specifier."""
	        if canonicalize_name(requirement.name) != candidate.name:
	            return False
	        return candidate.version in requirement.specifier

	    def get_dependencies(self, candidate):
	        """Return the candidate's dependencies as a list."""
	        return list(candidate.dependencies)


	def main():
	    """Resolve the projects given on the command line and print the
	    pinned candidates followed by the dependency graph.

	    With no project argument a usage line is printed instead.
	    """
	    if len(argv) < 2:
	        return print('Usage:', argv[0], '<PyPI project name(s)>')
	    # Create the (reusable) resolver
	    # from the provider defined in this file and a default reporter.
	    provider = PyPIProvider()
	    reporter = BaseReporter()
	    resolver = Resolver(provider, reporter)
	    # Kick off the resolution process, and get the final result.
	    result = resolver.resolve([Requirement(r) for r in argv[1:]])

	    # Display the resolution result.
	    print('\n--- Pinned Candidates ---')
	    for name, candidate in result.mapping.items():
	        print(f'{name} {candidate.version}')
	    print('\n--- Dependency Graph ---')
	    for name in result.graph:
	        targets = ', '.join(result.graph.iter_children(name)) or None
	        print(f"{name} -> {targets}")


	# Run the resolver only when this file is executed as a script.
	if __name__ == '__main__':
	    try:
	        main()
	    except KeyboardInterrupt:
	        # Swallow Ctrl-C and just emit a newline instead of a traceback.
	        print()