Last active
June 2, 2020 04:21
-
-
Save McSinyx/513dbff71174fcc79f1cb600e09881af to your computer and use it in GitHub Desktop.
Resolve wheel requirements from PyPI with parallel download
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""A provider that handles packages with "extras". | |
Python package dependencies can include "extras", which are additional | |
dependencies that are installed "on demand". For instance, project X could | |
have an additional set of dependencies if PDF generation features are needed. | |
These can be defined for an extra "pdf" and requested on install as X[pdf]. | |
The basic resolvelib algorithm cannot handle extras, as it builds a dependency | |
graph which needs to be static - the edges (dependencies) from a node | |
(candidate) must be fixed. Extras break this assumption. | |
To model projects with extras, we define a candidate as being a project with a | |
specific set of dependencies. This introduces a problem, as the resolver could | |
produce a solution that demands version 1.0 of X[foo] and version 2.0 of | |
X[bar]. This is impossible, as there is actually only one project X to be | |
installed. To address this, we inject an additional dependency for every | |
candidate with an extra - X[foo] version v depends on X version v. By doing | |
this, we constrain the solution to require a unique version of X. | |
""" | |
from resolvelib.providers import AbstractProvider | |
class ExtrasProvider(AbstractProvider):
    """A provider that handles extras.

    Subclasses supply ``get_extras_for`` and ``get_base_requirement``; this
    class then extends ``identify`` and ``get_dependencies`` so that a
    candidate with extras is identified separately from the bare project
    while still depending on that exact project/version.
    """

    def get_extras_for(self, requirement_or_candidate):
        """Given a requirement or candidate, return its extras.

        The extras should be a hashable value.
        """
        raise NotImplementedError

    def get_base_requirement(self, candidate):
        """Given a candidate, return a requirement that specifies that
        project/version.
        """
        raise NotImplementedError

    def identify(self, requirement_or_candidate):
        """Return the parent's identifier, paired with the extras if any.

        Without extras the parent's identifier is returned unchanged; with
        extras the result is the tuple ``(base_identifier, extras)``.
        """
        base = super(ExtrasProvider, self).identify(requirement_or_candidate)
        extras = self.get_extras_for(requirement_or_candidate)
        if extras:
            return (base, extras)
        return base

    def get_dependencies(self, candidate):
        """Return the parent's dependencies plus the base requirement.

        When the candidate has extras, a requirement on the same
        project/version without extras is added, which forces the resolver
        to settle on a single version of the underlying project.
        """
        # Work on a copy: the parent may hand back a list it keeps around,
        # and appending to it in place would grow it on every call.
        deps = list(super(ExtrasProvider, self).get_dependencies(candidate))
        # Ask the same hook that identify() uses, rather than assuming the
        # candidate exposes an ``extras`` attribute.
        if self.get_extras_for(candidate):
            deps.append(self.get_base_requirement(candidate))
        return deps
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Downloading oslo-utils==1.4.0 | |
Downloading pbr==0.11.1 | |
Downloading babel==2.8.0 | |
Downloading six==1.15.0 | |
Downloading iso8601==0.1.12 | |
Downloading oslo-i18n==4.0.1 | |
Downloading netaddr==0.7.19 | |
Downloading netifaces==0.10.9 | |
Downloading pip==20.1.1 | |
Downloading pytz==2020.1 | |
Downloading oslo-i18n==4.0.0 | |
Downloading oslo-i18n==3.25.1 | |
Downloading oslo-i18n==3.25.0 | |
Downloading oslo-i18n==3.24.0 | |
Downloading oslo-i18n==3.23.1 | |
Downloading oslo-i18n==3.23.0 | |
Downloading oslo-i18n==3.22.1 | |
Downloading oslo-i18n==3.22.0 | |
Downloading oslo-i18n==3.21.0 | |
Downloading oslo-i18n==3.20.0 | |
Downloading oslo-i18n==3.19.0 | |
Downloading oslo-i18n==3.18.0 | |
Downloading oslo-i18n==3.17.2 | |
Downloading oslo-i18n==3.17.1 | |
Downloading oslo-i18n==3.17.0 | |
Downloading oslo-i18n==3.16.0 | |
Downloading oslo-i18n==3.15.3 | |
Downloading oslo-i18n==3.15.2 | |
Downloading oslo-i18n==3.15.1 | |
Downloading oslo-i18n==3.15.0 | |
Downloading oslo-i18n==3.14.0 | |
Downloading oslo-i18n==3.13.0 | |
Downloading oslo-i18n==3.12.0 | |
Downloading oslo-i18n==3.11.0 | |
Downloading oslo-i18n==3.10.0 | |
Downloading oslo-i18n==3.9.0 | |
Downloading oslo-i18n==3.8.0 | |
Downloading oslo-i18n==3.7.0 | |
Downloading oslo-i18n==3.6.0 | |
Downloading oslo-i18n==3.5.0 | |
Downloading oslo-i18n==3.4.0 | |
Downloading oslo-i18n==3.3.0 | |
Downloading oslo-i18n==3.2.0 | |
Downloading oslo-i18n==3.1.0 | |
Downloading oslo-i18n==3.0.0 | |
Downloading oslo-i18n==2.7.0 | |
Downloading oslo-i18n==2.6.0 | |
Downloading oslo-i18n==2.5.0 | |
Downloading oslo-i18n==2.4.0 | |
Downloading oslo-i18n==2.3.0 | |
Downloading oslo-i18n==2.2.0 | |
Downloading oslo-i18n==2.1.0 | |
--- Pinned Candidates --- | |
oslo-utils 1.4.0 | |
iso8601 0.1.12 | |
six 1.15.0 | |
netaddr 0.7.19 | |
pbr 0.11.1 | |
babel 2.8.0 | |
pytz 2020.1 | |
oslo-i18n 2.1.0 | |
pip 20.1.1 | |
netifaces 0.10.9 | |
--- Dependency Graph --- | |
None -> oslo-utils | |
pip -> None | |
oslo-utils -> pbr, iso8601, netaddr, oslo-i18n, six, netifaces, babel | |
pbr -> pip | |
iso8601 -> None | |
oslo-i18n -> six, babel, pbr | |
six -> None | |
netaddr -> None | |
netifaces -> None | |
babel -> pytz | |
pytz -> None |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from email.message import EmailMessage | |
from email.parser import BytesParser | |
from io import BytesIO | |
from multiprocessing.dummy import Pool | |
from operator import attrgetter, methodcaller | |
from platform import python_version | |
from sys import argv | |
from urllib.parse import urlparse | |
from zipfile import ZipFile | |
import requests | |
import html5lib | |
from packaging.specifiers import SpecifierSet | |
from packaging.version import Version, InvalidVersion | |
from packaging.requirements import Requirement | |
from packaging.utils import canonicalize_name | |
from resolvelib import BaseReporter, Resolver | |
from extras_provider import ExtrasProvider | |
PYTHON_VERSION = Version(python_version()) | |
class Candidate:
    """One wheel file of a project, with metadata fetched on demand.

    Attributes:
        name: canonicalized project name.
        version: version object parsed from the wheel file name.
        url: location the wheel is downloaded from.
        extras: collection of requested extras (may be empty).
        provider: object whose ``download()`` is called before this
            candidate's metadata is fetched on first access.
    """

    def __init__(self, name, version, url, extras, provider):
        self.name, self.version = canonicalize_name(name), version
        self.url, self.extras, self.provider = url, extras, provider  # XXX
        # Parsed METADATA message; stays None until the wheel is downloaded.
        self._metadata = None

    def __repr__(self):
        if not self.extras:
            return f'<{self.name}=={self.version}>'
        # Sort so the representation does not depend on set iteration order.
        extras = ','.join(sorted(self.extras))
        return f"<{self.name}[{extras}]=={self.version}>"

    @property
    def metadata(self):
        """Return the wheel's parsed METADATA, downloading it if needed."""
        if self._metadata is None:
            # Let the provider fetch everything it has queued first ...
            self.provider.download()  # XXX
            # ... then fetch directly, in case this candidate was not queued.
            self.fetch_metadata()
        return self._metadata

    def fetch_metadata(self):
        """Download the wheel and parse its ``.dist-info/METADATA`` headers.

        Does nothing when the metadata is already present.  If the archive
        has no METADATA member, an empty message is stored instead.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        if self._metadata is not None:
            return
        print(f'Downloading {self.name}=={self.version}')
        response = requests.get(self.url)
        # Fail with the HTTP error itself instead of a confusing BadZipFile
        # raised while trying to unzip an error page.
        response.raise_for_status()
        with ZipFile(BytesIO(response.content)) as z:
            for n in z.namelist():
                if n.endswith('.dist-info/METADATA'):
                    # Close the member handle once the headers are parsed.
                    with z.open(n) as member:
                        self._metadata = BytesParser().parse(
                            member, headersonly=True)
                    return
        self._metadata = EmailMessage()

    @property
    def requires_python(self):
        """Return the ``Requires-Python`` header value, or None if absent."""
        return self.metadata.get('Requires-Python')

    @property
    def dependencies(self):
        """Yield each applicable ``Requires-Dist`` entry exactly once.

        Entries without a marker always apply.  Entries with a marker apply
        when the marker evaluates true for at least one requested extra
        (or for the empty extra when none were requested).
        """
        extras = self.extras if self.extras else ['']
        for d in self.metadata.get_all('Requires-Dist', []):
            r = Requirement(d)
            # any() stops at the first matching extra, so a requirement
            # that matches several extras is not yielded more than once.
            if r.marker is None or any(
                    r.marker.evaluate({'extra': e}) for e in extras):
                yield r
class Downloader:
    """Mixin that lists wheels on PyPI and prefetches metadata in parallel.

    Attributes:
        soon_needed: maps a project name to the candidate whose metadata
            should be fetched by the next ``download()`` call.
        cached_candidates: candidates created so far, so that the same wheel
            version is represented by a single object.
    """

    def __init__(self, *args, **kwargs):
        self.soon_needed, self.cached_candidates = {}, {}

    def need_soon(self, candidate):
        """Queue *candidate* for the next ``download()``.

        Only one candidate per project name is kept; a later call for the
        same name replaces the earlier one.
        """
        self.soon_needed[candidate.name] = candidate

    def download(self):  # XXX
        """Fetch metadata of all queued candidates concurrently.

        The queue is cleared afterwards.  Nothing happens when it is empty.
        """
        # Candidate.metadata calls this on every first access; avoid
        # creating a thread pool when there is nothing to fetch.
        if not self.soon_needed:
            return
        with Pool(10) as pool:
            pool.map(methodcaller('fetch_metadata'),
                     list(self.soon_needed.values()))
        self.soon_needed = {}

    def get_from_pypi(self, project, extras):
        """Yield a candidate for every usable wheel listed for *project*.

        Links are read from the project's page on the PyPI simple index.
        Files that are not wheels, that declare an incompatible
        ``data-requires-python``, or whose version cannot be parsed are
        skipped.
        """
        url = 'https://pypi.org/simple/{}'.format(project)
        data = requests.get(url).content
        doc = html5lib.parse(data, namespaceHTMLElements=False)
        for i in doc.findall('.//a'):
            url = i.attrib.get('href')
            if not url:
                # An anchor without a target cannot be downloaded.
                continue

            path = urlparse(url).path
            filename = path.rpartition('/')[-1]
            # We only handle wheels
            if not filename.endswith('.whl'):
                continue

            # Skip items that need a different Python version
            py_req = i.attrib.get('data-requires-python')
            if py_req:
                spec = SpecifierSet(py_req)
                if PYTHON_VERSION not in spec:
                    continue

            parts = filename[:-4].split('-')
            if len(parts) < 2:
                # Not of the form "name-version-...": ignore it rather than
                # fail while unpacking.
                continue
            name, version = parts[:2]
            # TODO: Handle compatibility tags?

            try:
                version = Version(version)
            except InvalidVersion:
                # Ignore files with invalid versions
                continue

            yield self.make_candidate(name, version, url, extras)  # XXX

    def make_candidate(self, name, version, url, extras):
        """Return the cached candidate for this wheel, creating it if new.

        Extras are part of the cache key so that a request with different
        extras never receives a candidate built for another set of extras.
        """
        # Sets are unhashable, hence the sorted tuple.
        key = (name, version, tuple(sorted(extras or ())))
        if key not in self.cached_candidates:
            self.cached_candidates[key] = Candidate(
                name, version, url, extras, self)
        return self.cached_candidates[key]
class PyPIProvider(ExtrasProvider, Downloader):
    """Resolver provider that takes its candidates from wheels on PyPI."""

    def identify(self, requirement_or_candidate):
        """Return the canonical project name as the identifier."""
        return canonicalize_name(requirement_or_candidate.name)

    def get_extras_for(self, requirement_or_candidate):
        """Return the extras as a sorted tuple."""
        # Extras is a set, which is not hashable
        ordered = sorted(requirement_or_candidate.extras)
        return tuple(ordered)

    def get_base_requirement(self, candidate):
        """Return a requirement pinning the candidate's name and version."""
        return Requirement('{0.name}=={0.version}'.format(candidate))

    def get_preference(self, resolution, candidates, information):
        """Return the number of candidates as the preference value."""
        return len(candidates)

    def find_matches(self, requirements):
        """Return candidates satisfying every requirement, newest first.

        The best match, if there is one, is also queued for prefetching.

        Raises:
            RuntimeError: if *requirements* is empty, or if any requirement
                after the first one carries extras.
        """
        if not requirements:
            raise RuntimeError('resolver promises at least one requirement')
        for later in requirements[1:]:
            if later.extras:
                raise RuntimeError('extras not supported in this example')

        project = canonicalize_name(requirements[0].name)
        # Need to pass the extras to the search, so they are added to the
        # candidate at creation - we treat candidates as immutable once
        # created.
        matches = [
            found for found in self.get_from_pypi(project, set())
            if all(found.version in req.specifier for req in requirements)
        ]
        matches.sort(key=attrgetter('version'), reverse=True)

        try:
            self.need_soon(matches[0])  # XXX
        except IndexError:
            pass  # usually when only sdist provided
        return matches

    def is_satisfied_by(self, requirement, candidate):
        """Tell whether *candidate* is the right project and version."""
        same_project = canonicalize_name(requirement.name) == candidate.name
        return same_project and candidate.version in requirement.specifier

    def get_dependencies(self, candidate):
        """Return the candidate's dependencies as a list."""
        return list(candidate.dependencies)
def main():
    """Resolve the projects named on the command line and print the result.

    Prints a usage line and returns when no project name is given.
    Otherwise prints the pinned version of every resolved project followed
    by the dependency graph.
    """
    if len(argv) < 2:
        print('Usage:', argv[0], '<PyPI project name(s)>')
        return

    # Build the (reusable) resolver from the provider defined in this file
    # and a default reporter.
    resolver = Resolver(PyPIProvider(), BaseReporter())

    # Run the resolution on the requested requirements.
    requested = [Requirement(spec) for spec in argv[1:]]
    result = resolver.resolve(requested)

    # Report what was pinned ...
    print('\n--- Pinned Candidates ---')
    for name, candidate in result.mapping.items():
        print(f'{name} {candidate.version}')

    # ... and which project pulled in which.
    print('\n--- Dependency Graph ---')
    for name in result.graph:
        children = ', '.join(result.graph.iter_children(name))
        print(f"{name} -> {children or None}")


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # On Ctrl-C, just emit a newline instead of a traceback.
        print()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This program is derived from ResolveLib's examples and is licensed under the same ISC license.