Skip to content

Instantly share code, notes, and snippets.

@McSinyx
Last active June 2, 2020 04:21
Show Gist options
  • Save McSinyx/513dbff71174fcc79f1cb600e09881af to your computer and use it in GitHub Desktop.
Resolve wheel requirements from PyPI with parallel download
"""A provider that handles packages with "extras".
Python package dependencies can include "extras", which are additional
dependencies that are installed "on demand". For instance, project X could
have an additional set of dependencies if PDF generation features are needed.
These can be defined for an extra "pdf" and requested on install as X[pdf].
The basic resolvelib algorithm cannot handle extras, as it builds a dependency
graph which needs to be static - the edges (dependencies) from a node
(candidate) must be fixed. Extras break this assumption.
To model projects with extras, we define a candidate as being a project with a
specific set of dependencies. This introduces a problem, as the resolver could
produce a solution that demands version 1.0 of X[foo] and version 2.0 of
X[bar]. This is impossible, as there is actually only one project X to be
installed. To address this, we inject an additional dependency for every
candidate with an extra - X[foo] version v depends on X version v. By doing
this, we constrain the solution to require a unique version of X.
"""
from resolvelib.providers import AbstractProvider
class ExtrasProvider(AbstractProvider):
    """A provider that understands extras-qualified requirements.

    Subclasses supply the two hooks below; the overridden resolver
    callbacks then model ``X[extra]`` as a distinct node that always
    depends on the matching version of plain ``X``.
    """

    def get_extras_for(self, requirement_or_candidate):
        """Return the extras of *requirement_or_candidate* as a hashable value."""
        raise NotImplementedError

    def get_base_requirement(self, candidate):
        """Return a requirement pinning *candidate*'s exact project/version."""
        raise NotImplementedError

    def identify(self, requirement_or_candidate):
        base = super().identify(requirement_or_candidate)
        extras = self.get_extras_for(requirement_or_candidate)
        # An extras-qualified requirement gets its own identity so it is
        # resolved as a separate node from the bare project.
        return (base, extras) if extras else base

    def get_dependencies(self, candidate):
        deps = super().get_dependencies(candidate)
        if candidate.extras:
            # Inject "X[extra] vN depends on X vN" so the resolution cannot
            # pin two different versions of the same project via extras.
            deps.append(self.get_base_requirement(candidate))
        return deps
Downloading oslo-utils==1.4.0
Downloading pbr==0.11.1
Downloading babel==2.8.0
Downloading six==1.15.0
Downloading iso8601==0.1.12
Downloading oslo-i18n==4.0.1
Downloading netaddr==0.7.19
Downloading netifaces==0.10.9
Downloading pip==20.1.1
Downloading pytz==2020.1
Downloading oslo-i18n==4.0.0
Downloading oslo-i18n==3.25.1
Downloading oslo-i18n==3.25.0
Downloading oslo-i18n==3.24.0
Downloading oslo-i18n==3.23.1
Downloading oslo-i18n==3.23.0
Downloading oslo-i18n==3.22.1
Downloading oslo-i18n==3.22.0
Downloading oslo-i18n==3.21.0
Downloading oslo-i18n==3.20.0
Downloading oslo-i18n==3.19.0
Downloading oslo-i18n==3.18.0
Downloading oslo-i18n==3.17.2
Downloading oslo-i18n==3.17.1
Downloading oslo-i18n==3.17.0
Downloading oslo-i18n==3.16.0
Downloading oslo-i18n==3.15.3
Downloading oslo-i18n==3.15.2
Downloading oslo-i18n==3.15.1
Downloading oslo-i18n==3.15.0
Downloading oslo-i18n==3.14.0
Downloading oslo-i18n==3.13.0
Downloading oslo-i18n==3.12.0
Downloading oslo-i18n==3.11.0
Downloading oslo-i18n==3.10.0
Downloading oslo-i18n==3.9.0
Downloading oslo-i18n==3.8.0
Downloading oslo-i18n==3.7.0
Downloading oslo-i18n==3.6.0
Downloading oslo-i18n==3.5.0
Downloading oslo-i18n==3.4.0
Downloading oslo-i18n==3.3.0
Downloading oslo-i18n==3.2.0
Downloading oslo-i18n==3.1.0
Downloading oslo-i18n==3.0.0
Downloading oslo-i18n==2.7.0
Downloading oslo-i18n==2.6.0
Downloading oslo-i18n==2.5.0
Downloading oslo-i18n==2.4.0
Downloading oslo-i18n==2.3.0
Downloading oslo-i18n==2.2.0
Downloading oslo-i18n==2.1.0
--- Pinned Candidates ---
oslo-utils 1.4.0
iso8601 0.1.12
six 1.15.0
netaddr 0.7.19
pbr 0.11.1
babel 2.8.0
pytz 2020.1
oslo-i18n 2.1.0
pip 20.1.1
netifaces 0.10.9
--- Dependency Graph ---
None -> oslo-utils
pip -> None
oslo-utils -> pbr, iso8601, netaddr, oslo-i18n, six, netifaces, babel
pbr -> pip
iso8601 -> None
oslo-i18n -> six, babel, pbr
six -> None
netaddr -> None
netifaces -> None
babel -> pytz
pytz -> None
#!/usr/bin/env python3
from email.message import EmailMessage
from email.parser import BytesParser
from io import BytesIO
from multiprocessing.dummy import Pool
from operator import attrgetter, methodcaller
from platform import python_version
from sys import argv
from urllib.parse import urlparse
from zipfile import ZipFile
import requests
import html5lib
from packaging.specifiers import SpecifierSet
from packaging.version import Version, InvalidVersion
from packaging.requirements import Requirement
from packaging.utils import canonicalize_name
from resolvelib import BaseReporter, Resolver
from extras_provider import ExtrasProvider
# Running interpreter's version, used to skip wheels whose
# data-requires-python specifier does not match.
PYTHON_VERSION = Version(python_version())
class Candidate:
    """A concrete (project, version) pin backed by a wheel URL on PyPI.

    Metadata is fetched lazily; the back-reference to *provider* lets the
    candidate trigger the provider's batched parallel download first.
    """

    def __init__(self, name, version, url, extras, provider):
        self.name = canonicalize_name(name)
        self.version = version
        self.url = url
        self.extras = extras
        self.provider = provider  # XXX: back-reference for batched downloads
        self._metadata = None  # parsed wheel METADATA, filled on demand

    def __repr__(self):
        if self.extras:
            joined = ','.join(self.extras)
            return f"<{self.name}[{joined}]=={self.version}>"
        return f'<{self.name}=={self.version}>'

    @property
    def metadata(self):
        """The wheel's METADATA message, downloading on first access."""
        if self._metadata is None:
            self.provider.download()  # XXX: flush the queued parallel fetches
            self.fetch_metadata()  # in case the candidate is not the first
        return self._metadata

    def fetch_metadata(self):
        """Download this wheel and parse its .dist-info/METADATA headers."""
        if self._metadata is not None:
            return
        print(f'Downloading {self.name}=={self.version}')
        payload = requests.get(self.url).content
        with ZipFile(BytesIO(payload)) as wheel:
            for member in wheel.namelist():
                if member.endswith('.dist-info/METADATA'):
                    self._metadata = BytesParser().parse(
                        wheel.open(member), headersonly=True)
                    return
        # No METADATA member in the archive: fall back to an empty message.
        self._metadata = EmailMessage()

    @property
    def requires_python(self):
        # May be None when the wheel declares no Requires-Python.
        return self.metadata.get('Requires-Python')

    @property
    def dependencies(self):
        """Yield the Requirements applicable under this candidate's extras."""
        wanted = self.extras or ['']
        for line in self.metadata.get_all('Requires-Dist', []):
            requirement = Requirement(line)
            if requirement.marker is None:
                yield requirement
                continue
            for extra in wanted:
                if requirement.marker.evaluate({'extra': extra}):
                    yield requirement
class Downloader:
    """Caches candidates and batches their metadata fetches in a thread pool."""

    def __init__(self, *args, **kwargs):
        self.soon_needed = {}  # project name -> candidate queued for fetching
        self.cached_candidates = {}  # (name, version) -> Candidate

    def need_soon(self, candidate):
        """Queue *candidate* so its metadata is fetched in the next batch."""
        self.soon_needed[candidate.name] = candidate

    def download(self):  # XXX: flushes the queue built up by need_soon
        with Pool(10) as pool:
            pool.map(methodcaller('fetch_metadata'), self.soon_needed.values())
        self.soon_needed = {}

    def get_from_pypi(self, project, extras):
        """Yield wheel candidates for *project* from the PyPI simple index."""
        index_url = 'https://pypi.org/simple/{}'.format(project)
        page = requests.get(index_url).content
        document = html5lib.parse(page, namespaceHTMLElements=False)
        for anchor in document.findall('.//a'):
            link = anchor.attrib['href']
            # Skip items that need a different Python version
            python_requirement = anchor.attrib.get('data-requires-python')
            if python_requirement:
                if PYTHON_VERSION not in SpecifierSet(python_requirement):
                    continue
            filename = urlparse(link).path.rpartition('/')[-1]
            # We only handle wheels
            if not filename.endswith('.whl'):
                continue
            name, version = filename[:-4].split('-')[:2]
            # TODO: Handle compatibility tags?
            try:
                version = Version(version)
            except InvalidVersion:
                continue  # ignore files with invalid versions
            yield self.make_candidate(name, version, link, extras)  # XXX

    def make_candidate(self, name, version, url, extras):
        """Return a cached Candidate for (name, version), creating it once."""
        key = name, version
        if key not in self.cached_candidates:
            self.cached_candidates[key] = Candidate(
                name, version, url, extras, self)
        return self.cached_candidates[key]
class PyPIProvider(ExtrasProvider, Downloader):
    """Provider gluing the resolver to PyPI wheel metadata."""

    def identify(self, requirement_or_candidate):
        return canonicalize_name(requirement_or_candidate.name)

    def get_extras_for(self, requirement_or_candidate):
        # Extras is a set, which is not hashable
        return tuple(sorted(requirement_or_candidate.extras))

    def get_base_requirement(self, candidate):
        return Requirement('{0.name}=={0.version}'.format(candidate))

    def get_preference(self, resolution, candidates, information):
        # Resolve projects with fewer remaining candidates first.
        return len(candidates)

    def find_matches(self, requirements):
        if not requirements:
            raise RuntimeError('resolver promises at least one requirement')
        if any(r.extras for r in requirements[1:]):
            raise RuntimeError('extras not supported in this example')
        name = canonicalize_name(requirements[0].name)
        # Need to pass the extras to the search, so they
        # are added to the candidate at creation - we
        # treat candidates as immutable once created.
        matches = [candidate for candidate in self.get_from_pypi(name, set())
                   if all(candidate.version in r.specifier
                          for r in requirements)]
        matches.sort(key=attrgetter('version'), reverse=True)
        try:
            # Prefetch the most-preferred candidate's metadata in parallel.
            self.need_soon(matches[0])  # XXX
        except IndexError:
            pass  # usually when only sdist provided
        return matches

    def is_satisfied_by(self, requirement, candidate):
        if canonicalize_name(requirement.name) != candidate.name:
            return False
        return candidate.version in requirement.specifier

    def get_dependencies(self, candidate):
        return list(candidate.dependencies)
def main():
    """Resolve the projects named on the command line and print the result."""
    if len(argv) < 2:
        print('Usage:', argv[0], '<PyPI project name(s)>')
        return
    # Create the (reusable) resolver
    # from my customly defined provider and a default reporter.
    resolver = Resolver(PyPIProvider(), BaseReporter())
    # Kick off the resolution process, and get the final result.
    result = resolver.resolve([Requirement(r) for r in argv[1:]])
    # Display the resolution result.
    print('\n--- Pinned Candidates ---')
    for name, candidate in result.mapping.items():
        print(f'{name} {candidate.version}')
    print('\n--- Dependency Graph ---')
    for name in result.graph:
        targets = ', '.join(result.graph.iter_children(name)) or None
        print(f'{name} -> {targets}')
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Finish the interrupted output line and exit quietly on Ctrl-C.
        print()
@McSinyx
Copy link
Author

McSinyx commented Jun 1, 2020

This program is derived from ResolveLib's examples and is licensed under the same ISC license.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment