Skip to content

Instantly share code, notes, and snippets.

@pradyunsg
Last active December 4, 2023 13:15
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save pradyunsg/22ca089b48ca55d75ca843a5946b2691 to your computer and use it in GitHub Desktop.
Save pradyunsg/22ca089b48ca55d75ca843a5946b2691 to your computer and use it in GitHub Desktop.
Figuring out the top-level importable names from a wheel
"""Takes a .whl file and figures out the top-level importable names in that wheel.
Usage:
$ python find-top-level-from-wheel-file.py ./setuptools-65.4.1-py3-none-any.whl
['_distutils_hack', 'pkg_resources', 'setuptools']
Testing:
$ pytest find-top-level-from-wheel-file.py
...
===== 2 passed in 0.01s =====
x-ref: https://github.com/PyO3/maturin/issues/1154#issuecomment-1264498648
"""
# Licensed under the MIT license.
#
# Copyright (c) 2022 Pradyun Gedam <mail@pradyunsg.me>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import json
import sys
from collections import deque
from typing import Iterable
from installer.sources import WheelSource
from installer.utils import parse_metadata_file
def _find_importable_components_from_wheel_content_listing(
filepaths: Iterable[str], *, dist_info_dir: str, data_dir: str
) -> Iterable[tuple[str, ...]]:
purelib_str = f"{data_dir}/purelib/"
platlib_str = f"{data_dir}/platlib/"
for path in filepaths:
if path.startswith(dist_info_dir):
# Nothing in dist-info is importable.
continue
if path.startswith((platlib_str, purelib_str)):
# Remove the prefix from purelib and platlib files.
name = path[len(platlib_str) :]
elif path.startswith(data_dir):
# Nothing else in data is importable.
continue
else:
# Top level files end up in an importable location.
name = path
if name.endswith(".py"):
yield tuple(name[: -len(".py")].split("/"))
def test_find_importable_components_from_wheel_content_listing():
    """Check which paths of a sample wheel listing are reported as importable."""
    # GIVEN
    wheel_listing = [
        # A top-level module.
        "zero.py",
        # purelib / platlib contents, with and without a sub-directory.
        "foo.data/purelib/one.py",
        "foo.data/purelib/two/three.py",
        "foo.data/platlib/four.py",
        "foo.data/platlib/five/six.py",
        # Other data sub-directories and dist-info must be ignored.
        "foo.data/scripts/six.py",
        "foo.data/scripts/seven/eight.py",
        "foo.dist-info/nine.py",
    ]
    expected = [
        ("zero",),
        ("one",),
        ("two", "three"),
        ("four",),
        ("five", "six"),
    ]

    # WHEN
    found = _find_importable_components_from_wheel_content_listing(
        wheel_listing, data_dir="foo.data", dist_info_dir="foo.dist-info"
    )

    # THEN
    assert list(found) == expected
def _determine_major_import_names(
importable_components: Iterable[tuple[str, ...]]
) -> Iterable[str]:
# If you literally want the "top level", just do...
# return {components[0] for components in importable_components}
# Here, we're going to try to find the longest initial import name instead.
# Mostly, because this was a fun problem to thing through.
# Build a tree out of the components
tree = {}
for components in importable_components:
subtree = tree
for segment in components:
if segment not in subtree:
subtree[segment] = {}
subtree = subtree[segment]
# Recurse through the tree to find the names which have != 1 children.
queue = deque()
queue.appendleft((tree, ()))
while queue:
current_tree, current_name = queue.popleft()
for name, subtree in current_tree.items():
subname = (*current_name, name)
if len(subtree) == 1:
queue.append((subtree, subname))
elif name == "__init__":
yield ".".join(current_name)
else:
yield ".".join(subname)
def test_determine_major_import_names():
    """Check the major import names derived from a sample set of components."""
    # GIVEN
    module_components = [
        # A lone top-level module and a lone top-level package.
        ("zero",),
        ("one", "__init__"),
        # A name with several children stops right there.
        ("two",),
        ("two", "three"),
        ("two", "four"),
        # Single-child chains are followed until they branch or end.
        ("five", "six", "seven"),
        ("five", "six", "eight"),
        ("nine", "ten", "__init__"),
        ("eleven", "twelve", "__init__"),
        ("eleven", "twelve", "thirteen", "__init__"),
        ("eleven", "twelve", "fourteen", "__init__"),
    ]
    expected_names = {
        "zero",
        "one",
        "two",
        "five.six",
        "nine.ten",
        "eleven.twelve",
    }

    # WHEN
    names = _determine_major_import_names(module_components)

    # THEN
    assert set(names) == expected_names
def find_major_import_import_names(wheel: WheelSource) -> Iterable[str]:
    """Return the major import names provided by ``wheel``.

    The returned iterable is lazy: the wheel's content listing is not
    iterated until the result is consumed.

    Args:
        wheel: The wheel to inspect.

    Returns:
        An iterable of dotted import names.

    Raises:
        NotImplementedError: If the ``Wheel-Version`` recorded in the
            ``WHEEL`` file is missing, empty, or does not start with ``1.``.
    """
    wheel_version = parse_metadata_file(wheel.read_dist_info("WHEEL"))[
        "Wheel-Version"
    ]
    if not wheel_version or not wheel_version.startswith("1."):
        raise NotImplementedError("Only supports wheel 1.x")

    # The first field of each record entry is used as the file's path.
    filepaths: Iterable[str] = (
        record[0] for record, _stream, _is_executable in wheel.get_contents()
    )
    return _determine_major_import_names(
        _find_importable_components_from_wheel_content_listing(
            filepaths,
            dist_info_dir=wheel.dist_info_dir,
            data_dir=wheel.data_dir,
        )
    )
if __name__ == "__main__":
    # Command-line entry point: print the major import names of the wheel
    # whose path is given as the first argument.
    # These imports are only needed when running as a script.
    import zipfile

    from installer.sources import WheelFile

    path: str = sys.argv[1]
    with zipfile.ZipFile(path) as wheel_archive:
        # Materialise the names while the archive is still open.
        import_names = list(find_major_import_import_names(WheelFile(wheel_archive)))
        print(import_names)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment