Skip to content

Instantly share code, notes, and snippets.

@pfmoore
Created July 5, 2023 14:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pfmoore/303f42bd713e6f90c00b093b8c31e9e8 to your computer and use it in GitHub Desktop.
Save pfmoore/303f42bd713e6f90c00b093b8c31e9e8 to your computer and use it in GitHub Desktop.
Download metadata files from PyPI
import hashlib
import json
from multiprocessing.dummy import Pool
from pathlib import Path
import urllib3
# Directory that holds the downloaded ".metadata" files.  The path is
# relative, so it resolves against the current working directory.
DOWNLOAD_CACHE = Path("DownloadedMetadata")
class Downloader:
    """Fetch files over HTTP and store them under ``DOWNLOAD_CACHE``.

    One ``urllib3.PoolManager`` is created per instance and reused by every
    call to :meth:`download`.
    """

    def __init__(self):
        # maxsize is forwarded by the PoolManager to its connection pools.
        self.pool_mgr = urllib3.PoolManager(maxsize=10)

    def download(self, filename: str, url: str):
        """Download *url* and save the response body as *filename* in the cache.

        Args:
            filename: Name of the file to create inside ``DOWNLOAD_CACHE``.
            url: Address to issue the ``GET`` request against.

        A response whose status is not 200 is reported on stdout and nothing
        is written, so an HTTP error page is never stored as if it were the
        requested file.
        """
        target = DOWNLOAD_CACHE / filename
        resp = self.pool_mgr.request("GET", url)
        if resp.status != 200:
            print(f"{filename}: download failed with HTTP status {resp.status}")
            return
        # write_bytes() does not create missing directories.
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(resp.data)
def already_downloaded(filename: str, hashes: bool | dict[str, str]):
    """Tell whether *filename* is already present in ``DOWNLOAD_CACHE``.

    Args:
        filename: Name of the cached file to look for.
        hashes: Either a non-dict value (no verification is done) or a
            mapping of hash algorithm name to expected hex digest.

    Returns:
        ``False`` when the file is missing or any supplied digest differs
        from the digest of the cached content (a message is printed for the
        first mismatch); ``True`` otherwise.
    """
    cached = DOWNLOAD_CACHE / filename
    if not cached.exists():
        return False

    # Without a dict of digests there is nothing to verify.
    if not isinstance(hashes, dict):
        return True

    content = cached.read_bytes()
    for algorithm, expected in hashes.items():
        actual = hashlib.new(algorithm, content).hexdigest()
        if actual == expected:
            continue
        print(f"{filename}: {algorithm} hash does not match")
        return False
    return True
def get_metadata_list(pypi: str):
    """Yield ``(filename, url)`` pairs for metadata files still to be fetched.

    Args:
        pypi: Path to a JSON file containing a top-level ``"projects"`` list.
            Each project may carry a ``"files"`` list whose entries have
            ``"filename"`` and ``"url"`` keys.

    Yields:
        The file's name and URL, each with ``".metadata"`` appended, for
        every file that advertises separate metadata and is not already in
        the download cache (as decided by ``already_downloaded``).

    The metadata marker is read from ``"core-metadata"`` (the key name
    introduced by PEP 714) and falls back to the older
    ``"data-dist-info-metadata"`` key when the new one is absent.
    """
    print(f"Reading data from {pypi}... ", end="", flush=True)
    with open(pypi, "rb") as f:
        data = json.load(f)
    print("OK", flush=True)

    for project in data["projects"]:
        for file in project.get("files", []):
            # Prefer the PEP 714 key; keep accepting the legacy spelling.
            metadata = file.get(
                "core-metadata",
                file.get("data-dist-info-metadata", False),
            )
            if not metadata:
                continue
            filename = file["filename"] + ".metadata"
            url = file["url"] + ".metadata"
            if not already_downloaded(filename, metadata):
                yield filename, url
if __name__ == "__main__":
    import sys

    # Usage: the first command-line argument is the JSON file that
    # get_metadata_list() reads.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} PYPI_JSON_FILE")

    # Make sure the cache directory exists before any worker writes to it;
    # Path.write_bytes() does not create missing directories.
    DOWNLOAD_CACHE.mkdir(parents=True, exist_ok=True)

    d = Downloader()
    to_fetch = list(get_metadata_list(sys.argv[1]))
    print(f"{len(to_fetch)} downloads still to complete")

    # Pool comes from multiprocessing.dummy, i.e. it is a pool of threads,
    # so all workers share the single Downloader instance.
    with Pool() as p:
        results = p.starmap(d.download, to_fetch)
    print(f"{len(results)} files downloaded")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment