Skip to content

Instantly share code, notes, and snippets.

@jboynyc
Created February 9, 2023 15:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jboynyc/8576d2ad86eb6af415dd9e1fa468fcc8 to your computer and use it in GitHub Desktop.
Save jboynyc/8576d2ad86eb6af415dd9e1fa468fcc8 to your computer and use it in GitHub Desktop.
update spacy models in nixpkgs
#!/usr/bin/env nix-shell
#! nix-shell -i python3 -p "python3.withPackages (p: with p; [sh tqdm])"
"""
Collect metadata on spaCy language models of a particular release in order to
update the models.json file in nixpkgs/pkgs/development/python-modules/spacy.

Run this script in the meta directory of the spacy-models repository, providing
the desired release number as its only argument. The result is written to
models-<release>.json in the current directory.
"""
import json
from pathlib import Path
from sys import argv
from typing import Iterator

from sh import nix_prefetch_url
from tqdm import tqdm
# Maps the license string found in a model's metadata file to the short
# identifier written to the generated JSON (see collect_metadata).
licenses = {
    # Creative Commons
    "CC BY-NC-SA 3.0": "cc-by-nc-sa-30",
    "CC BY-SA 3.0": "cc-by-sa-30",
    "CC BY-SA 4.0": "cc-by-sa-40",
    # GNU / LGPL family
    "GNU GPL 3.0": "gpl3",
    "LGPL-LR": "lgpllr",
    # Permissive
    "MIT": "mit",
}
def collect_metadata(release: str) -> Iterator[dict]:
    """Yield metadata for every model file of the given release.

    Scans the current working directory for files named
    ``<pname>-<release>.json``, parses each one as JSON and translates its
    ``license`` entry through the module-level ``licenses`` table. Models
    that declare extra requirements are reported on stdout so they can be
    dealt with by hand.

    Args:
        release: Release number used as the file-name suffix, e.g. ``"3.5.0"``.

    Yields:
        One dict with the keys ``pname``, ``version`` and ``license`` per
        matching file, in sorted file-name order.

    Raises:
        KeyError: If a file has no ``license`` entry, or names a license that
            is not present in ``licenses``.
    """
    # Sorted so the generated output is stable across runs and file systems.
    for path in sorted(Path(".").glob(f"*-{release}.json")):
        meta = json.loads(path.read_text(encoding="utf-8"))

        license_name = meta["license"]
        if license_name not in licenses:
            raise KeyError(
                f"{path.name}: unknown license {license_name!r}; "
                "add it to the `licenses` table"
            )

        # Split on the last hyphen only, so a hyphen inside the package name
        # cannot break the two-way unpacking.
        pname, version = path.stem.rsplit("-", 1)

        # "requirements" may be absent; treat that the same as empty.
        if req := meta.get("requirements"):
            print(f"{pname} has requirements {req}")

        yield {
            "pname": pname,
            "version": version,
            "license": licenses[license_name],
        }
def get_model_hash(model: dict) -> str:
    """Return the hash reported by ``nix-prefetch-url`` for a model tarball.

    Builds the download URL of the model's release tarball on GitHub and
    hands it to ``nix_prefetch_url``, using the package name as the
    ``--name`` argument.

    Args:
        model: Mapping with at least the keys ``pname`` and ``version``;
            any additional keys are ignored when formatting the URL.

    Returns:
        The command's output with surrounding whitespace removed.
    """
    url_template = (
        "https://github.com/explosion/spacy-models/releases/download/"
        "{pname}-{version}/{pname}-{version}.tar.gz"
    )
    url = url_template.format(**model)
    # str() turns whatever object the sh wrapper returns into plain text
    # before stripping the trailing newline.
    return str(nix_prefetch_url(url, "--name", model["pname"])).strip()
def main() -> None:
    """Build ``models-<release>.json`` for the release named on the command line.

    Reads the release number from the first command-line argument, collects
    the metadata of all matching models, adds a ``sha256`` entry to each via
    ``get_model_hash`` and writes the resulting list as indented JSON to
    ``models-<release>.json`` in the current directory.

    Raises:
        SystemExit: If no release number was given, or if no metadata file
            for that release exists in the current directory.
    """
    if len(argv) < 2:
        # Exit with a plain message instead of a chained traceback.
        raise SystemExit("Please provide a release number.")
    release = argv[1]

    metadata = list(collect_metadata(release))
    if not metadata:
        # Most likely the script was started outside the meta directory;
        # do not write an empty result file in that case.
        raise SystemExit(
            f"No *-{release}.json metadata files found in the current directory."
        )

    # Each hash lookup goes through get_model_hash; tqdm shows the progress.
    new_models = [
        {**model, "sha256": get_model_hash(model)} for model in tqdm(metadata)
    ]

    with open(f"models-{release}.json", "w", encoding="utf-8") as out:
        json.dump(new_models, out, indent=2)
# Run only when executed as a script, so importing this module has no
# side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment