Skip to content

Instantly share code, notes, and snippets.

@julien-c
Created September 9, 2021 21:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save julien-c/b2dcde5df5d5e41ad7c4b594cb54aba3 to your computer and use it in GitHub Desktop.
Save julien-c/b2dcde5df5d5e41ad7c4b594cb54aba3 to your computer and use it in GitHub Desktop.
import os
import subprocess
from huggingface_hub.hf_api import HfApi
from huggingface_hub.repository import Repository
# Git LFS must skip the smudge step, otherwise cloning a repository would
# download its large files.
os.environ["GIT_LFS_SKIP_SMUDGE"] = "1"
# Parent directory for the local clones (model ids start with "Helsinki-NLP/").
os.makedirs("./Helsinki-NLP/", exist_ok=True)

api = HfApi()

# Every model on the Hub whose id lives under the Helsinki-NLP namespace.
models = list(
    filter(
        lambda info: info.modelId.startswith("Helsinki-NLP/"),
        api.list_models(full=True),
    )
)

# Of those, keep the ones that carry no "license:" tag at all.
models_wo_license = [
    info
    for info in models
    if all(not tag.startswith("license:") for tag in info.tags)
]
# For every model lacking a license tag: clone its repo, set the license in
# the model card metadata, then commit and push the change.
for m in models_wo_license:
    local_dir = m.modelId
    url = f"https://huggingface.co/{m.modelId}"
    # Clone with plain git instead of Repository(clone_from=...), which
    # apparently forces the LFS files to be downloaded.
    try:
        subprocess.run(
            ["git", "clone", url, local_dir],
            stderr=subprocess.PIPE,
            stdout=subprocess.PIPE,
            check=True,
            encoding="utf-8",
        )
    except subprocess.CalledProcessError as err:
        # The output is captured, so surface git's own message before
        # re-raising; otherwise only the exit status would be reported.
        print(f"git clone failed for {url}:\n{err.stderr}")
        raise
    # use_auth_token=False keeps the locally configured git user name and
    # email; with a token they would be overridden by the whoami identity.
    repo = Repository(
        local_dir=local_dir,
        use_auth_token=False,
    )
    # repocard_metadata_load() may return None (no README, or a README
    # without a YAML metadata block); start from an empty mapping then,
    # instead of failing on the item assignment below.
    data = repo.repocard_metadata_load() or {}
    data["license"] = "apache-2.0"
    repo.repocard_metadata_save(data)
    # To review the change before it is pushed, run `git diff` inside
    # local_dir at this point.
    repo.git_add()
    repo.git_commit("metadata: add license")
    print(repo.git_push())
    # Blank lines separate the output of consecutive repositories.
    print()
    print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment