Skip to content

Instantly share code, notes, and snippets.

@daskol
Last active June 28, 2023 13:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save daskol/8fa9e1c93e410526ab53d9d649fbaf19 to your computer and use it in GitHub Desktop.
Save daskol/8fa9e1c93e410526ab53d9d649fbaf19 to your computer and use it in GitHub Desktop.
Download a repo from HuggingFace Hub with aria2c.
#!/usr/bin/env python3
"""Little script for generating a download list for fetching model weights and
configuration files of a model from HuggingFace Hub. Once the download list is
ready, you can easily fetch all files with throttling and suspending or
resuming, using `aria2c` as an example: aria2c -c -i index.txt.
"""
from pathlib import Path
from argparse import ArgumentParser, Namespace
from huggingface_hub import HfApi, hf_hub_url
# Command-line interface. The module docstring doubles as the help text.
parser = ArgumentParser(description=__doc__)

# Optional base directory under which downloaded files will be placed.
parser.add_argument(
    '--output-dir',
    type=Path,
    default=Path(''),
    help='where to store files (default: cwd)',
)

# Required positional: which model repository to enumerate.
parser.add_argument(
    'model_id',
    help='model identifier in huggingface hub',
)

# Optional positional (nargs='?'): destination of the generated list.
parser.add_argument(
    'download_list',
    nargs='?',
    type=Path,
    default=Path('index.txt'),
    help='where to write download list (default: index.txt)',
)
def main(args: Namespace):
    """Generate the download list described by parsed command-line arguments.

    Opens ``args.download_list`` for writing (an existing file is truncated)
    and fills it with entries for ``args.model_id``, with destination paths
    rooted at ``args.output_dir``.
    """
    list_path = args.download_list
    with open(list_path, mode='w') as stream:
        make_download_list(stream, args.model_id, args.output_dir)
def make_download_list(fout, model_id: str, output_dir: Path, hf_api=None):
    """Write a download list covering every file of a model repository.

    For each file reported by the Hub, two lines are written to ``fout``: the
    download URL, followed by an indented ``out=<path>`` line giving the
    destination path for that URL (the per-URI option format read by
    ``aria2c -i``).

    Args:
        fout: Writable text stream that receives the download list.
        model_id: Model identifier on HuggingFace Hub, e.g. ``org/name``.
        output_dir: Base directory; files are placed under
            ``<output_dir>/model--<org>--<name>/<commit sha>/``.
        hf_api: Optional API client exposing ``model_info``; a new ``HfApi``
            is created when omitted.
    """
    if hf_api is None:
        hf_api = HfApi()

    model_info = hf_api.model_info(model_id)
    revision = model_info.sha

    # A model id may contain '/', so its parts are joined with '--' to form a
    # single directory name.
    model_dir = '--'.join(['model', *model_id.split('/')])
    snapshot_dir = output_dir / model_dir / revision

    # `siblings` may be absent on the returned info object; treat as no files.
    for ent in model_info.siblings or ():
        # Pin the URL to the same commit the destination directory is named
        # after, so the fetched files cannot drift from that snapshot if the
        # default branch moves before the download runs.
        url = hf_hub_url(model_id, ent.rfilename, revision=revision)
        path = snapshot_dir / ent.rfilename
        fout.write(f'{url}\n out={path}\n')
# Script entry point: parse the command line and build the download list.
if __name__ == '__main__':
    cli_args = parser.parse_args()
    main(cli_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment