Skip to content

Instantly share code, notes, and snippets.

@cosmoscalibur
Created December 15, 2019 01:22
Show Gist options
  • Save cosmoscalibur/15a8544f261885979136f29a8ccc5cbb to your computer and use it in GitHub Desktop.
Save cosmoscalibur/15a8544f261885979136f29a8ccc5cbb to your computer and use it in GitHub Desktop.
Download files from a DirectoryIndex
""" Download files from a DirectoryIndex.
"""
import os
import shutil
from os import path
from urllib import request
from urllib.parse import urlsplit

from requests_html import HTMLSession
def download_files(url_repo: str, out_path: str):
    """Download into `out_path` every CSV file linked from a DirectoryIndex.

    The index page is fetched, its links are collected, and each link whose
    URL path ends in ``.csv`` is saved under its own base name. One progress
    line is printed per downloaded file.

    The final path component of `url_repo` is dropped before the index is
    requested (``path.dirname``), so pass either the directory URL with a
    trailing slash or the URL of a page located inside that directory.

    Parameters
    ----------
    url_repo
        Directory Index URL where files are located.
    out_path
        Local directory to save files. ``~`` and environment variables are
        expanded, and the directory is created when it does not exist yet.
    """
    url_repo = path.dirname(url_repo)

    # Expand `$VARS` and `~` *before* making the path absolute: abspath would
    # otherwise prefix the working directory and leave a literal "~" inside
    # the path, which expanduser only recognises at the very start.
    out_path = path.abspath(path.expanduser(path.expandvars(out_path)))
    os.makedirs(out_path, exist_ok=True)

    session = HTMLSession()
    try:
        site = session.get(url_repo)
        links = site.html.absolute_links
    finally:
        # The session is only needed for the index page; the files themselves
        # are fetched with urllib below.
        session.close()

    # Check the extension on the URL path so a query string or fragment does
    # not hide it, and sort because `absolute_links` is an unordered set.
    csv_urls = sorted(
        url for url in links if urlsplit(url).path.endswith(".csv")
    )
    total_csv = len(csv_urls)

    # Counter and total refer to CSV files only, so the progress is accurate.
    for counter, url in enumerate(csv_urls, start=1):
        file_name = path.basename(urlsplit(url).path)
        with request.urlopen(url) as response, \
                open(path.join(out_path, file_name), 'wb') as csv_file:
            shutil.copyfileobj(response, csv_file)
        print("Downloaded {} of {}: {}".format(counter, total_csv, file_name))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment