Skip to content

Instantly share code, notes, and snippets.

@jinyu121
Created July 29, 2019 12:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jinyu121/f448f8559b7d08906b1d89077e1136d6 to your computer and use it in GitHub Desktop.
Save jinyu121/f448f8559b7d08906b1d89077e1136d6 to your computer and use it in GitHub Desktop.
Batch Downloader

Batch Downloader

Given a text file that contains the URLs to download, this script downloads them for you.

Requirements

  • tqdm
import csv
import time
from argparse import ArgumentParser
from concurrent import futures
from pathlib import Path
from urllib import request
from tqdm import tqdm
def download(url: str, save_dir: Path, sleep_time: int = 1):
    """Download *url* into *save_dir*, then pause for *sleep_time* seconds.

    The local file name is the last segment of the URL's path component,
    percent-decoded. The query string and fragment are not part of the
    name, so ``http://host/a.jpg?size=large`` is stored as ``a.jpg``.

    Args:
        url: Address to fetch; passed unchanged to ``request.urlretrieve``.
        save_dir: Directory the downloaded file is written into.
        sleep_time: Seconds to sleep after the transfer has finished.

    Returns:
        The string ``'OK'`` once the download and the pause are done.

    Raises:
        ValueError: If no usable file name can be derived from *url*.

    Any exception raised by ``request.urlretrieve`` propagates to the caller.
    """
    # Imported here so this function does not rely on a file-level import.
    from urllib.parse import unquote, urlsplit

    # Take ``.name`` *after* decoding so an encoded "/" (%2F) cannot add
    # directory components to the target path. Fall back to the raw URL's
    # last segment when the path component is empty (e.g. "http://host/").
    file_name = Path(unquote(urlsplit(url).path)).name or Path(url).name
    if file_name in ("", ".."):
        raise ValueError(
            "cannot derive a file name from URL: {!r}".format(url)
        )

    request.urlretrieve(url, save_dir / file_name)
    time.sleep(sleep_time)
    return 'OK'
def main():
    """Download every URL listed in a CSV/text file using a thread pool.

    Command-line options:
        --data        Input file, parsed as comma-separated rows.
        --data_index  Column of each row that holds the URL.
        --save        Directory the downloads are written into (created
                      if missing).
        --fail        File to which the row numbers of failed downloads
                      are appended, one per line.
        --threads     Maximum number of worker threads.
        --sleep       Seconds each worker sleeps after a download.

    Blank rows are skipped. A row that has no column ``--data_index`` is
    reported and recorded as a failure instead of aborting the whole run.
    """
    parser = ArgumentParser()
    parser.add_argument("--data", type=str, default="data.txt")
    parser.add_argument("--data_index", type=int, default=0)
    parser.add_argument("--save", type=str, default="Download")
    parser.add_argument("--fail", type=str, default="fail.txt")
    parser.add_argument("--threads", type=int, default=20)
    parser.add_argument("--sleep", type=int, default=1)
    args = parser.parse_args()

    # Read all rows up front so the input file is closed before any
    # download starts; newline="" is what the csv module expects.
    with Path(args.data).open(newline="") as data_file:
        rows = list(csv.reader(data_file, delimiter=','))

    save_path = Path(args.save)
    save_path.mkdir(exist_ok=True, parents=True)

    fails = []
    with futures.ThreadPoolExecutor(max_workers=args.threads) as executor:
        # Maps each future to the row number it came from, so results and
        # failures can be reported by row.
        jobs = {}
        for ith, line in enumerate(rows):
            if not line:
                # csv.reader yields [] for a blank line; nothing to fetch.
                continue
            try:
                url = line[args.data_index]
            except IndexError as e:
                tqdm.write("{} : {}".format(ith, e))
                fails.append(ith)
                continue
            jobs[executor.submit(download, url, save_path, args.sleep)] = ith

        for future in tqdm(futures.as_completed(jobs), total=len(jobs)):
            try:
                status = future.result()
            except Exception as e:
                # One failed download must not stop the others; log it and
                # remember the row for the fail record.
                tqdm.write("{} : {}".format(jobs[future], e))
                fails.append(jobs[future])
            else:
                tqdm.write("{} : {}".format(jobs[future], status))

    # Append mode keeps the records of earlier runs; the context manager
    # guarantees the records are flushed and the handle is closed.
    with Path(args.fail).open("a") as fail_file:
        fail_file.writelines("{}\n".format(x) for x in fails)
# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment