Skip to content

Instantly share code, notes, and snippets.

@chongzhou96
Forked from fanyix/download.py
Created December 7, 2019 00:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chongzhou96/67f4f4a53defe6e73a927a0ed789d30f to your computer and use it in GitHub Desktop.
Save chongzhou96/67f4f4a53defe6e73a927a0ed789d30f to your computer and use it in GitHub Desktop.
download using aria2c
###################################################################################
## Download (EPIC)
###################################################################################
import argparse
import math
import os
import subprocess
# params
# Root directory for downloads; each entry's path is joined onto it.
save_dir = '/home/fanyix/code/lfb/data/epic/downloads'
# File that is read line by line and scanned for download links.
list_file = '/home/fanyix/code/lfb/src/dataset_tools/epic/download.sh'
# A line counts as a download entry only when it contains both markers;
# the link is the text from st_str up to and including ed_str.
st_str = 'https://data.bris.ac.uk'
ed_str = '.MP4'
# Substituted into aria2c's -x option (max connections per server).
threads = 5
def parse_args(argv=None):
    """Parse and validate the command line.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The parsed namespace with ``num`` (total number of jobs), ``id``
        (zero-based index of this job) and the optional overrides
        ``save_dir``, ``list_file`` and ``threads`` (``None`` when not given).

    Exits with status 2 (via ``parser.error``) when ``num`` is not positive
    or ``id`` is outside ``[0, num)``.
    """
    parser = argparse.ArgumentParser(description='Download EPIC')
    parser.add_argument('num', help="job num", type=int)
    parser.add_argument('id', help="job id", type=int)
    parser.add_argument('--save-dir', default=None,
                        help='download root (defaults to the built-in path)')
    parser.add_argument('--list-file', default=None,
                        help='file listing the links (defaults to the built-in path)')
    parser.add_argument('--threads', default=None, type=int,
                        help='value for aria2c -x (defaults to the built-in value)')
    args = parser.parse_args(argv)
    # num == 0 would divide by zero; a negative id would index from the end
    # of the link list and silently download another job's share.
    if args.num < 1:
        parser.error('num must be a positive integer')
    if not 0 <= args.id < args.num:
        parser.error('id must satisfy 0 <= id < num')
    return args


def parse_links(lines, start_marker, end_marker):
    """Extract download entries from the lines of the list file.

    A line yields an entry when it contains ``start_marker`` followed (at or
    after that position) by ``end_marker``.

    Args:
        lines: Iterable of text lines.
        start_marker: Text that begins a link.
        end_marker: Text that ends a link (included in the link).

    Returns:
        A list of ``{'link': ..., 'path': ...}`` dicts in input order, where
        ``path`` is the last non-blank segment of the line after splitting on
        double quotes.
    """
    entries = []
    for line in lines:
        begin = line.find(start_marker)
        if begin < 0:
            continue
        # Search from the link start so an earlier occurrence of the end
        # marker (e.g. in a quoted file name) cannot truncate the link.
        finish = line.find(end_marker, begin)
        if finish < 0:
            continue
        segments = [piece for piece in line.split('"') if piece.strip()]
        entries.append({'link': line[begin:finish + len(end_marker)],
                        'path': segments[-1]})
    return entries


def select_share(links, num, job_id):
    """Return the contiguous slice of ``links`` assigned to one job.

    The list is cut into ``num`` chunks of ``ceil(len(links) / num)`` items;
    trailing jobs may receive fewer items or none.

    Raises:
        ValueError: if ``num < 1`` or ``job_id`` is outside ``[0, num)``.
    """
    if num < 1 or not 0 <= job_id < num:
        raise ValueError('need num >= 1 and 0 <= job_id < num')
    share = int(math.ceil(float(len(links)) / num))
    start = job_id * share
    return links[start:start + share]


def ensure_dir(path):
    """Create ``path`` (and parents), tolerating concurrent creation."""
    try:
        os.makedirs(path)
    except OSError:
        # Another parallel job may have created it between check and create.
        if not os.path.isdir(path):
            raise


def download_links(links, dest_root, connections):
    """Download every entry with aria2c, skipping files that already exist.

    Args:
        links: Entries as produced by ``parse_links``.
        dest_root: Directory onto which each entry's ``path`` is joined.
        connections: Value passed to aria2c's ``-x`` option.

    Returns:
        The number of entries for which aria2c exited with a non-zero status.

    Raises:
        OSError: if the ``aria2c`` executable cannot be started.
    """
    failures = 0
    total = len(links)
    for position, item in enumerate(links, 1):
        link = item['link']
        target = os.path.join(dest_root, item['path'])
        # aria2c keeps "<file>.aria2" next to an unfinished download and
        # removes it on completion, so its presence means the existing file
        # is partial and must not be skipped.
        # NOTE(review): assumes aria2c saves under the same file name as
        # item['path'] -- confirm against the list file.
        if os.path.isfile(target) and not os.path.exists(target + '.aria2'):
            continue
        target_dir = os.path.dirname(target)
        ensure_dir(target_dir)
        # Argument list instead of a shell string: no quoting problems with
        # '&', spaces or other shell metacharacters in the link or path.
        status = subprocess.call(['aria2c', '--console-log-level=warn',
                                  '-x%d' % connections, link,
                                  '-d', target_dir])
        if status != 0:
            failures += 1
            print('aria2c exited with status %d for %s' % (status, link))
        print('%d/%d' % (position, total))
    return failures


def main(argv=None):
    """Run this job's share of the downloads; exit with 1 if any failed."""
    args = parse_args(argv)
    # Command-line overrides fall back to the module-level defaults.
    dest_root = save_dir if args.save_dir is None else args.save_dir
    listing = list_file if args.list_file is None else args.list_file
    connections = threads if args.threads is None else args.threads
    # get all links
    with open(listing, 'r') as handle:
        links = parse_links(handle, st_str, ed_str)
    # distribute the task
    task_links = select_share(links, args.num, args.id)
    # download
    failures = download_links(task_links, dest_root, connections)
    print('Done')
    if failures:
        print('%d download(s) failed' % failures)
        raise SystemExit(1)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment