Skip to content

Instantly share code, notes, and snippets.

@IceflowRE
Created February 19, 2017 21:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save IceflowRE/cb61150a75de5fa0e54882fbdddc0e53 to your computer and use it in GitHub Desktop.
Save IceflowRE/cb61150a75de5fa0e54882fbdddc0e53 to your computer and use it in GitHub Desktop.
Download a list of links from a file (link_list.txt)
import functools
from concurrent import futures
import urllib3, certifi
from pathlib import Path
# Host the connection pool below talks to; create_download_list() also uses
# its length to cut the host part off every link it reads.
address = 'github.com'
# Number of finished download jobs; incremented by print_progress().
done_links = 0
# Request paths to download; filled by create_download_list().
file_list = []
# Directory the downloaded files are written to.
download_path = Path('./files/')
# Shared HTTPS connection pool for `address`. Certificate verification is
# required and uses the CA bundle shipped with certifi.
# NOTE(review): maxsize=8 presumably mirrors the 8 workers used in
# download_files() - confirm against the urllib3 pool documentation.
downloader = urllib3.HTTPSConnectionPool(address, maxsize=8, cert_reqs='CERT_REQUIRED',
ca_certs=certifi.where())
def create_download_list():
    """
    Fill the global file_list with the request paths read from link_list.txt.

    Every non-empty line is treated as one link. Surrounding whitespace
    (including the line break) is stripped, an optional scheme such as
    "https://" is dropped and a leading host name equal to ``address`` is
    removed, so only the part after the host is appended to file_list.

    :raises OSError: If link_list.txt cannot be opened.
    """
    with open('link_list.txt') as f:
        for raw_line in f:
            # strip() instead of [:-1]: the last line may lack a line break,
            # in which case slicing would cut off a real character.
            link = raw_line.strip()
            if not link:
                # Blank lines would otherwise turn into empty download jobs.
                continue
            # Links without a scheme are left untouched by this split.
            link = link.split('://', 1)[-1]
            # Only remove the host when it is really there, so lines that
            # already contain just a path are kept intact.
            if link.startswith(address):
                link = link[len(address):]
            file_list.append(link)
def print_progress(full_percentage, job):
    """
    Done-callback that bumps the global counter and redraws the progress line.

    :param full_percentage: Total number of jobs, i.e. the value that is 100%.
    :param job: The finished job handed in by the executor (not inspected).
    """
    global done_links
    done_links += 1
    # Guard first: without a total there is nothing to divide by.
    if full_percentage == 0:
        print('\r' + 'Error for full_percentage....', end='')
        return
    percent = round((100 / full_percentage * done_links), 1)
    # '\r' moves back to the line start so the line is overwritten in place.
    print('\r' + 'Progress: %d/%d | %d %%' % (done_links, full_percentage, percent), end='')
def download_file(url, target_path: Path):
    """
    Download one file through the shared connection pool.

    The request is sent first; only a response with status 200 creates a
    file. If target_path is already taken, "_d" is appended to the name until
    an unused one is found. The body is streamed to disk in chunks instead of
    being loaded into memory as a whole. A file that was created by this call
    but could not be completed is removed again, so a failed download never
    leaves an empty or truncated file behind.

    :param url: Request path passed to the connection pool.
    :param target_path: Preferred location of the downloaded file.
    :return: "TRUE" on success, otherwise a "DOWNLOAD ERROR ..." message
             containing the exception text and the url.
    """
    download_success = "TRUE"
    created_path = None  # set as soon as this call has created a file on disk
    try:
        with downloader.request('GET', url, preload_content=False,
                                retries=urllib3.util.retry.Retry(3)) as reader:
            if reader.status != 200:
                raise urllib3.exceptions.HTTPError(str(reader.status))
            # Mode 'xb' fails if the name exists, so checking and creating is
            # a single step and parallel workers cannot pick the same name.
            while True:
                try:
                    out_file = target_path.open(mode='xb')
                    break
                except FileExistsError:
                    target_path = Path(str(target_path) + "_d")
            created_path = target_path
            with out_file:
                for chunk in reader.stream(64 * 1024):
                    out_file.write(chunk)
    except Exception as exception:
        if created_path is not None:
            # Best-effort cleanup of the incomplete file; the original error
            # is what gets reported, not a failure to clean up.
            try:
                created_path.unlink()
            except OSError:
                pass
        download_success = "DOWNLOAD ERROR " + str(exception) + ": " + url
    return download_success
def download_files():
    """
    Download every entry of file_list in parallel worker processes.

    The file name is the part of the link after the last "/" and is placed
    inside download_path. print_progress is attached to every job to keep the
    progress line up to date. After all jobs have finished, every job that did
    not report success is printed, so download errors are not lost.
    """
    print('== DOWNLOAD FILES ==')
    total = len(file_list)
    jobs = []
    with futures.ProcessPoolExecutor(max_workers=8) as executor:
        for link in file_list:
            # The last path segment of the link is used as the file name.
            file_name = link.split("/")[-1]
            job = executor.submit(download_file, link, download_path.joinpath(file_name))
            job.add_done_callback(functools.partial(print_progress, total))
            jobs.append(job)
    # Terminate the progress line, which is printed without a line break.
    print()
    # Leaving the with-block waited for all jobs, so none of these calls block.
    for job in jobs:
        error = job.exception()
        if error is not None:
            # The worker itself failed, e.g. the process died.
            print("DOWNLOAD ERROR " + str(error))
        elif job.result() != "TRUE":
            print(job.result())
def check_files():
    """
    Print a message for every link whose expected file is missing.

    The expected location is download_path joined with the part of the link
    after the last "/".
    """
    for link in file_list:
        expected = download_path.joinpath(link.split("/")[-1])
        if expected.is_file():
            continue
        print("Link " + link + " was not downloaded.")
if __name__ == "__main__":
    # Create the target directory on first use.
    if not download_path.exists():
        download_path.mkdir()
    create_download_list()
    # Echo the request paths that were read from the link list.
    for entry in file_list:
        print(entry)
    download_files()
    check_files()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment