Skip to content

Instantly share code, notes, and snippets.

Created November 8, 2018 04:02
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save YieldNull/cb3a881bad7862d35a57df9cc572090b to your computer and use it in GitHub Desktop.
A Multithreaded Downloader
import os
import logging
import requests
from multiprocessing.pool import ThreadPool
from multiprocessing import Queue
def download(task_file, repository, pool_size=8):
    """Download every URL listed in ``task_file`` into ``repository``.

    ``task_file`` is read as text, one URL per line; surrounding whitespace
    is stripped and blank lines are skipped. Each URL is fetched on a worker
    thread and written to ``repository`` under the basename of the URL.

    A failure for one URL (network error, HTTP error status, unwritable
    destination, ...) is logged as a warning and does not stop the others.

    Args:
        task_file: Path of the text file listing the URLs.
        repository: Existing directory that receives the downloaded files.
        pool_size: Number of worker threads, i.e. the maximum number of
            downloads in flight at once.

    Returns:
        None. Progress is reported through the ``'downloader'`` logger.
    """
    # Imported locally: the rest of the file only imports ``sys`` under the
    # ``__main__`` guard, so the name is missing when this module is imported.
    import sys

    logger = logging.getLogger('downloader')
    # No-op when the root logger already has handlers configured.
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    with open(task_file, 'r') as f:
        stripped = (line.strip() for line in f)
        tasks = [url for url in stripped if url]

    def handle(url):
        filename = os.path.basename(url)
        filepath = os.path.join(repository, filename)
        try:
            # Fetch before opening the destination so that a failed request
            # does not leave an empty file behind.
            response = requests.get(url)
            # Treat HTTP error statuses as failures instead of saving the
            # error page as if it were the requested file.
            response.raise_for_status()
            with open(filepath, 'wb') as out:
                out.write(response.content)
        except Exception as e:
            # Worker boundary: one bad URL must not abort the whole batch.
            logger.warning('FAILED %s %s', url, repr(e))
        else:
            logger.info('DONE %s', url)

    # The pool itself caps concurrency at ``pool_size``; no extra throttling
    # between submissions is required.
    pool = ThreadPool(pool_size)
    try:
        pool.map(handle, tasks)
    finally:
        # join() is only legal after close(); always release the workers.
        pool.close()
        pool.join()

    logger.info('DONE ALL')
if __name__ == '__main__':
    # Script entry point: ``python <script> TASK_FILE REPOSITORY``.
    # Kept as a module-level import so ``sys`` is also available as a global
    # to the rest of this file when it is run as a script.
    import sys

    # Fail with a usage message rather than an IndexError traceback when
    # the two required arguments are missing.
    if len(sys.argv) < 3:
        sys.exit('usage: %s TASK_FILE REPOSITORY' % sys.argv[0])

    download(sys.argv[1], sys.argv[2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment