@mumbleskates
Created May 5, 2019 22:06
from threading import Thread

import requests

from multithreading import Channel


def fetch_all_with_pool(iterable_of_urls, pool_size=20):
    """
    Fetches URLs with a thread pool.

    :param iterable_of_urls:
        Some iterable that yields URLs to fetch.
    :param pool_size:
        Number of threads to use.
    :return:
        Yields the response for each URL, not necessarily in input order.
    """
    url_channel = Channel(maxsize=pool_size)
    output_channel = Channel(maxsize=pool_size)

    def worker():
        for url in url_channel:
            output_channel.put(requests.get(url))

    def fetch_pipeline():
        """
        Runs the actual thread pool that processes from the url channel
        into the output channel.
        """
        thread_pool = [Thread(target=worker) for _ in range(pool_size)]
        for thread in thread_pool:
            thread.start()
        # Process all the URLs
        url_channel.put_all(iterable_of_urls)
        # Wait for the fetching to be done
        for thread in thread_pool:
            thread.join()
        # Signal that the pipeline is done
        output_channel.close()

    Thread(target=fetch_pipeline).start()
    # Yield all the results from the main thread as a generator.
    yield from output_channel
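
A minimal usage sketch (the URL list and pool size here are illustrative, not from the gist): the generator is consumed on the main thread while the pool fetches in the background, and responses arrive in completion order rather than input order.

if __name__ == '__main__':
    # Hypothetical URL list, for illustration only.
    urls = ['https://example.com/page/%d' % n for n in range(50)]
    for response in fetch_all_with_pool(urls, pool_size=10):
        # Each item is a requests.Response, yielded as a worker finishes it.
        print(response.url, response.status_code)

The Channel class comes from a separate multithreading module (presumably the author's own; not shown on this page). The sketch below is an assumption about how such a channel might look, inferred only from the calls used above: a bounded queue supporting put, put_all, close, and iteration that stops once the channel is closed and drained. In particular it assumes put_all closes the channel after its input is exhausted, since that is what lets the worker loops finish before thread.join() returns. Treat it as an illustration, not the gist's actual implementation.

import queue


class Channel:
    """Illustrative sketch: a bounded, closeable queue that can be iterated until closed."""

    _DONE = object()  # sentinel marking the end of the stream

    def __init__(self, maxsize=0):
        self._queue = queue.Queue(maxsize=maxsize)

    def put(self, item):
        self._queue.put(item)

    def put_all(self, iterable):
        # Assumption: feeding an iterable also closes the channel afterwards.
        for item in iterable:
            self._queue.put(item)
        self.close()

    def close(self):
        # Enqueue the sentinel; consumers re-enqueue it so every reader stops.
        self._queue.put(self._DONE)

    def __iter__(self):
        while True:
            item = self._queue.get()
            if item is self._DONE:
                self._queue.put(self._DONE)
                return
            yield item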