Last active
September 2, 2017 20:33
-
-
Save ajduberstein/25ad4aafa91837a32cb3ffc4a87786a8 to your computer and use it in GitHub Desktop.
concurrent_requests.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
import math | |
from Queue import Queue | |
import sys | |
from threading import Thread | |
import time | |
import requests | |
"""
Inspired by
https://stackoverflow.com/questions/23547604/python-counter-atomic-increment
https://stackoverflow.com/questions/2632520/what-is-the-fastest-way-to-send-100-000-http-requests-in-python
"""
class ConcurrentFetcher(object):
    """Fetch a list of URLs concurrently, releasing requests in timed batches.

    ``run`` starts one daemon worker thread per URL. Each worker draws a
    ticket from a shared counter and sleeps ``sec_between_executions``
    seconds for every full group of ``max_simultaneous_executions`` tickets
    issued before its own, then takes one URL from the queue and fetches it.

    Args:
        urls_list: Iterable of URL strings to fetch. ``None`` is treated as
            an empty list.
        sec_between_executions: Seconds of delay added per batch.
        max_simultaneous_executions: Number of tickets per batch; must be
            greater than 0.
        processing_callback: Optional callable invoked as
            ``processing_callback(response, url)`` for every response. When
            omitted, the status code and URL are printed instead.

    Raises:
        ValueError: If ``max_simultaneous_executions`` is not greater than 0.
    """

    def __init__(
        self,
        urls_list=None,
        sec_between_executions=1,
        max_simultaneous_executions=5,
        processing_callback=None
    ):
        # A zero or negative batch size would make every worker fail while
        # computing its delay, leaving run() blocked forever on join().
        if max_simultaneous_executions <= 0:
            raise ValueError(
                'max_simultaneous_executions must be greater than 0')
        # Copy into a list so that None and one-shot iterables are accepted
        # and the length used for sizing always matches what gets enqueued.
        self.urls_list = [] if urls_list is None else list(urls_list)
        self.queue_size = len(self.urls_list)
        self.processing_callback = processing_callback
        self.thread_counter = itertools.count()
        self.sec_between_executions = sec_between_executions
        self.max_simultaneous_executions = max_simultaneous_executions

    def do_work(self):
        """Worker loop: wait for a batch slot, then fetch one queued URL.

        Never returns; it is meant to be the target of a daemon thread
        started by ``run``. A failure while fetching or in the callback is
        reported on stderr and the queue item is still marked done, so the
        ``join`` in ``run`` cannot hang on a failed request.
        """
        # Bind the queue and counter once: a later run() replaces both
        # attributes, and workers left over from an earlier run must not
        # take tickets or URLs that belong to the new one.
        q = self.q
        tickets = self.thread_counter
        while True:
            # Sleep check
            # NOTE(review): presumably relies on itertools.count being safe
            # to advance from several threads under CPython - confirm.
            ticket = next(tickets)
            # `//` floors on both Python 2 and 3; math.floor is kept so the
            # logged value has the same numeric type as before.
            sleep_time = self.sec_between_executions * math.floor(
                ticket // self.max_simultaneous_executions)
            time.sleep(sleep_time)
            print('slept for ' + str(sleep_time))
            # End
            url = q.get()
            try:
                print('Processing ' + url + '\n')
                response = requests.get(url)
                if self.processing_callback:
                    self.processing_callback(response, url)
                else:
                    print('%s %s' % (response.status_code, url))
                    print('\n')
            except Exception as exc:
                # Report and carry on; an uncaught error here would kill the
                # thread before task_done() and deadlock run().
                sys.stderr.write(
                    'Failed to process %s: %r\n' % (url, exc))
            finally:
                q.task_done()

    def run(self):
        """Start the workers, enqueue every URL and block until all are done.

        Exits the process with status 1 on ``KeyboardInterrupt`` and with
        status 2 (after writing the error to stderr) on any other exception
        raised while enqueueing or waiting.
        """
        # Fresh counter so a repeated run() starts its schedule at zero.
        self.thread_counter = itertools.count()
        self.q = Queue(self.queue_size * 2)
        for _ in range(self.queue_size):
            worker = Thread(target=self.do_work)
            # Daemon threads: the endless worker loops must not keep the
            # interpreter alive once run() has returned.
            worker.daemon = True
            worker.start()
        try:
            for url in self.urls_list:
                self.q.put(url.strip())
            self.q.join()
        except KeyboardInterrupt:
            sys.exit(1)
        except Exception as exc:
            sys.stderr.write('Aborting: %r\n' % (exc,))
            sys.exit(2)
if __name__ == "__main__":
    # Demo usage: in one terminal tab start a local server with
    #   python -m SimpleHTTPServer 3000
    # and then, in another, run
    #   python ./concurrent_requests.py
    #
    # The paths are the digits 1-4, each repeated one to five times
    # ("1", "11", ..., "4444"); "44444" is left out, giving 19 URLs.
    URLS_LIST = [
        "http://localhost:3000/" + str(digit) * repeat
        for digit in range(1, 5)
        for repeat in range(1, 6)
        if (digit, repeat) != (4, 5)
    ]
    fetcher = ConcurrentFetcher(urls_list=URLS_LIST)
    fetcher.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment