Skip to content

Instantly share code, notes, and snippets.

@victoriadrake
Created February 28, 2020 17:03
Show Gist options
  • Save victoriadrake/ad5aeeca74700bc87dc89ae834a3893a to your computer and use it in GitHub Desktop.
Save victoriadrake/ad5aeeca74700bc87dc89ae834a3893a to your computer and use it in GitHub Desktop.
```python
# Create the Checker class
class Checker:
    """Pull link records off a shared queue and check them on a thread pool.

    Each queued record is indexed by the ``"url"`` key; that value is used
    to skip links that have already been submitted.
    """

    # Class-level queue holding the link records that still need checking
    TO_PROCESS = Queue()
    # Upper bound on the number of worker threads in the pool
    THREADS = 100
    # Maximum seconds to wait for an HTTP response (handed to load_url)
    TIMEOUT = 60

    def __init__(self, url):
        ...
        # Worker pool that executes the per-link checks
        self.pool = futures.ThreadPoolExecutor(max_workers=self.THREADS)

    def run(self):
        """Dispatch queued links to the pool until the queue stays empty.

        Blocks for up to 2 seconds waiting for each record. When that wait
        times out (``Empty``) the method returns. Any other exception is
        printed and the loop moves on to the next record.
        """
        while True:
            try:
                link = self.TO_PROCESS.get(block=True, timeout=2)
                address = link["url"]
                # Already handled this URL: nothing more to do for it
                if address in self.visited:
                    continue
                # Record the URL before submitting so it is not queued twice
                self.visited.add(address)
                future = self.pool.submit(self.load_url, link, self.TIMEOUT)
                future.add_done_callback(self.handle_future)
            except Empty:
                # No new work arrived within the timeout: stop dispatching
                return
            except Exception as err:
                print(err)
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment