@CodeMonkeyKevin
Created October 10, 2019 19:09
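A small script that fetches every URL listed in urls.txt concurrently with a thread pool and prints the size of each response, or the exception raised while fetching it.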
import concurrent.futures
import ssl
import urllib.request

# Read the list of URLs to fetch, one per line, skipping blank lines
with open("urls.txt", "r") as f:
    urls = [line.strip() for line in f if line.strip()]

# Retrieve a single page and return its contents.
# Certificate verification is disabled so hosts with self-signed or invalid
# certificates do not fail; this is insecure for anything beyond quick testing.
def load_url(url, timeout):
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    with urllib.request.urlopen(url, timeout=timeout, context=ctx) as conn:
        return conn.read()

# Use a with statement to ensure the worker threads are cleaned up promptly
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    # Start the load operations and map each future back to its URL
    future_to_url = {executor.submit(load_url, url, 60): url for url in urls}
    for future in concurrent.futures.as_completed(future_to_url):
        url = future_to_url[future]
        try:
            data = future.result()
        except Exception as exc:
            print('%r generated an exception: %s' % (url, exc))
        else:
            print('%r page is %d bytes' % (url, len(data)))
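
For reference, a minimal way to drive the script, assuming it is saved as fetch_urls.py (a hypothetical filename) and urls.txt sits next to it with one URL per line (the URLs below are placeholders):

# urls.txt
https://example.com
https://example.org

$ python3 fetch_urls.py

Each fetch prints either the page size in bytes or the exception that was raised, in whatever order the downloads complete.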