
A simple script for faster parallel URL fetching with urllib2, with exception handling.

fetch_url.py
import threading
import urllib2
import httplib
import logging
import Queue
import traceback

# Logger used by the exception handlers below; the original gist
# referenced checksLogger without defining it.
checksLogger = logging.getLogger(__name__)

def read_url(url, queue):
    """Fetch a single URL and put the response body on the queue."""
    try:
        data = urllib2.urlopen(url).read()
    except urllib2.HTTPError as e:
        checksLogger.error('HTTPError = ' + str(e.code))
        return
    except urllib2.URLError as e:
        checksLogger.error('URLError = ' + str(e.reason))
        return
    except httplib.HTTPException:
        checksLogger.error('HTTPException')
        return
    except Exception:
        checksLogger.error('generic exception: ' + traceback.format_exc())
        return

    print('Fetched %s bytes from %s' % (len(data), url))
    queue.put(data)

def fetch_parallel(list_of_urls):
    """Fetch all URLs concurrently, one thread per URL; results
    arrive on the returned queue in completion order."""
    result = Queue.Queue()
    threads = [threading.Thread(target=read_url, args=(url, result))
               for url in list_of_urls]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return result
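
A minimal usage sketch, assuming the script is run standalone (the URLs here are placeholders, not from the original gist):

if __name__ == '__main__':
    logging.basicConfig(level=logging.ERROR)
    urls = ['http://example.com/', 'http://example.org/']
    results = fetch_parallel(urls)
    # Safe to drain without blocking: join() in fetch_parallel
    # guarantees every thread has finished before we get here.
    while not results.empty():
        page = results.get()
        print('Got a page of %d bytes' % len(page))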
