Skip to content

Instantly share code, notes, and snippets.

@capttwinky
Created August 11, 2011 18:43
Show Gist options
  • Save capttwinky/1140398 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
from Queue import Queue
from threading import Thread
from urllib2 import urlopen
from time import time
from random import shuffle
# Hostnames to benchmark.  "bogus.url" is included on purpose so the error
# path gets exercised alongside the successful fetches.
hosts = [
    "yahoo.com", "google.com", "amazon.com", "ibm.com", "apple.com",
    "bbc.co.uk", "npr.org", "cnn.com", "ubuntu.com", "soundcloud.com",
    "bogus.url", "ebay.com", "linux.org", "osuosl.org", "renren.com",
    "alibaba.com", "dell.com", "senate.gov", "microsoft.com", "bing.com",
    "ideamerge.com", "nasa.gov", "archive.org", "wikipedia.com",
    "python.org", "aol.com", "trimet.org", "arstechnica.com", "redit.com",
    "steinbarts.com", "hulu.com", "itunes.com", "att.com", "usbank.com",
    "gmail.com", "etsy.com", "facebook.com", "opb.org", "mtv.com",
    "wwf.org", "sony.com", "parthenonsoftware.com", "co-op.org",
]
# Column width for the report: longest hostname plus one space of padding.
totalLen = max(len(host) for host in hosts) + 1
# Randomise fetch order so the timings are not biased by list position.
shuffle(hosts)
class ThreadUrl(Thread):
    """Threaded URL grab.

    Pulls hostnames off the queue given at construction, fetches the first
    1024 bytes of http://<host>, and publishes a timing/error record on the
    module-level ``q_sum`` queue.  Loops forever — start it as a daemon.
    """
    def __init__(self, queue):
        Thread.__init__(self)
        self.queue = queue  # queue of hostnames still to fetch

    def run(self):
        while True:
            errorOut = False  # falsy = success; else a short error string
            host = self.queue.get()
            try:
                url = urlopen("http://%s" % host)
                url.read(1024)  # read a little so the connection actually works
            except Exception as e:
                # Best-effort demo: record a truncated error message rather
                # than letting one bad host kill the worker thread.
                errorOut = str(e)[:35]
            # Bug fix: publish the result BEFORE task_done().  Previously
            # task_done() came first, so q_url.join() in doIt() could return
            # (and q_sum.join() run against a momentarily-empty queue) while
            # this result was still un-queued — silently losing results.
            q_sum.put((time() - start, (host, (time() - start), errorOut)))
            self.queue.task_done()
class mySummer(Thread):
    """Collects per-fetch results from a queue of (elapsed, details) pairs.

    ``lstTotal`` accumulates the elapsed-time samples and ``lstOut`` the
    (host, elapsed, error) tuples, both in arrival order.  Loops forever —
    start it as a daemon.
    """
    def __init__(self, queue):
        Thread.__init__(self)
        # Bug fix: honour the queue that was passed in.  The original
        # ignored the parameter and always bound the global q_sum, which
        # worked only by coincidence (the sole caller passes q_sum).
        self.queue = queue
        self.lstOut = []    # (host, elapsed, errorOut) tuples
        self.lstTotal = []  # elapsed-time samples

    def run(self):
        while True:
            myNum, tplOut = self.queue.get()
            self.lstTotal.append(myNum)
            self.lstOut.append(tplOut)
            self.queue.task_done()

    def total(self, intEnd=False):
        """Return the sum of the last ``intEnd`` time samples.

        A falsy ``intEnd`` (the default) sums every sample collected so far.
        """
        if not intEnd:
            intEnd = len(self.lstTotal)
        # Slice off just the final intEnd entries before summing.
        return sum(self.lstTotal[len(self.lstTotal) - intEnd:])
def doIt(intWorkers):
    """Fan the host list out across ``intWorkers`` fetcher threads.

    Blocks until every fetch has been recorded, then returns a pair of
    (sum of the last ``intWorkers`` elapsed samples, list of result tuples).
    """
    # Spawn the pool of fetcher threads, all consuming from q_url.
    for _ in range(intWorkers):
        worker = ThreadUrl(q_url)
        worker.setDaemon(True)
        worker.start()

    # Hand every hostname to the pool.
    for host in hosts:
        q_url.put(host)

    # One thread is enough to drain the results queue.
    mySum = mySummer(q_sum)
    mySum.setDaemon(True)
    mySum.start()

    # Wait until both the work queue and the results queue are fully processed.
    q_url.join()
    q_sum.join()

    return mySum.total(intWorkers), mySum.lstOut
# Build the shared queues at module level so the thread classes can see them.
q_url = Queue()  # hostnames waiting to be fetched
q_sum = Queue()  # (elapsed, (host, elapsed, error)) results

#myWorkers = 1 #single threaded!
myWorkers = len(hosts)
#myWorkers = 5

print("Fetching %i URLS with %i workers:" % (len(hosts), myWorkers))
start = time()
lTime, lstOut = doIt(myWorkers)
eTime = time() - start

# Bug fix: the report loop used to rebind `eTime` as its loop variable,
# clobbering the overall wall-clock time measured above — the summary line
# then printed the LAST host's time instead of the total.  The per-host
# elapsed value now gets its own name.
for (host, hostTime, error) in lstOut:
    print("%s:%.02f%s" % (host.ljust(totalLen), hostTime,
                          ":E:%s" % str(error) if error else ""))

# Parenthesized print form, consistent with the calls above (valid py2 & py3).
print("eTime: %.02f, sum:%.02f: %.02f times faster, %.02f%% deltaTime/thread"
      % (eTime, lTime, abs(eTime - lTime) / eTime,
         (eTime - lTime) * 100 / (eTime * myWorkers)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment