Skip to content

Instantly share code, notes, and snippets.

@birkin
Last active October 13, 2015 00:58
Show Gist options
  • Save birkin/4114711 to your computer and use it in GitHub Desktop.
Save birkin/4114711 to your computer and use it in GitHub Desktop.
python threading experimentation
import Queue, threading, urllib2
from datetime import datetime
'''queue syntax assumes python 2.5 or greater'''
# Upper bound of the slice taken from each fetched page before it is printed.
CHARACTERS = 50
# Elapsed-time values (timedelta) appended by ThreadUrl.run(); they are added
# together in the summary output at the end of the script.
timer_list = []
class ThreadUrl(threading.Thread):
    '''Worker thread that fetches URLs taken from a shared queue.

    run() loops forever, so the creator is expected to mark the thread as a
    daemon and to wait on the queue (queue.join()) rather than on the thread.
    '''

    def __init__(self, queue):
        '''Remember the shared work queue.

        queue -- object providing get() and task_done(); every item it hands
                 out is passed unchanged to urllib2.urlopen().
        '''
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        '''Process hosts from the queue until the interpreter exits.

        queue.task_done() is called for every item taken, even if handling
        it raised, so a bad URL can never leave queue.join() blocked.
        '''
        thread_start_time = datetime.now()
        print(u'%s starting at time: %s\n---\n' % (self.getName(), thread_start_time))
        while True:
            ## grab host from queue (blocks until one is available)
            host = self.queue.get()
            try:
                self._handle_host(host)
            finally:
                ## signal to queue job is done -- must happen on failure too
                self.queue.task_done()

    def _handle_host(self, host):
        '''Fetch one host, print its snippet (or the failure) and record the time taken.'''
        ## time each job on its own, so queue waits and earlier jobs handled
        ## by this thread are not counted against this host
        job_start_time = datetime.now()
        try:
            ## access url and print snippet
            snippet = self._fetch_snippet(host)
            print(u'From thread/host "%s"/"%s", some output: \n%s\n--' % (self.getName(), host, snippet))
        except Exception as error:
            ## thread boundary: report the failure and keep the worker alive;
            ## %r keeps the message printable whatever the error text contains
            print(u'From thread/host "%s"/"%s", fetch failed: %r\n--' % (self.getName(), host, error))
        job_end_time = datetime.now()
        time_taken = job_end_time - job_start_time
        timer_list.append(time_taken)
        print(u"%s ending at time: %s; took %s seconds\n---\n---\n\n" % (self.getName(), job_end_time, time_taken))

    def _fetch_snippet(self, host):
        '''Return the first CHARACTERS characters of the page at host, as unicode.'''
        response = urllib2.urlopen(host)
        try:
            html = response.read()
        finally:
            ## always release the connection, even if read() fails
            response.close()
        ## decode explicitly: interpolating raw non-ASCII bytes into a unicode
        ## format string would raise UnicodeDecodeError
        ## strip() because amazon has lots of blank space at top
        return html.decode('utf-8', 'replace').strip()[0:CHARACTERS]
if __name__ == '__main__':
    ## Script entry point: fetch every host on its own worker thread, then
    ## compare the wall-clock time with the sum of the individual job times.

    ## local import: only the summary at the bottom needs timedelta
    from datetime import timedelta

    ## setup
    hosts = [
        u'http://www.amazon.com/',
        u'http://arstechnica.com',
        u'http://www.apple.com/',
        u'http://brown.edu/',
        u'http://www.google.com/',
        u'http://www.ibm.com/',
        u'http://library.brown.edu/',
        u'http://www.whitehouse.gov/',
        u'http://en.m.wikipedia.org',
        u'http://www.yahoo.com/',
    ]
    queue = Queue.Queue()
    whole_job_start = datetime.now()

    ## spawn pool of threads (one per host), and pass each the shared queue
    for _ in hosts:
        t = ThreadUrl(queue)
        ## daemon threads: the workers loop forever, so they must not keep
        ## the interpreter alive once the main thread finishes
        t.setDaemon(True)
        t.start()

    ## populate queue
    for host in hosts:
        queue.put(host)

    ## wait on queue until everything has been processed
    queue.join()

    ## summary output
    print(u'\n-------')
    print(u"- Total-time: %s seconds" % (datetime.now() - whole_job_start))
    ## an explicit timedelta() start value lets sum() add timedeltas and
    ## yields 0:00:00 (rather than None) if no timings were recorded
    total_time = sum(timer_list, timedelta())
    print(u'- Total-time if non-threaded WOULD HAVE been: %s' % total_time)
    print(u'\n-------')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment