Last active
October 13, 2015 00:58
-
-
Save birkin/4114711 to your computer and use it in GitHub Desktop.
python threading experimentation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Queue
import threading
import urllib2
from datetime import datetime, timedelta
# NOTE: the Queue.task_done() / Queue.join() calls used in this file assume
# Python 2.5 or greater.

# Number of leading characters of each fetched page that gets printed.
CHARACTERS = 50

# Elapsed time of each fetch; worker threads append to this list.
timer_list = []
class ThreadUrl(threading.Thread):
    """Worker thread that fetches URLs taken from a shared queue.

    Each worker loops forever: it takes a URL off ``queue``, downloads it,
    prints the first ``CHARACTERS`` characters of the body and appends the
    time that fetch took to the module-level ``timer_list``.  Because the loop
    never ends, callers should mark the thread as a daemon before starting it.
    """

    def __init__(self, queue):
        """Remember the shared job queue.

        ``queue`` must provide ``get()`` and ``task_done()``.
        """
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Serve jobs from the queue until the process exits."""
        print(u'%s starting at time: %s\n---\n' % (self.getName(), datetime.now()))
        while True:
            ## grab host from queue
            host = self.queue.get()
            try:
                self._process(host)
            finally:
                ## signal to queue job is done -- always, so that a failed
                ## download cannot leave queue.join() blocked forever
                self.queue.task_done()

    def _process(self, host):
        """Fetch ``host``, print a snippet and record how long the fetch took.

        A failed fetch is reported on stdout and is not added to
        ``timer_list``; the worker stays alive to serve the next job.
        """
        # Time every job on its own: one worker may handle several URLs, so a
        # per-thread start time would report cumulative durations.
        job_start_time = datetime.now()
        try:
            snippet = self._fetch_snippet(host)
        except Exception as err:
            # Worker-thread boundary: report the failure and keep serving.
            print(u'From thread/host "%s"/"%s", fetch failed: %r\n--' % (self.getName(), host, err))
            return
        print(u'From thread/host "%s"/"%s", some output: \n%s\n--' % (self.getName(), host, snippet))
        job_end_time = datetime.now()
        time_taken = job_end_time - job_start_time
        timer_list.append(time_taken)
        print(u"%s ending at time: %s; took %s seconds\n---\n---\n\n" % (self.getName(), job_end_time, time_taken))

    def _fetch_snippet(self, host):
        """Return the first ``CHARACTERS`` bytes of ``host``'s body as text."""
        ## access url
        response = urllib2.urlopen(host)
        try:
            html = response.read().strip()  # amazon has lots of blank space at top
        finally:
            # The response object is not closed automatically.
            response.close()
        # Decode explicitly: interpolating raw bytes into a unicode format
        # string raises UnicodeDecodeError on non-ASCII pages under Python 2.
        return html[0:CHARACTERS].decode('utf-8', 'replace')
def main():
    """Fetch a fixed list of URLs concurrently and print a timing summary.

    One ``ThreadUrl`` worker is started per URL, all sharing a single queue.
    After every job has been acknowledged, the wall-clock time is printed
    next to the sum of the individual durations in ``timer_list``.
    """
    ## setup
    hosts = [
        u'http://www.amazon.com/',
        u'http://arstechnica.com',
        u'http://www.apple.com/',
        u'http://brown.edu/',
        u'http://www.google.com/',
        u'http://www.ibm.com/',
        u'http://library.brown.edu/',
        u'http://www.whitehouse.gov/',
        u'http://en.m.wikipedia.org',
        u'http://www.yahoo.com/',
    ]
    queue = Queue.Queue()
    whole_job_start = datetime.now()

    ## spawn pool of threads (one per host), and pass each the shared queue
    for _ in hosts:
        worker = ThreadUrl(queue)
        # Workers loop forever, so they must be daemons or the script would
        # never exit.
        worker.setDaemon(True)
        worker.start()

    ## populate queue
    for host in hosts:
        queue.put(host)

    ## wait on queue until everything has been processed
    queue.join()

    ## summary output
    print(u'\n-------')
    print(u"- Total-time: %s seconds" % (datetime.now() - whole_job_start))
    # Start from a zero timedelta so an empty timer_list yields 0:00:00
    # rather than None.
    total_time = sum(timer_list, timedelta())
    print(u'- Total-time if non-threaded WOULD HAVE been: %s' % total_time)
    print(u'\n-------')


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment