Skip to content

Instantly share code, notes, and snippets.

@docete
Created December 10, 2012 15:23
Show Gist options
  • Save docete/4251227 to your computer and use it in GitHub Desktop.
Save docete/4251227 to your computer and use it in GitHub Desktop.
synchronization primitive: semaphore example (resource limitation)
#!/usr/bin/env python
# -*- coding: utf8 -*-
import threading
import time
import random
# Shared counting semaphore that caps how many fetches run concurrently:
# Crawler.run acquires one permit before starting each Grab worker, and
# Grab.run releases it when the worker ends.
sema = threading.Semaphore(value=2)
class Grab(threading.Thread):
    """Worker thread that simulates fetching a single page.

    The worker never acquires a permit from the module-level ``sema``; it
    only releases one when ``run`` ends. Whoever starts a ``Grab`` is
    therefore expected to have acquired the permit beforehand.
    """

    def __init__(self, url):
        """Store *url*, the page this worker pretends to download."""
        threading.Thread.__init__(self)
        self.url = url

    def run(self):
        """Sleep for a random 10-20 seconds, then release one ``sema`` permit."""
        try:
            delay = random.randint(10, 20)
            # A single parenthesised argument prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("fetch page: %s, waiting for: %ds" % (self.url, delay))
            time.sleep(delay)
            print("page %s done" % self.url)
        finally:
            # Release in ``finally`` so that a failure during the simulated
            # fetch cannot permanently consume one of the permits.
            sema.release()
class Crawler(threading.Thread):
    """Dispatcher thread that starts one ``Grab`` worker per host.

    Concurrency is limited by the module-level ``sema``: a permit is taken
    before each worker is started, and the worker gives it back when it ends.
    """

    def __init__(self, hosts):
        """Store *hosts*, the iterable of URLs to hand to workers."""
        threading.Thread.__init__(self)
        self.hosts = hosts

    def run(self):
        """Start a ``Grab`` for every host, blocking while no permit is free.

        Returns once the last worker has been *started*; it does not wait for
        the workers to finish.

        Raises:
            Exception: whatever ``Grab(url)`` or ``start()`` raised. The
                permit taken for that host is released before re-raising.
        """
        for url in self.hosts:
            # Blocks here while the maximum number of workers is running.
            sema.acquire()
            try:
                Grab(url).start()
            except Exception:
                # The worker never ran, so it will never release the permit
                # on our behalf; hand it back here to avoid leaking it.
                sema.release()
                raise
# Demo driver: crawl a fixed list of hosts, with concurrency limited by `sema`.
# The parenthesised single-argument print runs on both Python 2 and Python 3.
print("Starting Crawling ...")
hosts = [
    'www.google.com',
    'www.baidu.com',
    'www.163.com',
    'www.sina.com.cn',
    'www.sohu.com',
    'www.tencent.com',
]
c = Crawler(hosts)
c.start()
# Blocks until Crawler.run returns, i.e. until a worker has been started for
# every host; the workers themselves may still be running at that point.
c.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment