docete/semaphore.py

## semaphore.py
#!/usr/bin/env python
# -*- coding: utf8 -*-

import threading
import time
import random

# Shared counting semaphore holding two permits: Crawler.run acquires one
# before it starts a Grab worker, and Grab.run releases it when it is done.
sema = threading.Semaphore(value=2)

class Grab(threading.Thread):
    """Worker thread that simulates fetching a single page.

    The thread never acquires the module-level semaphore ``sema`` itself;
    ``run`` only releases it, so whoever starts the thread is expected to
    have acquired one permit beforehand.
    """

    def __init__(self, url):
        """Create the worker.

        Args:
            url: Name of the page to fetch; only used in the printed messages.
        """
        threading.Thread.__init__(self)
        self.url = url

    def run(self):
        """Sleep 10-20 seconds to simulate a fetch, then release ``sema``."""
        try:
            delay = random.randint(10, 20)
            # The parenthesised single-argument form prints the same text
            # under Python 2 (print statement) and Python 3 (print function).
            print("fetch page: %s, waiting for: %ds" % (self.url, delay))
            time.sleep(delay)
            print("page %s done" % self.url)
        finally:
            # Release even if the body raised, so the permit is never lost.
            sema.release()

class Crawler(threading.Thread):
    """Dispatcher thread that starts one ``Grab`` worker per host.

    Concurrency is capped by the module-level semaphore ``sema``: a permit is
    acquired before each worker is started, and the worker releases it when
    its ``run`` finishes.
    """

    def __init__(self, hosts):
        """Create the dispatcher.

        Args:
            hosts: Iterable of host names; each one is handed to a ``Grab``.
        """
        threading.Thread.__init__(self)
        self.hosts = hosts

    def run(self):
        """Start a worker for every host, then wait for all of them.

        Raises:
            Exception: Whatever ``Grab(url)`` or ``start()`` raised; the
                permit acquired for that host is released before re-raising.
        """
        workers = []
        for url in self.hosts:
            # Blocks while no permit is available.
            sema.acquire()
            try:
                worker = Grab(url)
                worker.start()
            except Exception:
                # The worker never ran, so its ``finally`` will not release
                # the permit; hand it back here so the pool does not shrink.
                sema.release()
                raise
            workers.append(worker)
        # Joining the crawler should mean the whole crawl has finished, not
        # merely that the last worker has been launched.
        for worker in workers:
            worker.join()

# Script driver: hand the sample hosts to a Crawler thread and wait for it.
# The parenthesised single-argument print emits the same text under both
# Python 2 and Python 3 (the bare print statement is a SyntaxError on 3).
print("Starting Crawling ...")
hosts = [
    'www.google.com',
    'www.baidu.com',
    'www.163.com',
    'www.sina.com.cn',
    'www.sohu.com',
    'www.tencent.com',
]
c = Crawler(hosts)
c.start()
# Block until the crawler thread's run() has returned.
c.join()
	#!/usr/bin/env python
	# -*- coding: utf8 -*-

	import threading
	import time
	import random

	# Shared counting semaphore holding two permits: Crawler.run acquires one
	# before it starts a Grab worker, and Grab.run releases it when it is done.
	sema = threading.Semaphore(value=2)

	class Grab(threading.Thread):
	    """Worker thread that simulates fetching a single page.

	    The thread never acquires the module-level semaphore ``sema`` itself;
	    ``run`` only releases it, so whoever starts the thread is expected to
	    have acquired one permit beforehand.
	    """

	    def __init__(self, url):
	        """Create the worker.

	        Args:
	            url: Name of the page to fetch; only used in the printed
	                messages.
	        """
	        threading.Thread.__init__(self)
	        self.url = url

	    def run(self):
	        """Sleep 10-20 seconds to simulate a fetch, then release ``sema``."""
	        try:
	            delay = random.randint(10, 20)
	            # The parenthesised single-argument form prints the same text
	            # under Python 2 (print statement) and Python 3 (function).
	            print("fetch page: %s, waiting for: %ds" % (self.url, delay))
	            time.sleep(delay)
	            print("page %s done" % self.url)
	        finally:
	            # Release even if the body raised, so the permit is never lost.
	            sema.release()

	class Crawler(threading.Thread):
	    """Dispatcher thread that starts one ``Grab`` worker per host.

	    Concurrency is capped by the module-level semaphore ``sema``: a permit
	    is acquired before each worker is started, and the worker releases it
	    when its ``run`` finishes.
	    """

	    def __init__(self, hosts):
	        """Create the dispatcher.

	        Args:
	            hosts: Iterable of host names; each one is handed to a ``Grab``.
	        """
	        threading.Thread.__init__(self)
	        self.hosts = hosts

	    def run(self):
	        """Start a worker for every host, then wait for all of them.

	        Raises:
	            Exception: Whatever ``Grab(url)`` or ``start()`` raised; the
	                permit acquired for that host is released before
	                re-raising.
	        """
	        workers = []
	        for url in self.hosts:
	            # Blocks while no permit is available.
	            sema.acquire()
	            try:
	                worker = Grab(url)
	                worker.start()
	            except Exception:
	                # The worker never ran, so its ``finally`` will not release
	                # the permit; hand it back here so the pool does not shrink.
	                sema.release()
	                raise
	            workers.append(worker)
	        # Joining the crawler should mean the whole crawl has finished, not
	        # merely that the last worker has been launched.
	        for worker in workers:
	            worker.join()

	# Script driver: hand the sample hosts to a Crawler thread and wait for it.
	# The parenthesised single-argument print emits the same text under both
	# Python 2 and Python 3 (the bare print statement is a SyntaxError on 3).
	print("Starting Crawling ...")
	hosts = [
	    'www.google.com',
	    'www.baidu.com',
	    'www.163.com',
	    'www.sina.com.cn',
	    'www.sohu.com',
	    'www.tencent.com',
	]
	c = Crawler(hosts)
	c.start()
	# Block until the crawler thread's run() has returned.
	c.join()