Skip to content

Instantly share code, notes, and snippets.

@wilbeibi
Created January 31, 2015 19:46
Show Gist options
  • Save wilbeibi/3644e800dd048004dbe1 to your computer and use it in GitHub Desktop.
PyQuery
from pyquery import PyQuery as pq
from multiprocessing.dummy import Pool as ThreadPool
import urllib
class Download(object):
    """Scrape a page for links ending in a given file extension and
    download each matching file concurrently into the current directory.
    """

    def __init__(self, ext, url):
        # ext: file extension WITHOUT the leading dot, e.g. 'pdf'
        # url: page to scrape for <a href="..."> links
        self.ext = ext
        self.url = url

    def _matches_ext(self, href):
        """Return True when *href* ends with '.<ext>'.

        The original sliced a fixed 4 characters (`href[-4:]`), which
        only worked for 3-letter extensions; endswith handles any length
        and also rejects None/short strings.
        """
        return bool(href) and href.endswith('.' + self.ext)

    def getlinks(self):
        """Fetch self.url and return the list of matching href strings."""
        doc = pq(url=self.url)
        return [
            href
            for href in (pq(a).attr('href') for a in doc('a'))
            if self._matches_ext(href)
        ]

    def download_single(self, link):
        """Download one URL, named after its last path segment."""
        # Py2/Py3 compatibility: the module-level `import urllib` only
        # provides urlretrieve on Python 2.
        try:
            from urllib.request import urlretrieve  # Python 3
        except ImportError:
            from urllib import urlretrieve  # Python 2
        # [-1] instead of [1]: rsplit on a link with no '/' returns a
        # one-element list, so [1] would raise IndexError.
        fname = link.rsplit('/', 1)[-1] or ('download.' + self.ext)
        urlretrieve(link, fname)
        print('download %s success' % (fname))

    def download_all(self):
        """Fetch every matching link using a small thread pool."""
        links = self.getlinks()
        if not links:
            return
        # `//` keeps this an int on Python 3; max(1, ...) avoids the
        # original crash from ThreadPool(0) when there are < 3 links.
        pool = ThreadPool(max(1, len(links) // 3))
        try:
            pool.map(self.download_single, links)
        finally:
            # Original leaked the pool; always release its threads.
            pool.close()
            pool.join()
if __name__ == '__main__':
    # Example run: grab every PDF linked from the lecture-collection page.
    downloader = Download(
        'pdf',
        'http://netsmell.com/principles-of-distributed-computing-lecture-collection.html',
    )
    downloader.download_all()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment