# markwatson/Dmoz.py

## Dmoz.py
import mechanize
from BeautifulSoup import BeautifulSoup

class Dmoz(object):
    """Small scraper that collects site URLs from a DMOZ search results page.

    One ``mechanize.Browser`` is created per instance and reused for every
    search.
    """

    # Search endpoint; the URL-encoded query term is appended to it.
    _SEARCH_URL = "http://www.dmoz.org/search?q="

    def __init__(self):
        self.br = mechanize.Browser()

    @classmethod
    def _build_search_url(cls, term):
        """Return the search URL for *term* with the term URL-encoded.

        Encoding keeps spaces and reserved characters such as ``&``, ``#``
        and ``+`` from corrupting the query string.
        """
        # Imported locally so the helper works on both Python 2 and Python 3
        # without touching the file's top-level imports.
        try:
            from urllib import quote_plus  # Python 2
        except ImportError:
            from urllib.parse import quote_plus  # Python 3
        # Encode text to UTF-8 first: Python 2's quote_plus cannot handle
        # non-ASCII unicode objects, while byte strings work on both versions.
        if not isinstance(term, bytes):
            term = term.encode("utf-8")
        return cls._SEARCH_URL + quote_plus(term)

    def get_page_urls(self, term):
        """Search for *term* and return the result URLs as a list.

        Fetches the search page, looks for the first ``<ol class="site">``
        element and collects the ``href`` of the first direct ``<a>`` child
        of each ``<li>`` inside it.

        Returns an empty list when the page has no such ``<ol>`` element.
        List items without a direct ``<a>`` child are skipped.
        """
        response = self.br.open(self._build_search_url(term))
        soup = BeautifulSoup(response.read())
        listing = soup.find('ol', {"class": "site"})
        if not listing:
            return []
        urls = []
        for item in listing('li'):
            anchors = item('a', recursive=False)
            # Skip items that carry no direct link instead of raising
            # IndexError on the [0] lookup.
            if anchors:
                urls.append(anchors[0]['href'])
        return urls

def main():
    """Run a sample search and print the list of URLs it returns."""
    # Example usage: build a client, search for one term, show the result.
    client = Dmoz()
    found = client.get_page_urls("Computer Science")
    print(found)

# Run the sample search only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
	import mechanize
	from BeautifulSoup import BeautifulSoup

	class Dmoz(object):
		"""Small scraper that collects site URLs from a DMOZ search results page.

		One ``mechanize.Browser`` is created per instance and reused for every
		search.
		"""

		# Search endpoint; the URL-encoded query term is appended to it.
		_SEARCH_URL = "http://www.dmoz.org/search?q="

		def __init__(self):
			self.br = mechanize.Browser()

		@classmethod
		def _build_search_url(cls, term):
			"""Return the search URL for *term* with the term URL-encoded.

			Encoding keeps spaces and reserved characters such as ``&``, ``#``
			and ``+`` from corrupting the query string.
			"""
			# Imported locally so the helper works on both Python 2 and
			# Python 3 without touching the file's top-level imports.
			try:
				from urllib import quote_plus  # Python 2
			except ImportError:
				from urllib.parse import quote_plus  # Python 3
			# Encode text to UTF-8 first: Python 2's quote_plus cannot handle
			# non-ASCII unicode objects, while byte strings work on both.
			if not isinstance(term, bytes):
				term = term.encode("utf-8")
			return cls._SEARCH_URL + quote_plus(term)

		def get_page_urls(self, term):
			"""Search for *term* and return the result URLs as a list.

			Fetches the search page, looks for the first ``<ol class="site">``
			element and collects the ``href`` of the first direct ``<a>`` child
			of each ``<li>`` inside it.

			Returns an empty list when the page has no such ``<ol>`` element.
			List items without a direct ``<a>`` child are skipped.
			"""
			response = self.br.open(self._build_search_url(term))
			soup = BeautifulSoup(response.read())
			listing = soup.find('ol', {"class": "site"})
			if not listing:
				return []
			urls = []
			for item in listing('li'):
				anchors = item('a', recursive=False)
				# Skip items that carry no direct link instead of raising
				# IndexError on the [0] lookup.
				if anchors:
					urls.append(anchors[0]['href'])
			return urls

	def main():
		"""Run a sample search and print the list of URLs it returns."""
		# Example usage: build a client, search for one term, show the result.
		dm = Dmoz()
		print(dm.get_page_urls("Computer Science"))

	# Run the sample search only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
		main()