Skip to content

Instantly share code, notes, and snippets.

Created April 30, 2011 13:28
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
Star You must be signed in to star a gist
What would you like to do?
import mechanize
from BeautifulSoup import BeautifulSoup
class Dmoz(object):
    """Collect site URLs from a DMOZ search-results page.

    NOTE(review): this class was reconstructed from a copy of the source in
    which indentation and parts of three statements were lost (the browser
    attribute assignment, the search request, and the response read). The
    attribute name ``br`` and the value of ``SEARCH_URL`` are presumed --
    confirm both against the original before relying on them.
    """

    # Presumed search endpoint; the search term is appended verbatim.
    # TODO confirm -- the original URL literal is missing from this copy.
    SEARCH_URL = "http://www.dmoz.org/search?q="

    def __init__(self):
        """Create the mechanize browser used for the search requests."""
        self.br = mechanize.Browser()

    def get_page_urls(self, term):
        """Return the link targets listed on the search page for *term*.

        Args:
            term: Search phrase; concatenated as-is onto ``SEARCH_URL``.

        Returns:
            A list of ``href`` values taken from the first direct-child
            ``<a>`` of each ``<li>`` found inside the first
            ``<ol class="site">`` of the response. Items without such a
            link are skipped. An empty list is returned when the page has
            no ``<ol class="site">`` element.
        """
        result = self.br.open(self.SEARCH_URL + term)
        result_html = result.read()
        soup = BeautifulSoup(result_html)
        sites_obj = soup.find('ol', {"class": "site"})
        if not sites_obj:
            return []

        urls = []
        for item in sites_obj('li'):
            # recursive=False: only consider anchors that are direct
            # children of the <li>, not links nested deeper inside it.
            anchor = item.find('a', recursive=False)
            if anchor is None:
                # No direct link in this entry; skip it rather than raise.
                continue
            href = anchor.get('href')
            if href is not None:
                urls.append(href)
        return urls
def main():
    """Run one sample search and print the list of URLs it returns."""
    # Example usage:
    searcher = Dmoz()
    found_urls = searcher.get_page_urls("Computer Science")
    print(found_urls)
# Run the sample search only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment