import re
import urllib2  # Python 2 standard library; in Python 3 use urllib.request

def get_all_links(page):
    # extract all links from the html page
    # (a naive regex sketch; a real crawler should use an HTML parser)
    return re.findall(r'href="(http[^"]+)"', page)

def get_page(link):
    # get the html page, or an empty string if the fetch fails
    try:
        return urllib2.urlopen(link).read()
    except (urllib2.URLError, ValueError):
        return ''

def addTasks(tasks, newTasks):
    for task in newTasks:
        tasks.append(task)

def crawl(seed):
    tasks = [seed]
    visited = []  # a set would make the membership test below O(1)
    while tasks:
        # pop() takes from the end of the list, so this crawl is depth-first
        link = tasks.pop()
        if link not in visited:
            page = get_page(link)
            links = get_all_links(page)
            addTasks(tasks, links)
            visited.append(link)
            # do something with the page...
            # maybe store in DB
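# A minimal way to exercise the crawler; the seed URL below is only a
# placeholder, and a real run should also bound the crawl (e.g. stop
# once len(visited) passes some limit) so it is guaranteed to terminate:
if __name__ == '__main__':
    crawl('http://www.example.com')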