def crawl_web(seed):
    """Crawl pages starting from ``seed`` and return the index built from them.

    The crawl keeps a pending list of pages. On each step the last entry
    of that list is removed; if it has not been processed before, its
    content is fetched, added to the index, and the links found in that
    content are merged into the pending list.

    This function depends on helpers that are not defined in this block
    and must be available in the enclosing module: ``get_page``,
    ``add_page_to_index``, ``get_all_links`` and ``union``.

    Args:
        seed: The first page to crawl. It is passed to ``get_page`` and
            stored in a set, so it must be hashable (presumably a URL
            string -- confirm against the helpers).

    Returns:
        The ``index`` list, after ``add_page_to_index`` has been called
        once for every distinct page that was crawled.
    """
    tocrawl = [seed]
    # A set gives constant-time "already crawled?" checks; a list would
    # rescan every crawled page for each popped page.
    crawled = set()
    index = []
    while tocrawl:
        page = tocrawl.pop()
        if page in crawled:
            # The same link can be queued more than once; skip repeats.
            continue
        content = get_page(page)
        add_page_to_index(index, page, content)
        # The return value of union is ignored, so it is expected to
        # modify tocrawl in place.
        union(tocrawl, get_all_links(content))
        # Mark the page only after it was processed, so a failure in the
        # helpers above does not leave it recorded as crawled.
        crawled.add(page)
    return index
# Created July 17, 2016 01:33
# Save babjo/d0de36796f8511c5461c3a4d1a79ea41 to your computer and use it in GitHub Desktop.
# Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment