Skip to content

Instantly share code, notes, and snippets.

@demetriusnunes
Last active October 8, 2020 17:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save demetriusnunes/d2cef3cd249167ac94400fc591d31f03 to your computer and use it in GitHub Desktop.
Save demetriusnunes/d2cef3cd249167ac94400fc591d31f03 to your computer and use it in GitHub Desktop.
------------------------------
import List, Queue, Hash from lang.data
import fetch, normalize_url from http.utils
import write_file from lang.io
// Crawls pages breadth-first starting at start_url.
//
// For each page taken from the queue: fetches it, downloads every asset the
// page lists and writes it to disk, then enqueues every link on the page that
// has not been seen before.
//
// Parameters:
//   start_url - URL where the crawl begins; it is normalized before use.
//
// Returns:
//   A List with one entry per crawled page, pairing the page URL with that
//   page's assets.
//
// NOTE(review): nothing here restricts links to the starting site, so the
// crawl follows every link it finds - confirm that is intended.
function crawl(start_url) {
  crawled = new List
  queue = new Queue
  // URLs that have already been enqueued (and therefore will be, or have
  // been, fetched).
  visited = new Hash

  start_url = normalize_url(start_url)
  // Mark a URL as visited at the moment it is enqueued, not when it is later
  // popped. Otherwise a link that appears on several pages (or several times
  // on one page) is queued repeatedly before its first fetch, and the page is
  // then crawled and recorded more than once.
  visited[start_url] = true
  queue.push(start_url)

  while (not queue.empty?) {
    url = queue.pop()
    page = fetch(url)

    // Download and store every asset referenced by the page.
    for asset in page.assets {
      data = fetch(asset)
      write_file(data.filename, data.bytes)
    }

    // Normalize links before the visited lookup so that different spellings
    // of the same URL are compared in their normalized form.
    for link in page.links {
      link = normalize_url(link)
      if not visited[link] {
        visited[link] = true
        queue.push(link)
      }
    }

    crawled.append({ url, page.assets })
  }

  return crawled
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment