Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
package main
import (
"fmt"
)
// Fetcher is the page-retrieval abstraction accepted by Crawl; any type with
// a matching Fetch method satisfies it.
type Fetcher interface {
// Fetch returns the body of URL and
// a slice of URLs found on that page.
// A non-nil err reports that the page could not be retrieved.
Fetch(url string) (body string, urls []string, err error)
}
// unprocessed is the message getPage sends back to Crawl over the result
// channel after it has fetched one page.
type unprocessed struct {
// depth is the depth getPage was called with, minus one.
depth int
// url holds the links reported for the fetched page (a slice, despite the
// singular name).
url[] string
}
func getPage(url string, depth int, r chan unprocessed){
body, urls, err := fetcher.Fetch(url)
fmt.Printf("found: %s %q\n", url, body)
if err != nil {
fmt.Println(err)
}
r <- unprocessed{ depth - 1, urls }
}
func Crawl(url string, depth int, fetcher Fetcher) {
//setup channel for inputs to be processed
up := make(chan unprocessed, 0)
//kick off processing and count how many pages are left to process
go getPage(url, depth, up)
outstanding := 1
visited := make(map[string]bool)
for outstanding > 0 {
//pop a visit from the channel
next := <-up
outstanding--
//if were too deep, skip it
if next.depth <= 0 {
continue
}
//loop over all urls to visit from that page
for _, link := range next.url {
//check we havent visited them before
if visited[link] {
continue
}
//all good to visit them
outstanding++
visited[link] = true
go getPage(link, depth, up)
}
}
}
// main crawls the canned pages in fetcher, starting at the Go home page and
// using a depth of 4.
func main() {
	const (
		startURL = "http://golang.org/"
		maxDepth = 4
	)
	Crawl(startURL, maxDepth, fetcher)
}
// fakeFetcher is an in-memory Fetcher: it maps each known URL to the canned
// result that Fetch hands back for it.
type fakeFetcher map[string]*fakeResult

// fakeResult is the canned content of a single page.
type fakeResult struct {
	body string   // page body returned by Fetch
	urls []string // links returned by Fetch
}

// Fetch looks url up in f and returns the stored body and links. A URL that
// is not a key of f yields a "not found" error and zero values.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	page, known := f[url]
	if !known {
		return "", nil, fmt.Errorf("not found: %s", url)
	}
	return page.body, page.urls, nil
}
// fetcher is the canned site used by main: four pages under
// http://golang.org/ together with the links each one reports.
// "http://golang.org/cmd/" is linked to but deliberately has no entry.
var fetcher = fakeFetcher{
	"http://golang.org/": &fakeResult{
		body: "The Go Programming Language",
		urls: []string{
			"http://golang.org/pkg/",
			"http://golang.org/cmd/",
		},
	},
	"http://golang.org/pkg/": &fakeResult{
		body: "Packages",
		urls: []string{
			"http://golang.org/",
			"http://golang.org/cmd/",
			"http://golang.org/pkg/fmt/",
			"http://golang.org/pkg/os/",
		},
	},
	"http://golang.org/pkg/fmt/": &fakeResult{
		body: "Package fmt",
		urls: []string{
			"http://golang.org/",
			"http://golang.org/pkg/",
		},
	},
	"http://golang.org/pkg/os/": &fakeResult{
		body: "Package os",
		urls: []string{
			"http://golang.org/",
			"http://golang.org/pkg/",
		},
	},
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.