Skip to content

Instantly share code, notes, and snippets.

@binq
Last active March 26, 2019 00:47
Show Gist options
  • Save binq/be2b20997f0017620e2bd40f9a446b37 to your computer and use it in GitHub Desktop.
Save binq/be2b20997f0017620e2bd40f9a446b37 to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"math/rand"
"sync"
"sync/atomic"
"time"
)
// DoneUrlsMux is a set of string keys guarded by a mutex, so that Set may be
// called from several goroutines at once. The zero value is ready to use.
type DoneUrlsMux struct {
	v   map[string]bool // keys recorded so far; allocated lazily by Set when nil
	mux sync.Mutex      // protects v
}

// Set records key and reports whether this call was the one that added it.
//
// It returns true when key was not yet present (and has now been stored) and
// false when key had already been recorded by an earlier call.
func (c *DoneUrlsMux) Set(key string) (succ bool) {
	c.mux.Lock()
	defer c.mux.Unlock()

	if c.v == nil {
		// Writing to a nil map panics; allocate on first use so a
		// zero-value DoneUrlsMux works.
		c.v = make(map[string]bool)
	}
	if _, found := c.v[key]; found {
		return false
	}
	c.v[key] = true
	return true
}
// Counter tracks a number of outstanding units of work and signals on done
// whenever an Add brings that number to zero.
type Counter struct {
	count *int64    // current count; nil until Init has been called
	done  chan bool // receives true each time Add leaves count at zero
}

// Init sets the count to 1 if the counter has not been initialised yet.
//
// It reports true when this call performed the initialisation and false when
// the counter already had a count. Init does no locking, so the first call
// must complete before the counter is shared with other goroutines.
func (c *Counter) Init() (succ bool) {
	if c.count != nil {
		return false
	}
	initial := int64(1)
	c.count = &initial
	return true
}

// Add atomically adds delta to the count and, if the resulting count is zero,
// sends true on done. Init must have been called first, because Add
// dereferences count.
func (c *Counter) Add(delta int) {
	// Decide on the value returned by AddInt64. Re-reading *c.count with a
	// plain load races with concurrent Adds: two goroutines could both
	// observe zero, or the one that actually reached zero could miss it.
	if atomic.AddInt64(c.count, int64(delta)) == 0 {
		c.done <- true
	}
}
// Fetcher is the page source that Crawl reads from.
type Fetcher interface {
// Fetch looks up url and returns its body, a list of URLs (which Crawl
// goes on to crawl in turn), and a non-nil error when the lookup fails.
Fetch(url string) (body string, urls []string, err error)
}
// Package-level state shared by every Crawl call.
var (
	// counter holds the number of Crawl calls still outstanding. Its done
	// channel has a buffer of one, so the send performed by Add when the
	// count reaches zero does not need a receiver to be ready.
	counter = Counter{done: make(chan bool, 1)}

	// doneUrlsMux records the URLs that Crawl has already claimed.
	doneUrlsMux = DoneUrlsMux{v: make(map[string]bool)}
)
// Crawl fetches url through fetcher, prints what it found, and starts one
// goroutine per returned URL to crawl it with depth-1. URLs that have already
// been recorded in doneUrlsMux, and calls with depth <= 0, are skipped.
//
// The call for which counter.Init reports true (the very first call in the
// process) blocks until counter.done is signalled, i.e. until every Crawl
// call it transitively started has finished. All other calls return as soon
// as they have started their children. Because this state is package-level,
// only that first call waits.
func Crawl(url string, depth int, fetcher Fetcher) {
	if isRoot := counter.Init(); isRoot {
		// Registered first so that it runs last: the root's own
		// decrement below must happen before it waits for completion.
		defer func() { <-counter.done }()
	}
	// Every call, root or child, gives back its slot when it returns.
	defer counter.Add(-1)

	// The depth check short-circuits, so a URL is only claimed when this
	// call is actually going to fetch it.
	if depth <= 0 || !doneUrlsMux.Set(url) {
		return
	}

	body, links, err := fetcher.Fetch(url)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("found: %s %q\n", url, body)

	// Reserve a slot for every child before starting any of them, so the
	// count cannot reach zero while children are still pending.
	counter.Add(len(links))
	for i := range links {
		go Crawl(links[i], depth-1, fetcher)
	}
}
// main crawls the fake site held in fetcher, starting from its root page.
func main() {
	const (
		startURL = "https://golang.org/"
		maxDepth = 4
	)
	Crawl(startURL, maxDepth, fetcher)
}
// fakeFetcher is an in-memory Fetcher that serves canned results keyed by URL.
type fakeFetcher map[string]*fakeResult

// fakeResult is the canned response stored for one URL.
type fakeResult struct {
	body string   // body returned by Fetch
	urls []string // URLs returned by Fetch
}

// Fetch sleeps for a random 1-10 ms and then returns the canned body and URLs
// stored for url. If url has no entry (or a nil entry) it returns an empty
// body, nil URLs and a "not found" error.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	// rand.Intn(10) is in [0, 10), so the delay is between 1 and 10 ms.
	time.Sleep(time.Duration(rand.Intn(10)+1) * time.Millisecond)

	// A nil entry is treated like a missing one rather than dereferenced.
	if res, ok := f[url]; ok && res != nil {
		return res.body, res.urls, nil
	}
	return "", nil, fmt.Errorf("not found: %s", url)
}
// fetcher is the canned site that main crawls: each key is a page URL and
// each value holds that page's body and the URLs it links to.
var fetcher = fakeFetcher{
	"https://golang.org/": {
		body: "The Go Programming Language",
		urls: []string{
			"https://golang.org/pkg/",
			"https://golang.org/cmd/",
		},
	},
	"https://golang.org/pkg/": {
		body: "Packages",
		urls: []string{
			"https://golang.org/",
			"https://golang.org/cmd/",
			"https://golang.org/pkg/fmt/",
			"https://golang.org/pkg/os/",
		},
	},
	"https://golang.org/pkg/fmt/": {
		body: "Package fmt",
		urls: []string{
			"https://golang.org/",
			"https://golang.org/pkg/",
		},
	},
	"https://golang.org/pkg/os/": {
		body: "Package os",
		urls: []string{
			"https://golang.org/",
			"https://golang.org/pkg/",
		},
	},
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment