Skip to content

Instantly share code, notes, and snippets.

@venkatd
Last active December 3, 2018 05:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save venkatd/14db735f4aaa1b4a79815471e57af95d to your computer and use it in GitHub Desktop.
Save venkatd/14db735f4aaa1b4a79815471e57af95d to your computer and use it in GitHub Desktop.
// linkCollector inspects a fetched page and returns a slice of strings.
// recursiveCrawl passes each returned string to client.Get as the URL of
// the next page to crawl.
type linkCollector func(page crawler.Page) []string
// crawl fetches startPageURL and then, recursively, every link that collect
// returns for each fetched page. It blocks until all of those crawls have
// finished.
//
// At most maxConcurrency pages are processed at the same time. Values below 1
// are treated as 1: a zero-capacity limiter would never hold a token, so the
// first crawl would block forever, and a negative capacity makes make panic.
func crawl(client crawler.Client, startPageURL string, collect linkCollector, maxConcurrency int) {
	if maxConcurrency < 1 {
		maxConcurrency = 1
	}

	// The limiter is a pool of tokens: recursiveCrawl takes one before it
	// starts working and puts it back when it is done, so the pool starts full.
	concurrencyLimiter := make(chan struct{}, maxConcurrency)
	for i := 0; i < maxConcurrency; i++ {
		concurrencyLimiter <- struct{}{}
	}

	var wg sync.WaitGroup
	wg.Add(1) // accounts for the root crawl; recursiveCrawl calls Done
	go recursiveCrawl(client, startPageURL, collect, &wg, concurrencyLimiter)
	wg.Wait()
}
// recursiveCrawl fetches pageURL with client, hands the page to collect, and
// starts a new recursiveCrawl goroutine for every link collect returns.
//
// The caller must have called wg.Add(1) for this invocation; the matching
// wg.Done runs when the function exits. A token is taken from
// concurrencyLimiter before fetching and handed back on exit, so the channel
// must already contain tokens when the crawl starts.
//
// Both cleanups are deferred so they still run if client.Get or collect
// unwinds the goroutine (panic or runtime.Goexit); otherwise the WaitGroup
// would never reach zero and a token would be lost.
//
// NOTE(review): nothing here tracks visited URLs. If collect can return a
// link that was already crawled, the recursion will not terminate on cyclic
// link graphs — confirm that collect de-duplicates.
func recursiveCrawl(client crawler.Client, pageURL string, collect linkCollector, wg *sync.WaitGroup, concurrencyLimiter chan struct{}) {
	defer wg.Done()

	<-concurrencyLimiter
	// Registered after wg.Done, so it runs first: the token goes back to the
	// pool before the WaitGroup is decremented.
	defer func() { concurrencyLimiter <- struct{}{} }()

	page := client.Get(pageURL)
	for _, link := range collect(page) {
		// Add before starting the goroutine so wg.Wait cannot observe a zero
		// counter while child crawls are still pending.
		wg.Add(1)
		go recursiveCrawl(client, link, collect, wg, concurrencyLimiter)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment