Skip to content

Instantly share code, notes, and snippets.

@circa10a
Created May 20, 2020 00:47
Show Gist options
  • Save circa10a/c61eb57d04271c85a3feb86866a5c6c1 to your computer and use it in GitHub Desktop.
Save circa10a/c61eb57d04271c85a3feb86866a5c6c1 to your computer and use it in GitHub Desktop.
Basic usage of the Colly Go library
package main
import (
	"fmt"
	"log"

	"github.com/gocolly/colly/v2"
)
// main crawls https://caleblemoine.dev/ with colly, following links up to two
// levels deep. For every response it prints the sanitized form of the request
// URL and saves the response body to a file with that name (a relative path,
// so it lands in the current working directory).
//
// Fetch and save failures are logged; a failure to visit the start URL
// terminates the program with a non-zero exit status.
func main() {
	c := colly.NewCollector(
		// MaxDepth is 2, so only the links on the scraped page
		// and links on those pages are visited.
		colly.MaxDepth(2),
	)

	// Follow every anchor that carries an href attribute.
	c.OnHTML("a[href]", func(e *colly.HTMLElement) {
		link := e.Attr("href")
		// Visit returns an error for links the collector declines to follow
		// (e.g. the depth limit was reached or the URL was already visited).
		// Those rejections are a normal part of a crawl, so the result is
		// deliberately discarded; real fetch failures are reported by the
		// OnError callback below.
		_ = e.Request.Visit(link)
	})

	// Remember the requested URL in the request context so the response
	// callback can derive the output file name from it.
	c.OnRequest(func(r *colly.Request) {
		r.Ctx.Put("url", r.URL.String())
	})

	// Surface request failures instead of dropping them silently.
	c.OnError(func(r *colly.Response, err error) {
		log.Printf("fetching %s: %v", r.Request.URL, err)
	})

	c.OnResponse(func(r *colly.Response) {
		// Sanitize once and reuse the result for both the log line and the
		// file name.
		name := colly.SanitizeFileName(r.Ctx.Get("url"))
		fmt.Println(name)
		if err := r.Save(name); err != nil {
			log.Printf("saving %q: %v", name, err)
		}
	})

	if err := c.Visit("https://caleblemoine.dev/"); err != nil {
		log.Fatalf("visiting start URL: %v", err)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment