Skip to content

Instantly share code, notes, and snippets.

@7yan00
Last active August 29, 2015 14:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save 7yan00/dbb8a6db45a0d086abb3 to your computer and use it in GitHub Desktop.
Save 7yan00/dbb8a6db45a0d086abb3 to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"net/url"
"strings"
"github.com/PuerkitoBio/goquery"
)
// Package-level crawler state.
var (
	// stock holds the string form of every URL that has already been
	// picked up for fetching, so the same page is not processed twice.
	stock = []string{}

	// base is the start page of the crawl; it is also the string that
	// candidate URLs are checked against before they are fetched.
	base string = "http://golang.org/"
)
func main() {
doc, _ := goquery.NewDocument(base)
var result []*url.URL
doc.Find("a").Each(func(_ int, s *goquery.Selection) {
target, _ := s.Attr("href")
base, _ := url.Parse(base)
targets, _ := url.Parse(target)
result = append(result, base.ResolveReference(targets))
})
results := GetUrl(result)
fmt.Println(results)
for len(results) > 0 {
results = GetUrl(results)
fmt.Println(results)
}
}
func GetUrl(urls []*url.URL) []*url.URL {
sorceUrl := []*url.URL{}
L:
for _, urrrl := range urls {
urrl := urrrl.String()
for e := 0; e < len(stock); e++ {
if urrl == stock[e] {
continue L
}
}
if !strings.Contains(urrl, base) {
break
}
stock = append(stock, urrl)
doc, _ := goquery.NewDocument(urrl)
var results []*url.URL
doc.Find("a").Each(func(_ int, s *goquery.Selection) {
target, _ := s.Attr("href")
base, _ := url.Parse(urrl)
targets, _ := url.Parse(target)
results = append(results, base.ResolveReference(targets))
sorceUrl = append(sorceUrl, results...)
})
}
return sorceUrl
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment