Skip to content

Instantly share code, notes, and snippets.

@alpinskiy
Created September 11, 2015 14:16
Show Gist options
  • Save alpinskiy/21e8b4bee98dce80fec8 to your computer and use it in GitHub Desktop.
Save alpinskiy/21e8b4bee98dce80fec8 to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"golang.org/x/net/html"
"log"
"net/http"
"net/url"
"os"
)
// main expects exactly one command-line argument, the URL of a page, and
// prints every internal link found on that page, one per line.
//
// A wrong argument count prints a usage message to standard error and exits
// with status 2. Any fetch or parse failure is logged and exits with status 1.
func main() {
	args := os.Args[1:]
	if len(args) != 1 {
		fmt.Fprintf(os.Stderr, "Usage: %v <url>\n", os.Args[0])
		os.Exit(2)
	}
	// The real work lives in a helper that returns an error so that its
	// deferred cleanup runs before log.Fatal terminates the process.
	if err := printInternalLinks(args[0]); err != nil {
		log.Fatal(err)
	}
}

// printInternalLinks downloads rawURL, parses the response body as HTML and
// writes each internal link to standard output, one per line. Links are
// classified against the URL of the final request (resp.Request.URL), so
// redirects are taken into account.
//
// It returns an error when the request fails, when the server answers with a
// non-2xx status, or when the body cannot be parsed.
func printInternalLinks(rawURL string) error {
	resp, err := http.Get(rawURL)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// Without this check an error page (404, 500, ...) would be parsed and
	// its links reported as if they belonged to the requested document.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return fmt.Errorf("fetching %s: unexpected status %s", rawURL, resp.Status)
	}

	doc, err := html.Parse(resp.Body)
	if err != nil {
		return fmt.Errorf("parsing %s: %w", rawURL, err)
	}

	for _, link := range getInternalHrefs(doc, resp.Request.URL) {
		fmt.Println(link.String())
	}
	return nil
}
// getInternalHrefs collects every href found under n (via getHrefs) and
// returns, in the same order, only those that isInternal accepts relative to
// base. The result is nil when no link qualifies.
func getInternalHrefs(n *html.Node, base *url.URL) []url.URL {
	candidates := getHrefs(n)

	var kept []url.URL
	for i := range candidates {
		if !isInternal(&candidates[i], base) {
			continue
		}
		kept = append(kept, candidates[i])
	}
	return kept
}
// getHrefs returns the hrefs gathered from the tree rooted at n. It is the
// entry point for gatherHrefs, starting the traversal with an empty (nil)
// accumulator.
func getHrefs(n *html.Node) []url.URL {
	var none []url.URL
	return gatherHrefs(n, none)
}
// gatherHrefs visits n and then each of its descendants, appending to links
// the parsed href of every <a> and <link> element, and returns the grown
// slice. A node's own href is appended before those of its children.
//
// An href is skipped when it fails to parse or when its scheme is
// "javascript". Only the first href attribute of an element is examined.
func gatherHrefs(n *html.Node, links []url.URL) []url.URL {
	carriesLink := n.Type == html.ElementNode && (n.Data == "a" || n.Data == "link")
	if carriesLink {
		for i := range n.Attr {
			if n.Attr[i].Key != "href" {
				continue
			}
			parsed, err := url.Parse(n.Attr[i].Val)
			if err == nil && parsed.Scheme != "javascript" {
				links = append(links, *parsed)
			}
			// Stop at the first href whether or not it was usable.
			break
		}
	}

	child := n.FirstChild
	for child != nil {
		links = gatherHrefs(child, links)
		child = child.NextSibling
	}
	return links
}
// isInternal reports whether u refers to the same host as base.
//
// A reference that has neither a scheme nor a host (for example "/about",
// "page.html" or "#top") is always internal. Every other reference is
// internal only when its Host field is exactly equal to base.Host. This
// includes scheme-relative references such as "//cdn.example.org/x.css",
// which have an empty scheme but still name a host and therefore must be
// compared rather than assumed internal.
func isInternal(u *url.URL, base *url.URL) bool {
	if !u.IsAbs() && u.Host == "" {
		return true
	}
	return u.Host == base.Host
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment