Skip to content

Instantly share code, notes, and snippets.

@guelfey
Created March 26, 2013 09:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save guelfey/5244200 to your computer and use it in GitHub Desktop.
Save guelfey/5244200 to your computer and use it in GitHub Desktop.
// wikicrawler - check links in the Go projects wiki.
//
// It scans standard input for anything that looks like an HTTP or HTTPS URL,
// fetches each one, and reports failed requests and non-200 responses on
// standard error.
//
// Usage: curl http://wiki.go-wiki.googlecode.com/hg/Projects.wiki | wikicrawler
package main
import (
	"fmt"
	"io/ioutil"
	"net/http"
	"os"
	"regexp"
	"runtime"
	"time"
)
// Character classes used to assemble the URL pattern below. The pattern is
// taken from plan9port's plumbing rules.
const (
	hostChar = `[a-zA-Z0-9_@\-]`
	pathChar = `[a-zA-Z0-9_?,%#~&/\-+=]`
)

// re matches http:// and https:// addresses: one or more host segments
// separated by '.' or ':', an optional '/', and then one or more path segments
// separated by ':' or '.'. A separator is only consumed when another segment
// follows it, so trailing punctuation such as a sentence-ending '.' is left
// out of the match.
var re = regexp.MustCompile(
	`https?://` +
		hostChar + `+([.:]` + hostChar + `+)*` +
		`/?` +
		pathChar + `+([:.]` + pathChar + `+)*`,
)
// requestTimeout bounds a single link check. Per the net/http documentation
// the client timeout covers connection time, any redirects, and reading the
// response body.
const requestTimeout = 30 * time.Second

// linkClient is shared by all check workers. http.Get would use
// http.DefaultClient, which has no timeout, so a single unresponsive server
// could block a worker forever and keep the program from exiting.
var linkClient = &http.Client{Timeout: requestTimeout}

// check receives URLs from in until the channel is closed and issues a GET
// request for each of them. A request error, or a response whose status is not
// 200 OK, is reported on standard error together with the URL. Once in is
// drained, check sends true on done to signal that this worker has finished.
func check(in chan string, done chan bool) {
	for v := range in {
		resp, err := linkClient.Get(v)
		if err != nil {
			fmt.Fprintln(os.Stderr, v, err)
			continue
		}
		// Only the status matters here, so the body is closed unread.
		resp.Body.Close()
		if resp.StatusCode != http.StatusOK {
			fmt.Fprintln(os.Stderr, v, resp.Status)
		}
	}
	done <- true
}
// main reads all of standard input, extracts every address matched by re, and
// checks each distinct address once using a pool of check workers, one per
// CPU. It exits with status 1 if standard input cannot be read.
func main() {
	buf, err := ioutil.ReadAll(os.Stdin)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	// Deduplicate in first-seen order so that an address appearing several
	// times in the input is fetched, and reported, only once.
	matches := re.FindAll(buf, -1)
	seen := make(map[string]struct{}, len(matches))
	urls := make([]string, 0, len(matches))
	for _, m := range matches {
		u := string(m)
		if _, dup := seen[u]; dup {
			continue
		}
		seen[u] = struct{}{}
		urls = append(urls, u)
	}

	// No runtime.GOMAXPROCS call is needed: since Go 1.5 it already defaults
	// to the number of CPUs.
	num := runtime.NumCPU()
	c := make(chan string)
	// One buffer slot per worker, so no worker ever blocks on its final send.
	done := make(chan bool, num)
	for i := 0; i < num; i++ {
		go check(c, done)
	}

	for _, u := range urls {
		c <- u
	}
	// Closing c ends each worker's range loop; then wait for all of them.
	close(c)
	for i := 0; i < num; i++ {
		<-done
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment