Skip to content

Instantly share code, notes, and snippets.

@johnwesonga
Last active August 29, 2015 13:56
Show Gist options
  • Save johnwesonga/9112438 to your computer and use it in GitHub Desktop.
Save johnwesonga/9112438 to your computer and use it in GitHub Desktop.
URL Checker
package main
import (
"bufio"
"fmt"
"io"
"log"
"net/http"
"os"
"sync"
"time"
)
// filename is the path of the input file that main opens and hands to getUrls.
var filename = "urls.txt"
// timedResponse wraps an *http.Response by embedding it, so the response's
// fields and methods (for example StatusCode and Request) are reachable
// directly on the wrapper.
// NOTE(review): despite the name, no timing data is stored in this type.
type timedResponse struct {
*http.Response
}
// getUrls streams the non-empty lines read from file over the returned
// channel, one string per line, and closes the channel when the input ends.
//
// Reading happens in its own goroutine and the channel is unbuffered, so the
// reader only advances as fast as the consumer receives. Blank lines are
// skipped because an empty string can never be a usable URL. If scanning
// stops because of an error (a failed read, or a line longer than the
// scanner's buffer), the error is logged and the channel is still closed so
// that consumers ranging over it do not block forever.
func getUrls(file io.Reader) <-chan string {
	c := make(chan string)
	go func() {
		// Deferred so the channel is closed on every exit path.
		defer close(c)

		scanner := bufio.NewScanner(file)
		for scanner.Scan() {
			line := scanner.Text()
			if line == "" {
				continue
			}
			c <- line
		}
		// Scan returns false for both EOF and failure; only Err tells them apart.
		if err := scanner.Err(); err != nil {
			log.Printf("reading URLs: %v", err)
		}
	}()
	return c
}
// urlResponse issues an HTTP HEAD request for every URL received on urls and
// sends a *timedResponse on the returned channel for each one whose status
// is 404 Not Found. The returned channel is closed once urls has been
// drained and every in-flight request has finished.
//
// HEAD is used instead of GET because only the status line matters. A
// request that fails (malformed URL, connection error, timeout) is logged
// and skipped rather than aborting the process, so one bad entry cannot stop
// the remaining URLs from being checked. The response body is closed before
// the response is handed to the consumer; only its metadata (StatusCode,
// Request, headers) should be used.
func urlResponse(urls <-chan string) chan *timedResponse {
	const (
		// maxInFlight caps the number of simultaneous requests so a long
		// URL list cannot exhaust sockets or file descriptors.
		maxInFlight = 20
		// requestTimeout bounds each request; without a client timeout a
		// stalled server would hang its goroutine indefinitely.
		requestTimeout = 30 * time.Second
	)

	c := make(chan *timedResponse)
	go func() {
		// A single shared client lets connections be reused across requests.
		client := &http.Client{Timeout: requestTimeout}
		// sem is a counting semaphore: one token per running request.
		sem := make(chan struct{}, maxInFlight)

		// See https://pkg.go.dev/sync#WaitGroup
		var wg sync.WaitGroup
		for url := range urls {
			sem <- struct{}{} // blocks while maxInFlight requests are running
			wg.Add(1)
			go func(url string) {
				defer wg.Done()

				resp, err := client.Head(url)
				// Release the slot as soon as the request is done so a slow
				// consumer of c does not hold up further requests.
				<-sem
				if err != nil {
					log.Printf("HEAD %q: %v", url, err)
					return
				}
				// The body must be closed to release the connection. A HEAD
				// response has no payload, so a Close error is not actionable.
				_ = resp.Body.Close()

				if resp.StatusCode == http.StatusNotFound {
					c <- &timedResponse{resp}
				}
			}(url)
		}
		wg.Wait()
		close(c)
	}()
	return c
}
// main reads URLs from the file named by filename, checks them with
// urlResponse, and for every response it receives prints the URL and status
// code to standard output and appends the URL as a line to output.csv.
// Finally it prints the total elapsed time.
//
// Any failure to open, create, write or close a file ends the program via
// log.Fatal.
func main() {
	file, err := os.Open(filename)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	fwrite, err := os.Create("output.csv")
	if err != nil {
		log.Fatal(err)
	}

	start := time.Now()
	for req := range urlResponse(getUrls(file)) {
		fmt.Println(req.Request.URL, req.StatusCode)
		if _, err := fwrite.WriteString(req.Request.URL.String() + "\n"); err != nil {
			log.Fatalf("writing output.csv: %v", err)
		}
	}

	// Close explicitly instead of deferring: a failed Close on a file that
	// was written to can mean lost data, so the error must be reported.
	if err := fwrite.Close(); err != nil {
		log.Fatalf("closing output.csv: %v", err)
	}
	fmt.Println(time.Since(start))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment