Skip to content

Instantly share code, notes, and snippets.

@russellbeattie
Last active December 15, 2015 05:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save russellbeattie/5209133 to your computer and use it in GitHub Desktop.
Save russellbeattie/5209133 to your computer and use it in GitHub Desktop.
package main
import (
_ "github.com/bmizerany/pq"
"database/sql"
"net/http"
"log"
"net"
"time"
"strconv"
"sync"
"io/ioutil"
)
// wg tracks the worker goroutines started by main: main calls wg.Add before
// launching each fetchUrls goroutine and blocks on wg.Wait, and every
// fetchUrls call signals completion with wg.Done.
var wg sync.WaitGroup
// getConn opens a handle to the "magnet" PostgreSQL database using the
// hard-coded connection string below.
//
// An error from sql.Open is treated as fatal: without a usable handle the
// program cannot do any work, and returning the nil *sql.DB that accompanies
// the error would only defer the failure to a nil-pointer panic in the caller.
// The caller owns the returned handle and is responsible for closing it.
func getConn() *sql.DB {
	db, err := sql.Open("postgres", "user=russell dbname=magnet sslmode=disable")
	if err != nil {
		log.Fatalln("opening database:", err)
	}
	return db
}
// main counts the sources that are not already marked 404, splits them into
// equally sized slices and starts one fetchUrls goroutine per slice, then
// waits for all of them to finish.
func main() {
	db := getConn()
	defer db.Close()

	var total int
	err := db.QueryRow("SELECT count(*) as total FROM allsources where errorcode != 404").Scan(&total)
	if err != nil {
		// Without a row count there is nothing sensible to partition.
		log.Println("counting sources:", err)
		return
	}
	log.Println("total", total)

	const routines = 5

	// Round the slice size up rather than down. With plain integer division
	// the last total%routines rows fall outside every worker's
	// (limit, offset) window and are never fetched; e.g. 12 rows over 5
	// workers would give limit 2 and silently skip rows 11 and 12.
	limit := (total + routines - 1) / routines
	log.Println("offset", limit)

	for i := 0; i < routines; i++ {
		wg.Add(1)
		go fetchUrls(i, limit)
	}
	wg.Wait()
}
func fetchUrls(procnum int, limit int){
db := getConn()
defer db.Close()
sql := "select id, sourceurl from allsources where errorcode !=404 order by id limit " + strconv.Itoa(limit) + " offset " + strconv.Itoa(limit * procnum)
// log.Println(sql)
rows, err := db.Query(sql)
if err != nil {
log.Println(err)
wg.Done()
return
}
client := &http.Client{
Transport: &http.Transport{
Dial: timeoutDialler(30),
},
}
for rows.Next() {
var id int
var url string
rows.Scan(&id, &url)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
log.Println(err)
}
req.Header.Set("User-Agent", "Feedbot")
resp, err := client.Do(req)
if err != nil {
log.Println(id, " ", "TIMEOUT", url, "(", procnum , ")")
} else {
defer resp.Body.Close()
log.Println(id, resp.StatusCode, url, "(", procnum , ")")
if(resp.StatusCode == 404){
_, err = db.Exec("update allsources set errorcode = 404 where id = $1", id)
if err != nil {
log.Println("Update error", err)
}
}
if(resp.StatusCode == 200){
body, _ := ioutil.ReadAll(resp.Body)
//log.Println(string(id), string(body));
_, err = db.Exec("update allsources set lastupdate = now(), cache = $1 where id = $2", string(body), id)
if err != nil {
log.Println("Update error", err)
}
}
}
}
wg.Done()
}
// timeoutDialler returns a dial function, suitable for http.Transport.Dial,
// that bounds both connection establishment and all subsequent I/O on the
// connection.
//
// Despite its name, ns is a number of SECONDS: it is multiplied by
// time.Second below. The same duration is used as the connect timeout and as
// an absolute read/write deadline measured from the moment the connection is
// established. A value of 0 means no connect timeout and a deadline that
// expires immediately.
//
// The returned function yields a nil connection together with the error if
// dialing or arming the deadline fails.
func timeoutDialler(ns int64) func(net, addr string) (c net.Conn, err error) {
	timeout := time.Duration(ns) * time.Second
	return func(netw, addr string) (net.Conn, error) {
		// DialTimeout bounds the connect phase as well; a plain Dial can
		// block for the OS default before any deadline is in effect.
		c, err := net.DialTimeout(netw, addr, timeout)
		if err != nil {
			return nil, err
		}
		// The deadline is absolute, not per-operation: the connection stops
		// being usable once it passes, regardless of activity.
		if err := c.SetDeadline(time.Now().Add(timeout)); err != nil {
			// Do not hand out a connection without the promised deadline.
			c.Close()
			return nil, err
		}
		return c, nil
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment