Last active
December 15, 2015 05:29
-
-
Save russellbeattie/5209133 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
_ "github.com/bmizerany/pq" | |
"database/sql" | |
"net/http" | |
"log" | |
"net" | |
"time" | |
"strconv" | |
"sync" | |
"io/ioutil" | |
) | |
// wg tracks the fetchUrls worker goroutines: main calls Add once per worker
// before starting it, each worker calls Done when it finishes, and main
// blocks in Wait until all of them have returned.
var wg sync.WaitGroup
// getConn opens a handle to the local "magnet" PostgreSQL database as user
// "russell" with SSL disabled.
//
// If sql.Open fails (for example because the "postgres" driver has not been
// registered) the error is logged and nil is returned, so callers should check
// the result for nil before using it. A non-nil handle does not by itself prove
// that the server is reachable; that only shows up on the first query.
func getConn() *sql.DB {
	db, err := sql.Open("postgres", "user=russell dbname=magnet sslmode=disable")
	if err != nil {
		log.Println("sql.Open:", err)
		return nil
	}
	return db
}
func main() { | |
db := getConn() | |
defer db.Close() | |
var total int | |
db.QueryRow("SELECT count(*) as total FROM allsources where errorcode != 404").Scan(&total) | |
log.Println("total", total) | |
routines := 5 | |
limit := total / routines | |
log.Println("offset", limit) | |
for i := 0; i < routines; i++ { | |
wg.Add(1) | |
go fetchUrls(i, limit); | |
} | |
wg.Wait() | |
db.Close() | |
} | |
func fetchUrls(procnum int, limit int){ | |
db := getConn() | |
defer db.Close() | |
sql := "select id, sourceurl from allsources where errorcode !=404 order by id limit " + strconv.Itoa(limit) + " offset " + strconv.Itoa(limit * procnum) | |
// log.Println(sql) | |
rows, err := db.Query(sql) | |
if err != nil { | |
log.Println(err) | |
wg.Done() | |
return | |
} | |
client := &http.Client{ | |
Transport: &http.Transport{ | |
Dial: timeoutDialler(30), | |
}, | |
} | |
for rows.Next() { | |
var id int | |
var url string | |
rows.Scan(&id, &url) | |
req, err := http.NewRequest("GET", url, nil) | |
if err != nil { | |
log.Println(err) | |
} | |
req.Header.Set("User-Agent", "Feedbot") | |
resp, err := client.Do(req) | |
if err != nil { | |
log.Println(id, " ", "TIMEOUT", url, "(", procnum , ")") | |
} else { | |
defer resp.Body.Close() | |
log.Println(id, resp.StatusCode, url, "(", procnum , ")") | |
if(resp.StatusCode == 404){ | |
_, err = db.Exec("update allsources set errorcode = 404 where id = $1", id) | |
if err != nil { | |
log.Println("Update error", err) | |
} | |
} | |
if(resp.StatusCode == 200){ | |
body, _ := ioutil.ReadAll(resp.Body) | |
//log.Println(string(id), string(body)); | |
_, err = db.Exec("update allsources set lastupdate = now(), cache = $1 where id = $2", string(body), id) | |
if err != nil { | |
log.Println("Update error", err) | |
} | |
} | |
} | |
} | |
wg.Done() | |
} | |
// timeoutDialler returns a dial function suitable for http.Transport.Dial.
//
// ns is a number of SECONDS (despite the name). A positive value bounds the
// connection attempt itself, and every returned connection gets an absolute
// read/write deadline of ns seconds from the moment it was established. The
// deadline is set once and is not extended by later activity on the
// connection.
//
// The returned function yields a nil connection together with the error when
// dialling or setting the deadline fails.
func timeoutDialler(ns int64) func(network, addr string) (net.Conn, error) {
	timeout := time.Duration(ns) * time.Second
	return func(network, addr string) (net.Conn, error) {
		var (
			c   net.Conn
			err error
		)
		if timeout > 0 {
			// Bound the connect phase too, so an unresponsive host cannot
			// stall the worker for the OS default connect timeout.
			c, err = net.DialTimeout(network, addr, timeout)
		} else {
			// Zero or negative: keep the plain dial, as DialTimeout would
			// treat 0 as "no timeout" and a negative value as already expired.
			c, err = net.Dial(network, addr)
		}
		if err != nil {
			return nil, err
		}
		if err := c.SetDeadline(time.Now().Add(timeout)); err != nil {
			// Without a deadline the connection could hang forever; give it up.
			c.Close()
			return nil, err
		}
		return c, nil
	}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment