Skip to content

Instantly share code, notes, and snippets.

@jjjake
Created October 29, 2015 18:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jjjake/e1ca9bda667f693762c5 to your computer and use it in GitHub Desktop.
Save jjjake/e1ca9bda667f693762c5 to your computer and use it in GitHub Desktop.
An Archive.org metadata miner written in Go.
package main
import (
"os"
"bufio"
"crypto/tls"
"net/http"
"io/ioutil"
"fmt"
"time"
)
// Resp bundles an HTTP response together with an error value so that both
// can be carried around as a single value.
type Resp struct {
// Embedded response: its fields and methods (Body, StatusCode, ...) are
// promoted and can be accessed directly on a Resp.
*http.Response
// err is the error stored alongside the response.
err error
}
// fetchTimeout bounds a whole request (connect, headers and body) so that a
// stalled server cannot pin a goroutine forever.
const fetchTimeout = 60 * time.Second

// httpClient is shared by every fetch call. Building a fresh Transport per
// request would prevent connection reuse and leave idle connections behind.
var httpClient = &http.Client{
	Transport: &http.Transport{
		TLSClientConfig: &tls.Config{},
	},
	Timeout: fetchTimeout,
}

// fetch downloads uri and writes the response body to standard output,
// followed by a newline.
//
// If the request cannot be performed, or the body cannot be read in full,
// nothing is printed and uri is sent back on queue so that it is retried.
// The send blocks until a receiver takes the value.
func fetch(uri string, queue chan string) {
	resp, err := httpClient.Get(uri)
	if err != nil {
		// re-queue; resp is nil here, so it must not be touched.
		queue <- uri
		return
	}
	// Only safe to defer once we know resp is non-nil.
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		// A partial body is useless as output; retry the whole item.
		queue <- uri
		return
	}
	fmt.Println(string(body))
}
// q reads item identifiers from the file named by itemlist, one per line,
// and sends the corresponding archive.org metadata URL for each on queue.
//
// Empty lines are skipped, since they would otherwise yield a request for
// the bare metadata endpoint. Failures to open or read the file are reported
// on standard error; q then returns without sending anything further.
// q does not close queue.
func q(itemlist string, queue chan string) {
	file, err := os.Open(itemlist)
	if err != nil {
		fmt.Fprintf(os.Stderr, "opening item list: %v\n", err)
		return
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		id := scanner.Text()
		if id == "" {
			continue
		}
		queue <- "http://archive.org/metadata/" + id
	}
	// Scan returns false on both EOF and failure; Err tells them apart.
	if err := scanner.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "reading item list %q: %v\n", itemlist, err)
	}
}
// main feeds the identifiers listed in itemlist.txt through fetch, starting
// at most 500 fetches per second, and exits once the list has been fully
// read and every fetch (including retries) has returned.
func main() {
	// rate-limiter
	rate := time.Second / 500
	throttle := time.NewTicker(rate)
	defer throttle.Stop()

	queue := make(chan string)
	produced := make(chan struct{}) // closed when q has returned
	finished := make(chan struct{}) // one value per completed fetch

	go func() {
		defer close(produced)
		q("itemlist.txt", queue)
	}()

	// pending counts fetches that have been started but have not yet
	// reported on finished. It is only touched by this goroutine. A fetch
	// that re-queues its uri does so before it returns, so the retry is
	// counted before the failed attempt is subtracted and pending cannot
	// reach zero while work remains.
	pending := 0
	for produced != nil || pending > 0 {
		select {
		case uri := <-queue:
			pending++
			<-throttle.C
			go func() {
				fetch(uri, queue)
				finished <- struct{}{}
			}()
		case <-finished:
			pending--
		case <-produced:
			// A nil channel is never ready; this disables the case.
			produced = nil
		}
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment