Skip to content

Instantly share code, notes, and snippets.

@go-ive
Created February 10, 2014 20:45
Show Gist options
  • Save go-ive/8923822 to your computer and use it in GitHub Desktop.
Save go-ive/8923822 to your computer and use it in GitHub Desktop.
4chan image downloader in Go
package main
import (
"code.google.com/p/go.net/html"
"flag"
"fmt"
"github.com/ziutek/mymysql/mysql"
_ "github.com/ziutek/mymysql/native"
"io"
"io/ioutil"
"log"
"net/http"
"os"
"strings"
)
// SUB is the board path segment, including its leading and trailing slashes
// (for example "/b/" or "/ck/"). main overwrites it from the first
// command-line argument, and collectUrls reads it to build the image-host
// prefix that links must contain.
var SUB = "/b/"
// main selects the board from the first command-line argument (keeping the
// default in SUB when none is given) and downloads that board's front-page
// images.
//
// Usage: 4chanloader [board]   e.g. "b" or "ck"
func main() {
	flag.Parse()
	// Indexing flag.Args()[0] unconditionally panics when no board is given;
	// fall back to the default board instead.
	if flag.NArg() > 0 {
		SUB = "/" + strings.Trim(flag.Arg(0), "/") + "/"
	}
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run connects to the database, scrapes the board page for image links and
// downloads every image whose URL is not yet recorded in the files table.
// Per-image failures are logged and skipped; only setup failures are returned.
func run() error {
	db := mysql.New("tcp", "", "localhost:3306", "user", "pw", "db")
	if err := db.Connect(); err != nil {
		return fmt.Errorf("connecting to database: %v", err)
	}
	defer db.Close()

	// Both statements are prepared so the scraped URL is always sent as a
	// bound parameter and never spliced into the SQL text.
	lookup, err := db.Prepare("select * from files where name = ?")
	if err != nil {
		return fmt.Errorf("preparing lookup statement: %v", err)
	}
	insert, err := db.Prepare("insert into files values (?)")
	if err != nil {
		return fmt.Errorf("preparing insert statement: %v", err)
	}

	// SUB starts and ends with a slash, so dir ends with one as well.
	dir := "./pics" + SUB

	for _, src := range getImageSrcFromPage("http://4chan.org" + SUB) {
		imageURL := absoluteURL(src)

		rows, _, err := lookup.Exec(imageURL)
		if err != nil {
			log.Printf("looking up %s: %v", imageURL, err)
			continue
		}
		if len(rows) > 0 {
			// Already downloaded on an earlier run.
			continue
		}

		fmt.Println("Processing " + imageURL)
		if err := downloadFile(imageURL, dir); err != nil {
			log.Printf("downloading %s: %v", imageURL, err)
			continue
		}
		// Record the URL only after a successful download so that failed
		// downloads are retried on the next run.
		if _, err := insert.Run(imageURL); err != nil {
			log.Printf("recording %s: %v", imageURL, err)
		}
	}
	return nil
}

// absoluteURL turns a protocol-relative link ("//host/path") into an http URL
// and leaves links that already carry a scheme untouched.
func absoluteURL(src string) string {
	if strings.HasPrefix(src, "//") {
		return "http:" + src
	}
	return src
}

// downloadFile fetches imageURL and stores it in dir under the last path
// segment of the URL. dir must end with a path separator; it is created
// (including parents) when missing.
func downloadFile(imageURL, dir string) error {
	name := imageURL[strings.LastIndex(imageURL, "/")+1:]
	if name == "" {
		return fmt.Errorf("no file name in %q", imageURL)
	}

	response, err := http.Get(imageURL)
	if err != nil {
		return err
	}
	defer response.Body.Close()
	if response.StatusCode != http.StatusOK {
		// Do not save an error page under an image's file name.
		return fmt.Errorf("unexpected status %s", response.Status)
	}

	if err := os.MkdirAll(dir, 0777); err != nil {
		return err
	}
	out, err := os.Create(dir + name)
	if err != nil {
		return err
	}
	if _, err := io.Copy(out, response.Body); err != nil {
		out.Close()
		// Remove the partial file so a later run does not mistake it for a
		// complete image.
		os.Remove(out.Name())
		return err
	}
	return out.Close()
}
// getImageSrcFromPage fetches the HTML page at url and returns the image
// links that collectUrls extracts from it. When the page cannot be fetched,
// answers with a non-200 status, or cannot be read completely, the reason is
// logged and nil is returned.
func getImageSrcFromPage(url string) []string {
	response, err := http.Get(url)
	if err != nil {
		log.Printf("fetching %s: %v", url, err)
		return nil
	}
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		log.Printf("fetching %s: unexpected status %s", url, response.Status)
		return nil
	}

	body, err := ioutil.ReadAll(response.Body)
	if err != nil {
		// A truncated page would yield an incomplete link list; treat it
		// as a failed fetch instead.
		log.Printf("reading %s: %v", url, err)
		return nil
	}
	return collectUrls(string(body))
}
// collectUrls parses body as HTML and returns the href value of every <a>
// element whose link contains the image-host prefix for the current board
// ("//i.4cdn.org" + SUB + "src/"), in document order. It returns nil when
// the document cannot be parsed.
func collectUrls(body string) []string {
	document, err := html.Parse(strings.NewReader(body))
	if err != nil {
		// Log the error rather than the whole page, and stop here: walking
		// a nil document would panic.
		log.Printf("Error parsing HTML: %v\n", err)
		return nil
	}

	// The prefix is the same for every node, so build it once.
	prefix := "//i.4cdn.org" + SUB + "src/"
	urls := make([]string, 0)

	var walk func(*html.Node)
	walk = func(n *html.Node) {
		if n.Type == html.ElementNode && (n.Data == "a" || n.Data == "A") {
			for _, a := range n.Attr {
				if (a.Key == "href" || a.Key == "HREF") && strings.Contains(a.Val, prefix) {
					urls = append(urls, a.Val)
					// One link per anchor is enough.
					break
				}
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(document)
	return urls
}
@go-ive
Copy link
Author

go-ive commented Feb 10, 2014

run with (pass the board name as the first argument):
go run 4chanloader.go <board>

example:
go run 4chanloader.go b
go run 4chanloader.go ck

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment