Skip to content

Instantly share code, notes, and snippets.

@azyobuzin
Forked from 7yan00/imageClowler.go
Last active August 29, 2015 14:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save azyobuzin/7f6fd823c37b95642d2f to your computer and use it in GitHub Desktop.
Save azyobuzin/7f6fd823c37b95642d2f to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"io"
"net/http"
"net/url"
"os"
"strings"
"sync"
"github.com/PuerkitoBio/goquery"
)
// Package-level crawler state shared by the functions in this file.
var (
	// stock holds the string form of every page and image URL handled so
	// far; containsInStock scans it to skip repeats.
	stock = []string{}

	// base is the site root: the crawl starts here and relative links are
	// resolved against it.
	base = "http://blog.golang.org/"

	// i is the running number used to name downloaded image files.
	i int

	// wg counts the GetImage goroutines that are still running; main waits
	// on it before exiting.
	wg = new(sync.WaitGroup)
)
// main fetches the base page, then repeatedly feeds the links it discovers to
// GetUrl until a pass yields no new links, and finally waits for the image
// download goroutines tracked by wg to finish.
func main() {
	doc, err := goquery.NewDocument(base)
	if err != nil {
		// Without the start page there is nothing to crawl; a nil document
		// would otherwise crash inside makeUrl.
		fmt.Fprintf(os.Stderr, "fetching %s: %v\n", base, err)
		os.Exit(1)
	}

	results := makeUrl(doc)
	for len(results) > 0 {
		results = GetUrl(results)
	}
	fmt.Println(results)

	wg.Wait()
}
// containsInStock reports whether value is already recorded in stock.
func containsInStock(value string) bool {
	for _, seen := range stock {
		if seen == value {
			return true
		}
	}
	return false
}
// GetUrl visits each URL in urls that contains base and has not been recorded
// in stock yet. For every such page it records the URL, downloads the page,
// starts a GetImage goroutine for it (registered on wg) and collects the links
// found on it. The collected links are returned so the caller can feed them
// into the next pass; the result is empty when nothing new was found.
//
// Pages that cannot be fetched are reported on stderr and skipped. They stay
// recorded in stock, so they are not retried.
func GetUrl(urls []*url.URL) []*url.URL {
	var next []*url.URL
	for _, item := range urls {
		pageURL := item.String()
		if !strings.Contains(pageURL, base) || containsInStock(pageURL) {
			continue
		}
		fmt.Println(pageURL)
		stock = append(stock, pageURL)

		// Fetch the page being visited. The previous code fetched base here,
		// so every iteration re-downloaded the front page.
		doc, err := goquery.NewDocument(pageURL)
		if err != nil {
			fmt.Fprintf(os.Stderr, "fetching %s: %v\n", pageURL, err)
			continue
		}

		// Extract the links before handing doc to the goroutine.
		next = append(next, makeUrl(doc)...)

		wg.Add(1)
		go GetImage(doc)
	}
	fmt.Println("hohohohohohohohohoho")
	return next
}
// makeUrl returns the target of every <a href> in doc, resolved against base.
// Anchors without an href attribute and hrefs that do not parse as URLs are
// skipped. The result is nil when base itself cannot be parsed or no usable
// link is found.
func makeUrl(doc *goquery.Document) []*url.URL {
	// Parse base once instead of once per anchor.
	root, err := url.Parse(base)
	if err != nil {
		return nil
	}

	var result []*url.URL
	doc.Find("a").Each(func(_ int, s *goquery.Selection) {
		href, ok := s.Attr("href")
		if !ok {
			return
		}
		ref, err := url.Parse(href)
		if err != nil {
			// ResolveReference dereferences its argument, so a failed parse
			// (nil URL) must not reach it.
			return
		}
		result = append(result, root.ResolveReference(ref))
	})
	return result
}
// imageMu serializes the GetImage goroutines' access to the shared stock slice
// and the i file counter.
// NOTE(review): GetUrl also appends to stock without holding this mutex, so
// stock is still not fully synchronized between the crawler and downloaders.
var imageMu sync.Mutex

// GetImage downloads every <img src> in doc that is not yet recorded in stock
// and saves each one as hoge<N>.jpg in the working directory, where N comes
// from the shared counter i. Image URLs are resolved against base.
//
// It calls wg.Done when it returns, so the caller must call wg.Add(1) before
// starting it. Failures on individual images are reported on stderr and do not
// stop the remaining downloads.
func GetImage(doc *goquery.Document) {
	// Deferred so the WaitGroup is released on every exit path.
	defer wg.Done()

	root, err := url.Parse(base)
	if err != nil {
		fmt.Fprintf(os.Stderr, "parsing base %s: %v\n", base, err)
		return
	}

	var images []*url.URL
	doc.Find("img").Each(func(_ int, s *goquery.Selection) {
		src, ok := s.Attr("src")
		if !ok {
			return
		}
		ref, err := url.Parse(src)
		if err != nil {
			return
		}
		images = append(images, root.ResolveReference(ref))
	})

	for _, img := range images {
		rawURL := img.String()
		if !markImageSeen(rawURL) {
			continue
		}
		if err := downloadImage(rawURL); err != nil {
			fmt.Fprintln(os.Stderr, err)
		}
	}
}

// markImageSeen records rawURL in stock and reports true, or reports false
// when it was already recorded.
func markImageSeen(rawURL string) bool {
	imageMu.Lock()
	defer imageMu.Unlock()
	if containsInStock(rawURL) {
		return false
	}
	stock = append(stock, rawURL)
	return true
}

// downloadImage fetches rawURL and writes the response body to the next
// hoge<N>.jpg file. Keeping this in its own function makes the deferred Close
// run after each download rather than when the whole GetImage loop ends.
func downloadImage(rawURL string) error {
	resp, err := http.Get(rawURL)
	if err != nil {
		return fmt.Errorf("fetching %s: %v", rawURL, err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("fetching %s: unexpected status %s", rawURL, resp.Status)
	}

	// Reserve a file number under the lock so concurrent downloads never
	// share a name.
	imageMu.Lock()
	n := i
	i++
	imageMu.Unlock()

	name := fmt.Sprintf("hoge%d.jpg", n)
	file, err := os.Create(name)
	if err != nil {
		return fmt.Errorf("creating %s: %v", name, err)
	}
	if _, err := io.Copy(file, resp.Body); err != nil {
		file.Close()
		return fmt.Errorf("saving %s to %s: %v", rawURL, name, err)
	}
	if err := file.Close(); err != nil {
		return fmt.Errorf("closing %s: %v", name, err)
	}
	return nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment