Skip to content

Instantly share code, notes, and snippets.

@kuozo
Created November 22, 2013 12:08
Show Gist options
  • Save kuozo/7598917 to your computer and use it in GitHub Desktop.
Save kuozo/7598917 to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"strings"
"strconv"
"regexp"
"io/ioutil"
"net/http"
)
// Crawl configuration: the listing page to crawl, the site root that picture
// paths are relative to, and the local prefix downloaded pictures are saved
// under.
const (
// web_address is the listing page; generate_url appends "?page=N" to it.
web_address string = "http://kfxx.info/mm.php"
// root_addr is prepended to each matched picture path to form its download URL.
root_addr string = "http://kfxx.info/"
// save_location is concatenated directly with the picture's file name, with
// no path separator inserted, so a directory value must end with one.
// The value below is a placeholder to be replaced before running.
save_location string = "your storage pic address"
)
// generate_url builds the URL of one listing page by appending the page
// number to web_address as the "page" query parameter.
func generate_url(index int) (url string) {
	page := strconv.Itoa(index)
	return web_address + "?page=" + page
}
// picPathRe matches one relative picture path such as "assets/mm/name.jpg".
// The name part is matched lazily and may not contain quotes, angle brackets
// or whitespace, so several pictures on the same line of HTML produce
// separate matches instead of one span from the first to the last. The dot
// before "jpg" is escaped so that only a real ".jpg" extension matches.
var picPathRe = regexp.MustCompile(`assets/mm/[^"'<>\s]+?\.jpg`)

// findAllPics returns every picture path under assets/mm/ that ends in .jpg
// and appears in content, in order of appearance. It returns nil when
// content contains no such path.
func findAllPics(content string) (pics []string) {
	// Only the full match is needed, so the submatch API is unnecessary.
	return picPathRe.FindAllString(content, -1)
}
// savePic downloads the picture at root_addr+pic and writes it to
// save_location followed by the picture's file name (see getFileName).
//
// Failures are reported on standard output and the picture is skipped; the
// function never panics on a network or file error. A response with a
// status other than 200 OK is not saved, so an HTML error page never ends up
// on disk under a .jpg name.
func savePic(pic string) {
	url := root_addr + pic
	resp, err := http.Get(url)
	if err != nil {
		fmt.Println("can not fetch the picture", url, err)
		return
	}
	// Close the body on every path below, including the early returns.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		fmt.Println("can not fetch the picture", url, resp.Status)
		return
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("can not read the picture", url, err)
		return
	}

	// save_location is used as a plain prefix, exactly as configured.
	fileName := save_location + getFileName(pic)
	fmt.Println(fileName)
	if err := ioutil.WriteFile(fileName, body, 0644); err != nil {
		fmt.Println("can not save the picture", fileName, err)
	}
}
// getFileName returns the part of pic after its last "/". A pic without any
// "/" is returned unchanged, and a pic ending in "/" yields the empty string.
func getFileName(pic string) string {
	// LastIndex returns -1 when there is no slash, which makes the slice
	// start at 0 and return the whole string.
	return pic[strings.LastIndex(pic, "/")+1:]
}
// fetchPage crawls listing pages 1 through 5. For each page it extracts the
// picture paths with findAllPics and downloads every picture concurrently
// with savePic. It returns only after all started downloads have finished,
// so the program cannot exit while pictures are still being written.
//
// If a page cannot be fetched, the failure is printed and no further pages
// are crawled; downloads already started are still awaited.
func fetchPage() {
	// Each download goroutine sends one value on done when it finishes;
	// pending counts how many values are still owed.
	done := make(chan struct{})
	pending := 0
	defer func() {
		for ; pending > 0; pending-- {
			<-done
		}
	}()

	for i := 1; i <= 5; i++ {
		url := generate_url(i)
		body, err := readPage(url)
		if err != nil {
			fmt.Println("can not fetch the web address", url, err)
			return
		}
		fmt.Printf("url:%s can access\n", url)

		for _, item := range findAllPics(body) {
			pending++
			// Pass the path as an argument so each goroutine has its own copy.
			go func(pic string) {
				savePic(pic)
				done <- struct{}{}
			}(item)
		}
	}
}

// readPage fetches url and returns the response body as a string. It returns
// an error when the request fails, the status is not 200 OK, or the body
// cannot be read. The body is closed before readPage returns, so responses
// do not accumulate across the pages of a crawl.
func readPage(url string) (string, error) {
	resp, err := http.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("unexpected status %s", resp.Status)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	return string(body), nil
}
// main announces the listing address being crawled and then runs the crawl.
func main() {
	const banner = "Starting fetch web from: "
	fmt.Println(banner, web_address)
	fetchPage()
}
@kuozo
Copy link
Author

kuozo commented Nov 22, 2013

用土办法写的一个爬妹纸的图
尝试用Golang写东西,才发现变量定义和字符串的引号问题是一个大问题

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment