Skip to content

Instantly share code, notes, and snippets.

@alinuxsa
Last active April 17, 2019 01:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alinuxsa/c5f6f935db2806cdbb7898a2e482c1b1 to your computer and use it in GitHub Desktop.
Save alinuxsa/c5f6f935db2806cdbb7898a2e482c1b1 to your computer and use it in GitHub Desktop.
使用go并发下载bing壁纸
package main
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"log"
"net/http"
"os"
"runtime"
"strings"
"sync"
"time"
"github.com/PuerkitoBio/goquery"
)
// BASE_URL is the target site hosting the Bing wallpaper listings.
const BASE_URL string = "https://bing.gifposter.com"

var (
	// DOWNLOADDIR is the destination directory, chosen per platform in init().
	DOWNLOADDIR string
	// pages is the number of listing pages to scrape; read from stdin in main().
	pages int
	// maxRoutineNum caps the number of concurrent downloads.
	maxRoutineNum = 3
	// ch is a counting semaphore: saveImg sends before downloading and
	// receives when finished, bounding concurrency to maxRoutineNum.
	ch = make(chan string, maxRoutineNum)
	// client is shared by all requests. The generous 200s timeout
	// accommodates the target server's slow network.
	client = &http.Client{
		// 200 * time.Second is already a time.Duration; the explicit
		// time.Duration(...) conversion in the original was redundant.
		Timeout: 200 * time.Second,
	}
	// wg lets main wait for every saveImg goroutine to finish.
	wg = sync.WaitGroup{}
)
// init selects a platform-appropriate download directory and ensures it
// exists before main runs.
func init() {
	if runtime.GOOS == "windows" {
		DOWNLOADDIR = "d:\\wallpaper\\"
	} else {
		DOWNLOADDIR = "/tmp/wallpaper/"
	}
	// os.MkdirAll (unlike the original os.Mkdir) succeeds when the directory
	// already exists, so re-running the program no longer aborts here.
	if err := os.MkdirAll(DOWNLOADDIR, 0755); err != nil {
		log.Fatal(err)
	}
	log.Printf("壁纸将保存到 %s \n", DOWNLOADDIR)
}
// main reads the page count from stdin, scrapes the wallpaper URLs, downloads
// them concurrently (bounded by the ch semaphore inside saveImg), and reports
// the total elapsed time.
func main() {
	fmt.Println("输入要下载的页数: 例如 3 每页有32张图片")
	// The original ignored the Scanln error and accepted 0 or negative input,
	// which made the program silently download nothing.
	if _, err := fmt.Scanln(&pages); err != nil || pages <= 0 {
		log.Fatal("请输入一个正整数")
	}
	start := time.Now()
	urls := parseHTML(pages)
	for _, url := range urls {
		wg.Add(1)
		go saveImg(url, ch)
	}
	// Block until every saveImg goroutine has called wg.Done().
	wg.Wait()
	elapsed := time.Since(start)
	log.Printf("所有壁纸下载完毕! 消耗时间 %s\n", elapsed)
}
// parseHTML 从页面解析出包含壁纸的下载地址,并返回切片类型
func parseHTML(n int) []string {
urlPool := []string{}
log.Println("初始化 urlPool", urlPool)
for i := 1; i <= n; i++ {
// 下载n页的壁纸
url := fmt.Sprintf("%s%s%d", BASE_URL, "/?p=", i)
request, err := http.NewRequest("GET", url, nil)
if err != nil {
log.Println(err)
}
request.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0")
// request.Header.Set("Accept-Encoding": "gzip, deflate, br")
request.Header.Set("Connection", "keep-alive")
resp, err := client.Do(request)
if err != nil {
log.Println(err)
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
log.Fatal(err)
}
resp.Body.Close()
// 根据 class查找
doc.Find("img[itemprop]").Each(func(i int, s *goquery.Selection) {
href, IsExist := s.Attr("src")
if IsExist {
href = strings.TrimSpace(href)
// if (strings.HasPrefix(href, "/photo")) && (strings.HasSuffix(href, "force=download")) {
// urlPool = append(urlPool, href)
// }
urlPool = append(urlPool, href)
}
})
}
return urlPool
}
// saveImg 接收url地址开始下载,并将结果通过 channel 返回给 main
func saveImg(url string, ch chan string) {
defer wg.Done()
ch <- url
// 修改要下载的分辨率
url = strings.Replace(url, "576x324", "1920x1080", 1)
log.Printf("开始下载 %s", url)
fileName := strings.Split(url, "/")[2]
// fileName = strings.Replace(fileName, "?force=download", ".jpg", 1)
// url = strings.Replace(BASE_URL, "/?", "", 1) + url
url = BASE_URL + "/" + url
request, err := http.NewRequest("GET", url, nil)
if err != nil {
log.Println(err)
}
request.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0")
// request.Header.Set("Accept-Encoding": "gzip, deflate, br")
request.Header.Set("Connection", "keep-alive")
resp, err := client.Do(request)
if err != nil {
log.Println(err)
}
data, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Println("读取http响应失败", err)
}
// 创建文件
img, err := os.Create(DOWNLOADDIR + fileName)
if err != nil {
log.Println("创建文件失败", err)
}
if _, err := io.Copy(img, bytes.NewReader(data)); err != nil {
log.Println("文件写入错误", err)
}
log.Printf("%s 下载完成 \n", fileName)
<-ch
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment