Skip to content

Instantly share code, notes, and snippets.

@hjhee
Created October 5, 2018 09:32
Show Gist options
  • Save hjhee/13bed41e342e66f236f9a6f390491033 to your computer and use it in GitHub Desktop.
Save hjhee/13bed41e342e66f236f9a6f390491033 to your computer and use it in GitHub Desktop.
An Image Crawler for Steam
package main
import (
"bytes"
"fmt"
"io/ioutil"
"net/http"
"net/http/httputil"
"net/url"
"os"
"regexp"
"github.com/PuerkitoBio/goquery"
)
var (
	// demoURL is the template request for the Steam community hub content
	// feed. main parses it once and then overwrites every query key listed
	// in parameters with the current page number.
	demoURL = `https://steamcommunity.com/app/49520/homecontent/?userreviewsoffset=0&p=10&workshopitemspage=10&readytouseitemspage=10&mtxitemspage=10&itemspage=10&screenshotspage=10&videospage=10&artpage=10&allguidepage=10&webguidepage=10&integratedguidepage=10&discussionspage=10&numperpage=10&browsefilter=toprated&browsefilter=toprated&appid=49520&appHubSubSection=4&appHubSubSection=4&l=schinese&filterLanguage=default&searchText=moxxi`

	// parameters lists the query keys of demoURL that are set to the page
	// number on every request.
	parameters = []string{
		"p",
		"workshopitemspage",
		"readytouseitemspage",
		"mtxitemspage",
		"itemspage",
		"screenshotspage",
		"videospage",
		"artpage",
		"allguidepage",
		"webguidepage",
		"integratedguidepage",
		"discussionspage",
	}

	// re captures the last path segment of an image URL (word characters
	// only, with an optional trailing slash); parse uses the capture as the
	// base file name. It is compiled at package scope so it is never nil,
	// regardless of whether main has run.
	re = regexp.MustCompile(`\/(\w+)\/?$`)
)
// parse reads one page of Steam community hub content from res, looks up
// every <img> below an ".apphub_CardContentMain" element and downloads each
// one into the local "img" directory.
//
// The response body is always closed. An error is returned when res is nil,
// when the body cannot be read or parsed as HTML, or when the body is empty,
// so that a caller can stop paging. Problems that affect a single image are
// printed to standard output and do not abort the rest of the page.
func parse(res *http.Response) error {
	if res == nil {
		return fmt.Errorf("nil response")
	}
	defer res.Body.Close()

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return err
	}
	if len(body) == 0 {
		fmt.Printf("empty body\n")
		// Only the headers are dumped: the body is known to be empty.
		dump, err := httputil.DumpResponse(res, false)
		if err == nil {
			fmt.Printf("dump:\n%q\n", dump)
		}
		return fmt.Errorf("ContentLength is 0")
	}

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
	if err != nil {
		return err
	}

	doc.Find(".apphub_CardContentMain img").Each(func(i int, s *goquery.Selection) {
		src, ok := s.Attr("src")
		if !ok {
			fmt.Printf("error finding image #%d: attribute \".apphub_CardContentMain img\" does not exists\n", i)
			return
		}
		q, err := url.Parse(src)
		if err != nil {
			fmt.Printf("error parsing url (%s)#%d: %s\n", src, i, err.Error())
			return
		}
		// Drop the query string so the URL ends in the path segment that
		// the file name is derived from.
		q.RawQuery = ""
		fmt.Printf("img#%d: %s\n", i, q)

		if err := saveImage(i, q.String()); err != nil {
			fmt.Printf("error saving image (%s)#%d: %s\n", q, i, err)
		}
	})
	return nil
}

// saveImage downloads rawURL and stores it as img/<name><ext>, where <name> is
// the capture of the package-level re pattern applied to rawURL and <ext> is
// chosen from the response Content-Type (.jpg, .png, .gif, or none for any
// other type). i is the image's index on the page and is used for logging only.
func saveImage(i int, rawURL string) error {
	matches := re.FindStringSubmatch(rawURL)
	if len(matches) < 2 {
		// FindStringSubmatch returns nil when the URL does not match.
		return fmt.Errorf("cannot derive a file name from %q", rawURL)
	}
	filename := "img/" + matches[1]

	r, err := http.Get(rawURL)
	if err != nil {
		return err
	}
	defer r.Body.Close()
	if r.StatusCode != http.StatusOK {
		// Do not store an error page as if it were an image.
		return fmt.Errorf("unexpected status %s", r.Status)
	}
	b, err := ioutil.ReadAll(r.Body)
	if err != nil {
		return err
	}

	switch r.Header.Get("Content-Type") {
	case "image/jpeg":
		filename += ".jpg"
	case "image/png":
		filename += ".png"
	case "image/gif":
		filename += ".gif"
	}

	fmt.Printf("img#%d: writing %s\n", i, filename)
	// Make sure the target directory exists; os.Create does not create it.
	if err := os.MkdirAll("img", 0755); err != nil {
		return err
	}
	f, err := os.Create(filename)
	if err != nil {
		return err
	}
	if _, err := f.Write(b); err != nil {
		f.Close()
		return err
	}
	if err := f.Sync(); err != nil {
		f.Close()
		return err
	}
	return f.Close()
}
// main requests the Steam community hub listing described by demoURL page by
// page, starting at page 1, and hands every response to parse. It stops at
// the first page for which parse returns an error (for example an empty
// body), and exits with status 1 when demoURL is invalid or a page request
// fails.
func main() {
	// Compile the pattern parse uses to turn an image URL into a file name.
	re = regexp.MustCompile(`\/(\w+)\/?$`)

	u, err := url.Parse(demoURL)
	if err != nil {
		fmt.Fprintf(os.Stderr, "invalid demo url: %s\n", err)
		os.Exit(1)
	}
	q := u.Query()

	for i := 1; ; i++ {
		// Every paging parameter carries the same page number.
		p := fmt.Sprintf("%d", i)
		for _, k := range parameters {
			q.Set(k, p)
		}
		u.RawQuery = q.Encode()
		fmt.Printf("#%d url: %s\n", i, u)

		res, err := http.Get(u.String())
		if err != nil {
			// res is nil on error and must not be passed to parse.
			fmt.Fprintf(os.Stderr, "fetching page #%d: %s\n", i, err)
			os.Exit(1)
		}
		if err := parse(res); err != nil {
			fmt.Printf("stopping at page #%d: %s\n", i, err)
			break
		}
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment