Skip to content

Instantly share code, notes, and snippets.

@evgenybf
Created December 7, 2015 11:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save evgenybf/fdee37f5f0f7882f6a26 to your computer and use it in GitHub Desktop.
Save evgenybf/fdee37f5f0f7882f6a26 to your computer and use it in GitHub Desktop.
// Grab all pictures from a folder on quizlet.com.
package main
import (
	"bufio"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net/http"
	"net/http/cookiejar"
	"net/url"
	"os"
	"strings"

	"github.com/PuerkitoBio/goquery"
)
// Scraper configuration.
const (
	// startURL is the quizlet.com folder page whose set links are collected
	// and scanned for images.
	startURL = "https://quizlet.com/dprtmaktk/folders/4000-essential-english-words"

	// verbose, when true, additionally logs every fetched page body and the
	// HTTP client's cookie jar.
	verbose = false
)
// _newDocumentFromResponse reads the whole body of resp and parses it as HTML
// into a goquery document. When verbose is set the raw body is logged before
// parsing. The caller remains responsible for closing resp.Body.
//
// It returns a nil document together with the error if either reading the
// body or parsing it fails.
func _newDocumentFromResponse(resp *http.Response) (doc *goquery.Document, err error) {
	raw, readErr := ioutil.ReadAll(resp.Body)
	if readErr != nil {
		return nil, readErr
	}

	page := string(raw)
	if verbose {
		log.Print(page)
	}

	parsed, parseErr := goquery.NewDocumentFromReader(strings.NewReader(page))
	if parseErr != nil {
		return nil, parseErr
	}
	return parsed, nil
}
// doGet fetches url with client and parses the response body into a goquery
// document.
//
// The request is logged; when verbose is set the client's cookie jar is
// logged as well. An error is returned if the request fails, if the server
// answers with a non-2xx status, or if the body cannot be read or parsed.
func doGet(client *http.Client, url string) (doc *goquery.Document, err error) {
	log.Print("GET: ", url)
	if verbose {
		log.Print("cookies: ", client.Jar)
	}
	resp, err := client.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	// client.Get reports only transport-level failures as errors; a 404 or
	// 500 still yields a response whose body would otherwise be parsed as if
	// it were the requested page.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, fmt.Errorf("GET %s: unexpected status %s", url, resp.Status)
	}
	return _newDocumentFromResponse(resp)
}
// doGetFile fetches url with client and streams the response body into out.
//
// The request is logged; when verbose is set the client's cookie jar is
// logged as well. An error is returned if the request fails, if the server
// answers with a non-2xx status (in which case nothing is written to out),
// or if copying the body fails.
func doGetFile(client *http.Client, url string, out io.Writer) (err error) {
	log.Print("GET: ", url)
	if verbose {
		log.Print("cookies: ", client.Jar)
	}
	resp, err := client.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	// client.Get does not treat HTTP error statuses as errors; without this
	// check the body of an error page would be written out as file content.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return fmt.Errorf("GET %s: unexpected status %s", url, resp.Status)
	}
	_, err = io.Copy(out, resp.Body)
	return err
}
// makeAbsoluteURL resolves relativeURL against baseURL and returns the result
// as a string.
//
// If either argument cannot be parsed as a URL, the parse error is logged and
// relativeURL is returned unchanged. url.Parse returns a nil *url.URL on
// failure, so resolving without this check would panic.
func makeAbsoluteURL(baseURL string, relativeURL string) string {
	base, err := url.Parse(baseURL)
	if err != nil {
		log.Print("error: ", err.Error())
		return relativeURL
	}
	ref, err := url.Parse(relativeURL)
	if err != nil {
		log.Print("error: ", err.Error())
		return relativeURL
	}
	return base.ResolveReference(ref).String()
}
// makeImageFileName returns the path, inside the "output" directory, of the
// .jpg file that stores the image for word.
//
// Path separators in word ('/' and '\') are replaced by '_'. The word comes
// from scraped page text, so without this a term such as "and/or" would point
// into a non-existent subdirectory and "../x" would escape the output
// directory.
func makeImageFileName(word string) string {
	safe := strings.NewReplacer("/", "_", "\\", "_").Replace(word)
	return "output/" + safe + ".jpg"
}
// loadAllImages fetches the page at pageURL and, for every term found on it,
// downloads the term's image into the file named by makeImageFileName.
//
// Only a failure to fetch or parse the page itself is returned. A problem
// with an individual term (no image, file cannot be written, download fails)
// is logged and that term is skipped so the remaining terms are still
// processed.
func loadAllImages(client *http.Client, pageURL string) error {
	doc, err := doGet(client, pageURL)
	if err != nil {
		return err
	}
	// NOTE(review): the comma makes this a selector group ("div.term" OR an
	// element named "has-photo"); "div.term.has-photo" was presumably
	// intended. Confirm against the page markup. Terms without an image are
	// skipped below either way.
	doc.Find("div#terms").Find("div.term,has-photo").Each(func(i int, s *goquery.Selection) {
		word := strings.TrimSpace(s.Find("div.text").First().Find("span.qWord").Text())
		log.Print("word: ", word)
		imgSrc, ok := s.Find("span.photo").Find("img").Attr("src") //srcdata-srcset
		if !ok {
			log.Print("error: image not found")
			return
		}
		imgSrc = makeAbsoluteURL(pageURL, imgSrc)
		if err := saveImage(client, imgSrc, makeImageFileName(word)); err != nil {
			log.Print("error: ", err.Error())
		}
	})
	return nil
}

// saveImage downloads imgURL into fileName, creating the file's parent
// directory if needed and removing the file again if anything fails.
func saveImage(client *http.Client, imgURL string, fileName string) (err error) {
	// Create the directory part of the name so the first run does not fail
	// just because the output directory is missing.
	if i := strings.LastIndex(fileName, "/"); i > 0 {
		if err = os.MkdirAll(fileName[:i], 0755); err != nil {
			return err
		}
	}
	out, err := os.Create(fileName)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean lost data, so report it unless an earlier
		// error is already being returned.
		if closeErr := out.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			// Best-effort cleanup: do not leave an empty or truncated image
			// behind. The removal error is deliberately ignored because the
			// original failure is the one worth reporting.
			os.Remove(fileName)
		}
	}()
	w := bufio.NewWriter(out)
	if err = doGetFile(client, imgURL, w); err != nil {
		return err
	}
	return w.Flush()
}
// main fetches the folder page at startURL, collects the absolute URL of
// every set link found on it, and then downloads the images of each set in
// turn. A failure to build the client or to load the folder page is fatal;
// a failure while processing a single set is logged and the next set is
// tried.
func main() {
	cookies, err := cookiejar.New(nil)
	if err != nil {
		log.Fatal(err)
	}
	client := &http.Client{Jar: cookies}

	folderPage, err := doGet(client, startURL)
	if err != nil {
		log.Fatal(err)
	}

	// Gather all set links first, then visit them one by one.
	var setURLs []string
	links := folderPage.Find("article#folder-sets").Find("a.set-link")
	links.Each(func(_ int, link *goquery.Selection) {
		href, found := link.Attr("href")
		if !found {
			return
		}
		setURLs = append(setURLs, makeAbsoluteURL(startURL, href))
	})

	for _, setURL := range setURLs {
		log.Print(setURL)
		if loadErr := loadAllImages(client, setURL); loadErr != nil {
			log.Print(loadErr.Error())
		}
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment