Created
December 7, 2015 11:08
-
-
Save evgenybf/fdee37f5f0f7882f6a26 to your computer and use it in GitHub Desktop.
Grab all pictures from a folder on quizlet.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"io" | |
"io/ioutil" | |
"log" | |
"net/http" | |
"net/http/cookiejar" | |
"net/url" | |
"os" | |
"strings" | |
"github.com/PuerkitoBio/goquery" | |
) | |
// Program-wide settings.
const (
	// startURL is the Quizlet folder page the program starts crawling from.
	startURL = "https://quizlet.com/dprtmaktk/folders/4000-essential-english-words"
	// verbose turns on extra diagnostic logging (response bodies, cookie jar).
	verbose = false
)
func _newDocumentFromResponse(resp *http.Response) (doc *goquery.Document, err error) { | |
body, err := ioutil.ReadAll(resp.Body) | |
if err != nil { | |
return nil, err | |
} | |
if verbose { | |
log.Print(string(body)) | |
} | |
doc, err = goquery.NewDocumentFromReader(strings.NewReader(string(body))) | |
if err != nil { | |
return nil, err | |
} | |
return doc, nil | |
} | |
func doGet(client *http.Client, url string) (doc *goquery.Document, err error) { | |
log.Print("GET: ", url) | |
if verbose { | |
log.Print("cookies: ", client.Jar) | |
} | |
resp, err := client.Get(url) | |
if err != nil { | |
return nil, err | |
} | |
defer resp.Body.Close() | |
return _newDocumentFromResponse(resp) | |
} | |
func doGetFile(client *http.Client, url string, out io.Writer) (err error) { | |
log.Print("GET: ", url) | |
if verbose { | |
log.Print("cookies: ", client.Jar) | |
} | |
resp, err := client.Get(url) | |
if err != nil { | |
return err | |
} | |
defer resp.Body.Close() | |
_, err = io.Copy(out, resp.Body) | |
return err | |
} | |
// makeAbsoluteURL resolves relativeURL against baseURL and returns the
// resulting URL as a string. A relativeURL that is already absolute is
// returned in its normalized form.
//
// If either argument cannot be parsed as a URL, relativeURL is returned
// unchanged instead of dereferencing a nil *url.URL; the problem then
// surfaces as an ordinary error when the caller tries to fetch the result.
func makeAbsoluteURL(baseURL string, relativeURL string) string {
	base, err := url.Parse(baseURL)
	if err != nil {
		return relativeURL
	}
	ref, err := url.Parse(relativeURL)
	if err != nil {
		return relativeURL
	}
	return base.ResolveReference(ref).String()
}
// makeImageFileName returns the path of the image file for word, in the form
// "output/<word>.jpg".
//
// word comes from scraped page text, so path separators ('/' and '\\') and
// NUL bytes are replaced with '_' to keep the file inside the output
// directory. Words without those characters map to the same path as before.
func makeImageFileName(word string) string {
	safe := strings.Map(func(r rune) rune {
		switch r {
		case '/', '\\', 0:
			return '_'
		}
		return r
	}, word)
	return "output/" + safe + ".jpg"
}
func loadAllImages(client *http.Client, pageURL string) error { | |
doc, err := doGet(client, pageURL) | |
if err != nil { | |
return err | |
} | |
doc.Find("div#terms").Find("div.term,has-photo").Each(func(i int, s *goquery.Selection) { | |
word := strings.TrimSpace(s.Find("div.text").First().Find("span.qWord").Text()) | |
log.Print("word: ", word) | |
imgSrc, ok := s.Find("span.photo").Find("img").Attr("src") //srcdata-srcset | |
if !ok { | |
log.Print("error: image not found") | |
return | |
} | |
imgSrc = makeAbsoluteURL(pageURL, imgSrc) | |
out, err := os.Create(makeImageFileName(word)) | |
if err != nil { | |
log.Print("error: ", err.Error()) | |
return | |
} | |
w := bufio.NewWriter(out) | |
err = doGetFile(client, imgSrc, w) | |
if err != nil { | |
log.Print("error: ", err.Error()) | |
} | |
w.Flush() | |
out.Close() | |
}) | |
return nil | |
} | |
func main() { | |
jar, err := cookiejar.New(nil) | |
if err != nil { | |
log.Fatal(err) | |
} | |
client := &http.Client{Jar: jar} | |
doc, err := doGet(client, startURL) | |
if err != nil { | |
log.Fatal(err) | |
} | |
subFolders := []string{} | |
doc.Find("article#folder-sets").Find("a.set-link").Each(func(i int, s *goquery.Selection) { | |
href, ok := s.Attr("href") | |
if ok { | |
href = makeAbsoluteURL(startURL, href) | |
subFolders = append(subFolders, href) | |
} | |
}) | |
for _, folderURL := range subFolders { | |
log.Print(folderURL) | |
err := loadAllImages(client, folderURL) | |
if err != nil { | |
log.Print(err.Error()) | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment