Skip to content

Instantly share code, notes, and snippets.

@songtianyi
Last active July 13, 2020 11:30
Show Gist options
  • Save songtianyi/c484d3eff24b2910c4b64d81951c8fb4 to your computer and use it in GitHub Desktop.
Save songtianyi/c484d3eff24b2910c4b64d81951c8fb4 to your computer and use it in GitHub Desktop.
A CLI tool to crawl available e-books from your wish list (e.g. Douban)
package main
import (
"fmt"
"os"
"strconv"
"github.com/songtianyi/laosj/spider"
"github.com/urfave/cli"
)
// doubanHandler crawls the douban "wish" book list of the user named by the
// "uid" flag and writes an HTML page to stdout with one entry per wished book
// whose page contains an e-book purchase section.
//
// An error loading the first wish-list page or reading its paginator aborts
// the run and is returned to the caller. Errors on individual list pages or
// book pages are reported on stderr and that page/book is skipped; they are
// kept off stdout so the redirected output remains a well-formed HTML file.
func doubanHandler(c *cli.Context) error {
	const (
		paginatorSel = "div>div>div.grid-16-8.clearfix>div.article>div.paginator>a"
		bookLinkSel  = "div>div>div.grid-16-8.clearfix>div.article>ul.interest-list>li.subject-item>div.info>h2>a"
		ebookSel     = "div>div>div.grid-16-8.clearfix>div.aside>div.gray_ad>div#buyinfo-ebook>ul.bs.noline.more-after"
		titleSel     = "div>h1>span"
		pageSize     = 15 // offset step used for the "start" query parameter
	)

	uid := c.String("uid")
	uri := "https://book.douban.com/people/" + uid + "/wish"

	ns1, err := spider.CreateSpiderFromUrl(uri)
	if err != nil {
		return err
	}
	t1, err := ns1.GetText(paginatorSel)
	if err != nil {
		return err
	}

	fmt.Print(`<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<title>e-book crab - by songtianyi</title>
<style>
ul li {
list-style: none;
padding: 1px;
margin: 4px;
}
</style>
</head>
<body>
<ul>
`)

	// The paginator link texts determine how many list pages to visit.
	maxx := spider.FindMaxFromSliceString(1, t1)
	for j := 0; j < maxx; j++ {
		page := uri + "?start=" + strconv.Itoa(j*pageSize) + "&sort=time&rating=all&filter=all&mode=grid"
		ns2, err := spider.CreateSpiderFromUrl(page)
		if err != nil {
			fmt.Fprintf(os.Stderr, "skipping page %s: %v\n", page, err)
			continue
		}
		books, err := ns2.GetAttr(bookLinkSel, "href")
		if err != nil {
			fmt.Fprintf(os.Stderr, "skipping page %s: %v\n", page, err)
			continue
		}

		for _, book := range books {
			ns3, err := spider.CreateSpiderFromUrl(book)
			if err != nil {
				fmt.Fprintf(os.Stderr, "skipping book %s: %v\n", book, err)
				continue
			}
			ebooks, err := ns3.GetHtml(ebookSel)
			if err != nil {
				fmt.Fprintf(os.Stderr, "skipping book %s: %v\n", book, err)
				continue
			}
			if len(ebooks) == 0 {
				// No e-book section on this page: nothing to list.
				continue
			}
			titles, err := ns3.GetText(titleSel)
			if err != nil {
				fmt.Fprintf(os.Stderr, "skipping book %s: %v\n", book, err)
				continue
			}

			// Fall back to the book URL when the title selector matched
			// nothing, instead of panicking on titles[0].
			title := book
			if len(titles) > 0 {
				title = titles[0]
			}

			// NOTE(review): title is scraped text and is written unescaped;
			// consider html.EscapeString if the source cannot be trusted.
			fmt.Print(`<li><ul>《` + title + `》` + ebooks[0] + `</ul></li>`)
		}
	}

	fmt.Print(`
</ul>
</body>
</html>`)
	return nil
}
// main builds the CLI application with its single "douban" command and runs
// it against the process arguments. If the run fails, the error is written to
// stderr (stdout is expected to carry the generated HTML) and the process
// exits with status 1.
func main() {
	app := cli.NewApp()
	app.Usage = "A cli tool to crawl available e-books from your wish list(eg. douban)"
	app.Version = "1.0.0"
	app.Commands = []cli.Command{
		{
			Name:   "douban",
			Usage:  "start crawling douban wish list",
			Action: doubanHandler,
			Flags: []cli.Flag{
				cli.StringFlag{
					Name:  "user, uid",
					Value: "64692178",
					Usage: "douban user id",
				},
			},
		},
	}

	if err := app.Run(os.Args); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
@songtianyi
Copy link
Author

songtianyi commented Jun 25, 2019

go run e-book-crab.go  douban > e-books-douban.html

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment