Skip to content

Instantly share code, notes, and snippets.

@sosoyososo
Last active March 10, 2016 03:25
Show Gist options
  • Save sosoyososo/55d6ecbb9d602d4db558 to your computer and use it in GitHub Desktop.
Save sosoyososo/55d6ecbb9d602d4db558 to your computer and use it in GitHub Desktop.
在 easou.com 使用任意小说的任意一章的 URL 作为参数,获取这本小说的所有文本内容,并按照章节保存为 txt(貌似已经因为 easou 网站的更新不管用了)
package main
import (
"fmt"
"io/ioutil"
"net/http"
"os"
"strings"
)
// getHtmlContentWithUrl downloads url with an HTTP GET request and returns
// the raw response body.
//
// It returns an empty result when the request cannot be performed or the body
// cannot be read completely. The HTTP status code is not inspected, so the
// body of an error page is returned like any other body.
func getHtmlContentWithUrl(url string) []byte {
	resp, err := http.Get(url)
	if err != nil {
		// resp is nil when the request fails, so the body must not be
		// touched (or scheduled for closing) before this check.
		return nil
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil
	}
	return body
}
// isPathExist reports whether os.Stat succeeds for path. Any Stat error,
// whether "does not exist" or something else, yields false.
func isPathExist(path string) bool {
	_, statErr := os.Stat(path)
	return statErr == nil
}
// makeDir creates the directory dir with permission 0700.
//
// It returns dir when the directory was created by this call, and "" when
// nothing was created: either the path already exists or os.Mkdir failed for
// another reason (for example a missing parent directory).
func makeDir(dir string) string {
	// os.Mkdir itself fails for an existing path, so a single error check
	// covers both "already there" and "could not create" without a separate
	// existence test that could race with other processes.
	if err := os.Mkdir(dir, 0700); err != nil {
		return ""
	}
	return dir
}
// makeFileWithBytes writes content to a file inside the "./pages/" directory,
// attempting to create that directory first.
//
// Despite its name, url is used verbatim as the file name appended to
// "./pages/". The resulting path is printed to standard output. The function
// has no error return, so failures to create, write or close the file are
// reported on standard error.
func makeFileWithBytes(url string, content []byte) {
	relativePath := "./pages/"
	// The result is intentionally unused: if the directory cannot be
	// created, os.Create below fails and that error is reported.
	makeDir(relativePath)
	relativePath += url
	fmt.Println(relativePath)

	file, err := os.Create(relativePath)
	if err != nil {
		fmt.Fprintf(os.Stderr, "creating %s: %v\n", relativePath, err)
		return
	}
	if _, err := file.Write(content); err != nil {
		fmt.Fprintf(os.Stderr, "writing %s: %v\n", relativePath, err)
		file.Close()
		return
	}
	// Close can surface a delayed write error, so it is checked as well.
	if err := file.Close(); err != nil {
		fmt.Fprintf(os.Stderr, "closing %s: %v\n", relativePath, err)
	}
}
// hasNextPage reports whether the page HTML in currentPage contains the
// closing part of a "下章" (next chapter) link, i.e. the text ">下章</a>".
func hasNextPage(currentPage string) bool {
	const nextLinkTail = ">下章</a>"
	return strings.Contains(currentPage, nextLinkTail)
}
// getPageContent extracts the chapter title and chapter text from the page
// HTML in content.
//
// The title is the text between `class="easou_tit2">` and the next "<". The
// text is what lies between the following `class="easou_con">` and the next
// "</div>", with every "<br/>" replaced by a newline.
//
// Missing markers never cause a panic: if the title cannot be located both
// results are empty, and if only the text cannot be delimited the title is
// returned together with an empty text.
func getPageContent(content string) (string, string) {
	const (
		titleStart   = "class=\"easou_tit2\">"
		contentStart = "class=\"easou_con\">"
		contentEnd   = "</div>"
	)

	index := strings.Index(content, titleStart)
	if index == -1 {
		return "", ""
	}
	rest := content[index+len(titleStart):]

	// The title runs up to the start of the next tag.
	index = strings.Index(rest, "<")
	if index == -1 {
		return "", ""
	}
	title := rest[:index]
	rest = rest[index:]

	// The text container is searched for only after the title.
	index = strings.Index(rest, contentStart)
	if index == -1 {
		return title, ""
	}
	rest = rest[index+len(contentStart):]

	index = strings.Index(rest, contentEnd)
	if index == -1 {
		return title, ""
	}
	txt := strings.Replace(rest[:index], "<br/>", "\n", -1)
	return title, txt
}
// savePage extracts the title and text from the page HTML in content and
// stores them, separated by a blank line, in a file named "<page>.html" via
// makeFileWithBytes.
func savePage(content string, page int) {
	heading, body := getPageContent(content)
	fileName := fmt.Sprintf("%d.html", page)
	makeFileWithBytes(fileName, []byte(heading+"\n\n"+body))
}
// getPage downloads the page at url, saves its extracted content under the
// given page number, and reports whether the page links to a next chapter.
func getPage(url string, page int) bool {
	html := string(getHtmlContentWithUrl(url))
	more := hasNextPage(html)
	savePage(html, page)
	return more
}
// pageUrl builds the book.easou.com chapter URL for the novel id nid, with
// page as the value of the "st" query parameter.
func pageUrl(page int, nid string) string {
	const chapterURLFormat = "http://book.easou.com/c/show.m?&nid=%s&st=%d"
	return fmt.Sprintf(chapterURLFormat, nid, page)
}
// getContent downloads and saves the chapters of the novel identified by nid,
// starting at page 1 and moving to the next page number for as long as
// getPage reports a following chapter. Each URL is printed before it is
// fetched.
func getContent(nid string) {
	for st := 1; ; st++ {
		target := pageUrl(st, nid)
		fmt.Println(target)
		if !getPage(target, st) {
			return
		}
	}
}
// getNid returns the text that follows the first "nid=" in url, up to but not
// including the next "&" (or to the end of url when there is none). It
// returns "" when url does not contain "nid=".
func getNid(url string) string {
	const key = "nid="

	start := strings.Index(url, key)
	if start < 0 {
		return ""
	}

	value := url[start+len(key):]
	if end := strings.IndexByte(value, '&'); end >= 0 {
		return value[:end]
	}
	return value
}
func main() {
url := os.Args[1]
getContent(getNid(url))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment