Skip to content

Instantly share code, notes, and snippets.

@kokizzu
Forked from mashingan/downloadmanga.go
Created September 24, 2021 07:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kokizzu/08f0ccacf29628663c6b25b92494fa9e to your computer and use it in GitHub Desktop.
Save kokizzu/08f0ccacf29628663c6b25b92494fa9e to your computer and use it in GitHub Desktop.
package main
import (
"encoding/xml"
"flag"
"fmt"
"io"
"net/http"
"net/url"
"os"
"strconv"
"strings"
"sync"
"time"
)
// toString renders an XML name as its namespace and local part joined by a
// colon. An empty namespace yields a leading ":".
func toString(name xml.Name) string {
	qualified := name.Space
	qualified += ":"
	qualified += name.Local
	return qualified
}
// isPageClass reports whether t is an un-namespaced <div> start element that
// carries a class attribute whose value is exactly "page".
//
// The attribute is looked up by name instead of by position: checking only
// the first attribute's value missed <div id="x" class="page"> and wrongly
// accepted any first attribute (for example id="page").
func isPageClass(t xml.StartElement) bool {
	if t.Name.Space != "" || t.Name.Local != "div" {
		return false
	}
	for _, attr := range t.Attr {
		if attr.Name.Local == "class" && attr.Value == "page" {
			return true
		}
	}
	return false
}
// nextToken reads one token from d and prints the token's dynamic type to
// standard output as a trace line. When the decoder reports an error, nothing
// is printed and the decoder's result is handed back unchanged.
func nextToken(d *xml.Decoder) (token xml.Token, err error) {
	if token, err = d.Token(); err != nil {
		return token, err
	}
	// %T prints the concrete type stored in the xml.Token interface.
	fmt.Printf("the type: %T\n", token)
	return token, nil
}
func pageExtract(linkurl string) (nextlink string, imglink string, err error) {
resp, err := http.Get(linkurl)
if err != nil {
return "", "", err
}
decoder := xml.NewDecoder(resp.Body)
decoder.AutoClose = []string{"link", "meta"}
decoder.Strict = false
searching:
for {
token, err := decoder.Token()
if err != nil {
fmt.Println(err)
if err == io.EOF {
break searching
}
} else {
switch v := token.(type) {
case xml.StartElement:
if isPageClass(v) {
fmt.Println("found!")
break searching
}
default:
}
}
}
_, err = nextToken(decoder)
if err != nil {
fmt.Println(err)
}
stelm, err := nextToken(decoder)
nstelm, ok := stelm.(xml.StartElement)
if !ok {
fmt.Println("cannot convert to start element")
return "", "", fmt.Errorf("Error pageExtract: cannot convert to start element")
}
fmt.Println("next link:", nstelm.Attr[0].Value)
img, err := nextToken(decoder)
nimg, ok := img.(xml.StartElement)
if !ok {
fmt.Println("cannot convert to start element")
return "", "", fmt.Errorf("Error pageExtract: cannot convert to start element")
}
base, _ := url.Parse(linkurl)
nexturl, _ := url.Parse(nstelm.Attr[0].Value)
nextlink = base.ResolveReference(nexturl).String()
imglink = "https:" + nimg.Attr[1].Value
err = nil
return
}
// downloadFile fetches imglink and stores the response body in the current
// directory, in a file named after the last segment of the URL path.
//
// The file name is derived and validated before any network traffic, and the
// file is only created once the server has answered with 200 OK, so a failed
// request no longer leaves an empty file behind or saves an error page as an
// image. Query strings and fragments are not part of the file name.
//
// Errors are returned to the caller and not printed here; on success one
// summary line is written to standard output.
func downloadFile(imglink string) error {
	u, err := url.Parse(imglink)
	if err != nil {
		return fmt.Errorf("downloadFile: parsing %q: %w", imglink, err)
	}
	// EscapedPath keeps percent-escapes as they appear in the link, so names
	// stay the same as before for links without a query string.
	escaped := u.EscapedPath()
	fname := escaped[strings.LastIndex(escaped, "/")+1:]
	if fname == "" {
		return fmt.Errorf("downloadFile: no file name in %q", imglink)
	}

	imgresp, err := http.Get(imglink)
	if err != nil {
		return fmt.Errorf("downloadFile: fetching %s: %w", imglink, err)
	}
	defer imgresp.Body.Close()
	if imgresp.StatusCode != http.StatusOK {
		return fmt.Errorf("downloadFile: fetching %s: unexpected status %s", imglink, imgresp.Status)
	}

	file, err := os.Create(fname)
	if err != nil {
		return fmt.Errorf("downloadFile: creating %s: %w", fname, err)
	}

	size, err := io.Copy(file, imgresp.Body)
	// A failed Close can mean the data never reached the disk, so it counts
	// as a write failure unless the copy already failed.
	if cerr := file.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		// Best-effort cleanup of the partial file; the write error is the
		// one worth reporting, so a removal failure is deliberately ignored.
		_ = os.Remove(fname)
		return fmt.Errorf("downloadFile: writing %s: %w", fname, err)
	}

	fmt.Printf("downloaded a file %s with size %d\n", fname, size)
	return nil
}
// chapterExtract returns the chapter number encoded in a reader link of the
// form ".../r/<name>/<chapter>[/...]".
//
// It returns (-1, nil) when link contains no "/r/" marker, and (-1, err) when
// the marker is present but the chapter segment is missing or not an integer.
// A chapter segment without a trailing slash is accepted; previously that
// case, and a link ending right after <name>, caused a slice-bounds panic.
func chapterExtract(link string) (int, error) {
	rpos := strings.Index(link, "/r/")
	if rpos == -1 {
		return rpos, nil
	}
	// segments[0] is the name, segments[1] the chapter; anything after the
	// chapter stays unsplit in segments[2] and is ignored.
	segments := strings.SplitN(link[rpos+3:], "/", 3)
	if len(segments) < 2 {
		return -1, fmt.Errorf("chapterExtract: no chapter segment in %q", link)
	}
	result, err := strconv.Atoi(segments[1])
	if err != nil {
		return -1, fmt.Errorf("chapterExtract: chapter in %q: %w", link, err)
	}
	return result, nil
}
// main reads the -url flag and then follows the reader page by page starting
// there. Each page's image is downloaded in its own goroutine while the next
// page is being fetched. The walk stops when a page cannot be processed, when
// no image link is found, or when the next link is not a reader link or
// belongs to a different chapter than the starting URL.
//
// Every exit from the loop goes through wg.Wait, so downloads that are already
// running are allowed to finish even when a later page fails.
func main() {
	mangaurl := ""
	flag.StringVar(&mangaurl, "url", "", "Please specify the url")
	flag.Parse()
	if mangaurl == "" {
		// A missing flag is a usage error, not a program bug: show the usage
		// text and exit non-zero instead of panicking with a stack trace.
		fmt.Fprintln(os.Stderr, "No url specified")
		flag.Usage()
		os.Exit(2)
	}

	thisChapter, err := chapterExtract(mangaurl)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("This chapter:", thisChapter)

	var wg sync.WaitGroup
	start := time.Now()
	nextlink := mangaurl
	for {
		var imglink string
		nextlink, imglink, err = pageExtract(nextlink)
		if err != nil {
			fmt.Println(err)
			break
		}
		fmt.Println("Next link:", nextlink)
		fmt.Println("img link:", imglink)
		if imglink == "" {
			break
		}

		wg.Add(1)
		// The link is passed as an argument so each goroutine owns its copy.
		go func(link string) {
			defer wg.Done()
			if err := downloadFile(link); err != nil {
				fmt.Println(err)
			}
		}(imglink)

		nextChapter, chapErr := chapterExtract(nextlink)
		if chapErr != nil {
			fmt.Println(chapErr)
			break
		}
		if nextChapter == -1 {
			break
		}
		fmt.Printf("old %d and current %d\n", thisChapter, nextChapter)
		if nextChapter != thisChapter {
			break
		}
	}

	wg.Wait()
	fmt.Println("ended after:", time.Since(start))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment