Skip to content

Instantly share code, notes, and snippets.

@owulveryck
Last active July 2, 2021 06:00
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save owulveryck/510cad103240b345b7db56dfd6a333b9 to your computer and use it in GitHub Desktop.
Save owulveryck/510cad103240b345b7db56dfd6a333b9 to your computer and use it in GitHub Desktop.
Simple script to turn a webpage into an epub
package main
import (
"log"
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"github.com/bmaupin/go-epub"
"github.com/cixtor/readability"
"golang.org/x/net/html"
)
// main downloads the web page named by the first command-line argument,
// extracts its readable content, embeds the referenced images and writes the
// result as an EPUB file to the path given by the second argument.
// Any failure is fatal and reported through the log package.
func main() {
	if len(os.Args) != 3 {
		log.Fatalf("usage: %v URL output.epub", os.Args[0])
	}
	// Named pageURL rather than url so the net/url package is not shadowed.
	pageURL, output := os.Args[1], os.Args[2]

	res, err := http.Get(pageURL)
	if err != nil {
		log.Fatal(err)
	}
	defer res.Body.Close()
	// Without this check an HTTP error page would be converted as if it
	// were the requested article.
	if res.StatusCode < 200 || res.StatusCode > 299 {
		log.Fatalf("fetching %v: unexpected status %v", pageURL, res.Status)
	}

	article, err := readability.New().Parse(res.Body, pageURL)
	if err != nil {
		log.Fatal(err)
	}

	d := newDocument()
	if err = d.setMeta(&article); err != nil {
		log.Fatal(err)
	}
	// Rewrite <img> sources so they point at images stored inside the EPUB.
	if err = d.replaceImages(article.Node); err != nil {
		log.Fatal(err)
	}

	var body strings.Builder
	if err = html.Render(&body, article.Node); err != nil {
		log.Fatal(err)
	}

	// The whole article becomes a single section of the EPUB.
	if _, err = d.AddSection(body.String(), "Content", "", ""); err != nil {
		log.Fatal(err)
	}
	if err = d.Write(output); err != nil {
		log.Fatal(err)
	}
}
// document is the EPUB being assembled. It embeds *epub.Epub, so the Epub
// methods (SetTitle, AddImage, AddSection, Write, ...) are called directly on
// a document value.
type document struct {
*epub.Epub
// buf is a string builder; it is not referenced by the code visible in
// this file. NOTE(review): possibly a leftover, confirm before removing.
buf strings.Builder
// currSection is set to "Preamble" by newDocument and is not read by the
// code visible in this file.
currSection string
}
// newDocument returns a document backed by a fresh Epub created with an empty
// title. The builder field keeps its zero value and the current section
// starts out as "Preamble".
func newDocument() *document {
	doc := new(document)
	doc.Epub = epub.NewEpub("")
	doc.currSection = "Preamble"
	return doc
}
// setMeta copies the article's title, excerpt and byline into the EPUB
// metadata. When the article names a lead image, that image is added to the
// EPUB and used as the cover. The only error returned is the one reported by
// AddImage.
func (d *document) setMeta(a *readability.Article) error {
	d.SetTitle(a.Title)
	d.SetDescription(a.Excerpt)
	d.SetAuthor(a.Byline)

	// No lead image: the metadata is complete without a cover.
	if a.Image == "" {
		return nil
	}

	cover, err := d.AddImage(a.Image, "")
	if err != nil {
		return err
	}
	d.SetCover(cover, "")
	return nil
}
// replaceImages walks the HTML tree rooted at n. For every <img> element it
// adds the image referenced by the src attribute to the EPUB, using the base
// name of the URL path as the internal file name, and rewrites src to the
// path returned by AddImage. Any srcset attribute is dropped.
// NOTE(review): presumably srcset is removed because it would still point at
// the remote images; confirm against reader behaviour.
//
// It returns the first error from parsing a src URL or from AddImage.
func (d *document) replaceImages(n *html.Node) error {
	if n.Type == html.ElementNode && n.Data == "img" {
		// Build the filtered attribute list in a separate slice. Deleting
		// from n.Attr by swap-with-last while ranging over it skips the
		// attribute moved into the freed slot, so an <img srcset=… src=…>
		// never had its src rewritten.
		attrs := make([]html.Attribute, 0, len(n.Attr))
		for _, a := range n.Attr {
			switch a.Key {
			case "srcset":
				// Drop the attribute entirely.
				continue
			case "src":
				// Derive the internal file name from the URL path.
				u, err := url.Parse(a.Val)
				if err != nil {
					return err
				}
				img, err := d.AddImage(a.Val, filepath.Base(u.Path))
				if err != nil {
					return err
				}
				a.Val = img
			}
			attrs = append(attrs, a)
		}
		n.Attr = attrs
	}

	for c := n.FirstChild; c != nil; c = c.NextSibling {
		if err := d.replaceImages(c); err != nil {
			return err
		}
	}
	return nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment