Skip to content

Instantly share code, notes, and snippets.

@fishy
Last active June 15, 2023 03:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fishy/cd5bc89e9fa325728ba79db8ed1c664a to your computer and use it in GitHub Desktop.
Save fishy/cd5bc89e9fa325728ba79db8ed1c664a to your computer and use it in GitHub Desktop.
Render static HTML files from mysqldump of NucleusCMS

Render static HTML files from mysqldump of NucleusCMS

Go code to render static HTML files from mysqldump of NucleusCMS.

Note that the dump must be created with mysqldump's --complete-insert option, because the parser relies on the column names listed in each INSERT statement.

module go.yhsif.com/nucleus-html
go 1.20
require (
go.yhsif.com/ctxslog v0.0.0-20230527163140-c92ec9addc9c
golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1
)
go.yhsif.com/ctxslog v0.0.0-20230527163140-c92ec9addc9c h1:ZHM+YFPO0bYaEPymaK+v469f+auErHsltarJfWojgLY=
go.yhsif.com/ctxslog v0.0.0-20230527163140-c92ec9addc9c/go.mod h1:GZQJAL7ZnD9J9fhGWkny4jLns72AvT7K3eJXT0WMBgw=
golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1 h1:k/i9J1pBpvlfR+9QsetwPyERsqu1GIbi967PQMq3Ivc=
golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1/go.mod h1:V1LtkGg67GoY2N1AnLN78QLrzxkLyJw7RJb1gzOOz9w=
package main
import (
"bufio"
"bytes"
"embed"
"errors"
"flag"
"fmt"
"html/template"
"io"
"os"
"path/filepath"
"regexp"
"sort"
"strconv"
"strings"
"time"
"unicode"
"go.yhsif.com/ctxslog"
"golang.org/x/exp/slog"
)
// templates embeds every *.tmpl file found next to this source file at build
// time. The per-entry and index templates are parsed out of it by file name.
//
//go:embed *.tmpl
var templates embed.FS
// Command-line configuration. Every value except logLevel is registered with
// the flag package here; logLevel is registered in main via flag.TextVar.
var (
// logLevel is the minimal level of log records that get emitted.
logLevel slog.Level
// out is the directory all rendered files are written under.
out = flag.String(
"out",
"out",
"The output directory",
)
// ext is appended to the link of every entry to form its file name.
ext = flag.String(
"extension",
".html",
"File extension",
)
// lang is passed through to the templates as the content language.
lang = flag.String(
"lang",
"en-us",
"Language of the blog",
)
// css is passed through to the templates, which append it to the blog
// index URL to form the stylesheet link.
css = flag.String(
"css",
"white.css",
"Relative CSS url",
)
// favIcon is passed through to the templates as the shortcut icon URL.
favIcon = flag.String(
"fav-icon",
"https://wang.yuxuan.org/greenfish.png",
"Fav icon url",
)
// favIconType is passed through to the templates as the icon's MIME type.
favIconType = flag.String(
"fav-icon-type",
"image/png",
"Fav icon mime type",
)
// blogID selects which blog's rows (and which items) are rendered; it is
// compared as a string against the bnumber and iblog columns.
blogID = flag.String(
"blog-id",
"1",
"The blog id to render",
)
// templateName selects the embedded templates: "<name>.tmpl" for entries
// and "<name>-index.tmpl" for the index page.
templateName = flag.String(
"template-name",
"white",
"The name of the template",
)
// The three suffixes below are matched with strings.HasSuffix against the
// table name of each INSERT statement to decide how its rows are parsed.
itemTableSuffix = flag.String(
"item-table-suffix",
"nucleus_item",
"The suffix of the table containing blog items",
)
blogTableSuffix = flag.String(
"blog-table-suffix",
"nucleus_blog",
"The suffix of the table containing blogs",
)
memberTableSuffix = flag.String(
"member-table-suffix",
"nucleus_member",
"The suffix of the table containing members",
)
)
// insertRE matches one complete single-line INSERT statement that lists its
// column names, and captures three groups: the table name, the raw column
// list, and the raw values list (everything between "VALUES " and the final
// semicolon).
//
// Example:
// INSERT INTO `table` (`column1`, `column2`, `column3`) VALUES ('value1a','value1b',1),('value2a','value2b',2);
var insertRE = regexp.MustCompile(
`^` +
`(?iU)` + // case-insensitive and non-greedy
`INSERT INTO ` +
"`" + `(.*)` + "` " + // 1: table part
`\((.*)\) ` + // 2: columns part
`VALUES ` +
`(.*)` + // 3: values part
`;$`,
)
// main parses the command-line flags, configures logging, reads a mysqldump
// from stdin and renders one HTML file per blog entry plus an index page
// under the output directory.
//
// It exits with status 1 when the entry template cannot be parsed, when the
// dump yields nothing to render, or when any output file fails to be written.
func main() {
	flag.TextVar(
		&logLevel,
		"log-level",
		slog.LevelDebug,
		"Minimal log level",
	)
	flag.Parse()
	slog.SetDefault(slog.New(ctxslog.ContextHandler(slog.NewTextHandler(
		os.Stderr,
		&slog.HandlerOptions{
			AddSource: true,
			Level:     logLevel,
			ReplaceAttr: ctxslog.ChainReplaceAttr(
				ctxslog.StringDuration,
			),
		}),
	)))

	tName := fmt.Sprintf("%s.tmpl", *templateName)
	t, err := template.New(tName).ParseFS(templates, tName)
	if err != nil {
		slog.Error("Failed to parse template", "err", err, "template", *templateName)
		os.Exit(1)
	}
	slog.Debug("template", "template", t.DefinedTemplates())

	index := readEntries(os.Stdin)
	if index == nil {
		// readEntries returns nil (after logging the reason) when the dump
		// has no matching blogs, authors or items. Ranging over
		// index.Entries would dereference that nil pointer.
		os.Exit(1)
	}

	// Keep rendering after a failure so one bad entry does not hide the
	// others, but remember it so the exit status reflects the problem.
	failed := false
	for _, entry := range index.Entries {
		filename := filepath.Join(*out, entry.Link) + *ext
		if err := writeToFile(t, filename, entry); err != nil {
			failed = true
			slog.Error("Failed to write file", "err", err, "title", entry.Title, "link", entry.Link)
			fmt.Fprintf(os.Stderr, "body html:\n%s\n", entry.Body)
		}
	}

	tName = fmt.Sprintf("%s-index.tmpl", *templateName)
	if err := writeIndex(tName, index); err != nil {
		failed = true
		slog.Error("Failed to write index file", "err", err)
	}
	if failed {
		os.Exit(1)
	}
}
// readEntries scans a mysqldump from r line by line, parses the INSERT
// statements of the item, blog and member tables (selected by the table
// suffix flags), and joins them into the data needed to render the site.
//
// It returns nil, after logging the reason, when the dump contains no
// matching blog, no author, or no matching blog item.
func readEntries(r io.Reader) *allItems {
	// Use bufio.Reader over bufio.Scanner because some lines are too long for
	// Scanner.
	reader := bufio.NewReader(r)
	authors := make(map[string]author)
	blogs := make(map[string]blog)
	var items []item

	for done := false; !done; {
		line, err := reader.ReadBytes('\n')
		if err != nil {
			if !errors.Is(err, io.EOF) {
				slog.Error("Failed to read line", "err", err)
				break
			}
			// ReadBytes returns the data read so far together with io.EOF
			// when the input does not end in a newline, so the final line
			// still has to be processed before stopping.
			done = true
		}
		// Strip the line terminator; also accept CRLF line endings, which
		// would otherwise keep the trailing ";" from matching.
		line = bytes.TrimRight(line, "\r\n")

		groups := insertRE.FindSubmatch(line)
		if len(groups) == 0 {
			// Not an INSERT INTO line
			continue
		}
		table := string(groups[1])
		isItem := strings.HasSuffix(table, *itemTableSuffix)
		isBlog := strings.HasSuffix(table, *blogTableSuffix)
		isMember := strings.HasSuffix(table, *memberTableSuffix)
		if !isItem && !isBlog && !isMember {
			continue
		}

		columns := parseColumns(groups[2])
		values, err := parseValues(string(groups[3]), columns)
		if err != nil {
			// Log only the row count: the rows themselves can hold whole
			// blog post bodies.
			slog.Error("Unable to parse values", "err", err, "table", table, "parsed", len(values))
			continue
		}
		slog.Debug("Matched line", "table", table, "columns", columns, "values", len(values))

		switch {
		case isItem:
			items = append(items, parseItems(values, *blogID)...)
		case isBlog:
			parseBlogs(blogs, values, *blogID)
		case isMember:
			parseAuthors(authors, values)
		}
	}

	if len(blogs) == 0 {
		slog.Error("Don't have any matching blogs")
		return nil
	}
	if len(authors) == 0 {
		slog.Error("Don't have any authors")
		return nil
	}
	if len(items) == 0 {
		slog.Error("Don't have any matching blog items")
		return nil
	}
	sortItems(items)
	return join(items, blogs, authors)
}
// parseColumns splits the raw, comma separated column list of an INSERT
// statement into bare column names, stripping the surrounding backticks and
// whitespace from each of them.
func parseColumns(columns []byte) []string {
	isPadding := func(r rune) bool {
		return r == '`' || unicode.IsSpace(r)
	}
	parts := bytes.Split(columns, []byte{','})
	names := make([]string, 0, len(parts))
	for _, part := range parts {
		names = append(names, string(bytes.TrimFunc(part, isPadding)))
	}
	return names
}
// parseValues parses the values part of an INSERT statement, for example
// "('a','b',1),('c','d',2)", into one map per row keyed by column name.
//
// Every row must hold exactly len(columns) values. On a syntax error the rows
// collected so far are returned together with the error.
func parseValues(s string, columns []string) ([]map[string]string, error) {
	var rows []map[string]string
	for len(s) > 0 {
		row := make(map[string]string, len(columns))
		for idx, col := range columns {
			// The first value of a row follows '(', every other one ','.
			sep := byte(',')
			if idx == 0 {
				sep = '('
			}
			rest, err := consumeNextToken(s, sep)
			if err != nil {
				return rows, err
			}
			val, rest, err := consumeNextValue(rest)
			if err != nil {
				return rows, err
			}
			row[col] = val
			s = rest
		}
		rows = append(rows, row)

		var err error
		if s, err = consumeNextToken(s, ')'); err != nil {
			return rows, err
		}
		if s == "" {
			// That was the last row.
			return rows, nil
		}
		if s, err = consumeNextToken(s, ','); err != nil {
			return rows, err
		}
	}
	return rows, nil
}
// consumeNextToken checks that s starts with the single-byte token and
// returns s with that byte removed.
//
// When s is empty or starts with anything else, s is returned unchanged
// together with an error describing what was found instead.
func consumeNextToken(s string, token byte) (string, error) {
	if s == "" {
		// Truncated input: indexing s[0] would panic.
		return s, fmt.Errorf("want token %q got end of input", token)
	}
	if s[0] != token {
		return s, fmt.Errorf("want token %q got %s", token, shortString(s, 10))
	}
	return s[1:], nil
}
// consumeNextValue reads one SQL value from the start of s and returns the
// decoded value and the remaining input.
//
// A value is either a single-quoted string, which ends at the first unescaped
// quote, or an unquoted literal such as a number or NULL, which ends right
// before the next ',' or ')'. For strings the tail starts after the closing
// quote; for unquoted literals the tail starts at the terminating ',' or ')'.
// Backslash escapes inside strings are decoded.
//
// An error is returned when s is empty, when the value is not terminated, or
// when the string contains an escape sequence that cannot be decoded.
func consumeNextValue(s string) (value, tail string, err error) {
	const (
		quote  = '\''
		escape = '\\'
	)
	if s == "" {
		return "", s, fmt.Errorf("want value got end of input")
	}
	isString := s[0] == quote
	if isString {
		s = s[1:]
	}

	// Scan s in place by byte offset. Converting the whole remainder of the
	// line to runes for every value would make parsing a long INSERT line
	// quadratic. All delimiters are ASCII, so i+1 is always a rune boundary.
	var sb strings.Builder
	var escaping, terminated bool
	tailIndex := 0
scan:
	for i, r := range s {
		switch {
		case escaping:
			escaping = false
			switch r {
			case quote:
				// \' is not a valid escape inside a Go double-quoted
				// string, so decode it here.
				sb.WriteRune(r)
			case '0':
				// MySQL NUL escape; rewritten to a form Unquote accepts.
				sb.WriteString(`\x00`)
			case 'Z':
				// MySQL Control+Z (ASCII 26) escape.
				sb.WriteString(`\x1a`)
			default:
				// Leave the rest (\n, \r, \\, \", ...) to strconv.Unquote.
				sb.WriteRune(escape)
				sb.WriteRune(r)
			}
		case isString && r == quote:
			tailIndex = i + 1
			terminated = true
			break scan
		case !isString && (r == ',' || r == ')'):
			tailIndex = i
			terminated = true
			break scan
		case isString && r == escape:
			escaping = true
		case isString && r == '"':
			// A raw double quote must be escaped for strconv.Unquote.
			sb.WriteString(`\"`)
		default:
			sb.WriteRune(r)
		}
	}
	if !terminated {
		if isString {
			return "", s, fmt.Errorf("unterminated string value")
		}
		return "", s, fmt.Errorf("unterminated value")
	}

	ret := sb.String()
	if isString && ret != "" {
		unquoted, uerr := strconv.Unquote(`"` + ret + `"`)
		if uerr != nil {
			return "", s[tailIndex:], fmt.Errorf("failed to unquote %q: %w", ret, uerr)
		}
		ret = unquoted
	}
	return ret, s[tailIndex:], nil
}
// shortString returns s unchanged when it is at most n bytes long. Otherwise
// it returns a prefix of at most n bytes followed by "...".
//
// The prefix always ends on a rune boundary, so a multi-byte character is
// never cut in half; the prefix may therefore be slightly shorter than n.
func shortString(s string, n int) string {
	if len(s) <= n {
		return s
	}
	// Find the last rune start at or before byte offset n.
	cut := 0
	for i := range s {
		if i > n {
			break
		}
		cut = i
	}
	return s[:cut] + "..."
}
// item is one published blog post as read from the item table.
type item struct {
// ID is the inumber column.
ID string
// Title is the ititle column.
Title string
// URLTitle is derived from the iurltitle and ititle columns by
// generateURLTitle; it may be empty.
URLTitle string
// Body is the ibody column, followed by a newline and the imore column when
// the latter is not blank.
Body string
// BlogID is the iblog column.
BlogID string
// AuthorID is the iauthor column.
AuthorID string
// Time is the itime column parsed with the time.DateTime layout.
Time time.Time
}
// parseItems converts rows of the item table into items, newest first.
//
// Rows are dropped when they are drafts, are not marked as posted, belong to a
// blog other than blogID, or carry a timestamp that cannot be parsed (the
// latter is logged).
func parseItems(values []map[string]string, blogID string) []item {
	items := make([]item, 0, len(values))
	for _, row := range values {
		published := row["idraft"] != "1" && row["iposted"] == "1"
		if !published || row["iblog"] != blogID {
			continue
		}

		posted, err := time.Parse(time.DateTime, row["itime"])
		if err != nil {
			slog.Error("Failed to parse time", "err", err, "time", row["itime"], "values", row)
			continue
		}

		// The extended part of a post, when present, goes right below the
		// main body.
		content := row["ibody"]
		if extended := strings.TrimSpace(row["imore"]); extended != "" {
			content += "\n" + extended
		}

		items = append(items, item{
			ID:       row["inumber"],
			Title:    row["ititle"],
			URLTitle: generateURLTitle(row["iurltitle"], row["ititle"]),
			Body:     content,
			BlogID:   row["iblog"],
			AuthorID: row["iauthor"],
			Time:     posted,
		})
	}
	sortItems(items)
	return items
}
// sortItems orders items in place from the most recent to the oldest.
func sortItems(items []item) {
	newerFirst := func(a, b int) bool {
		return items[b].Time.Before(items[a].Time)
	}
	sort.Slice(items, newerFirst)
}
// blog is one row of the blog table.
type blog struct {
// ID is the bnumber column.
ID string
// Name is the bname column.
Name string
// Desc is the bdesc column.
Desc string
// URL is the burl column.
URL string
}
// parseBlogs stores the rows of the blog table whose bnumber equals blogID
// into blogs, keyed by that id. All other rows are ignored.
func parseBlogs(blogs map[string]blog, values []map[string]string, blogID string) {
	for _, row := range values {
		if row["bnumber"] != blogID {
			continue
		}
		blogs[blogID] = blog{
			ID:   blogID,
			Name: row["bname"],
			Desc: row["bdesc"],
			URL:  row["burl"],
		}
	}
}
// author is one row of the member table.
type author struct {
// ID is the mnumber column.
ID string
// Name is the mname column.
Name string
// URL is the murl column.
URL string
}
// parseAuthors stores every row of the member table into authors, keyed by
// the member's mnumber.
func parseAuthors(authors map[string]author, values []map[string]string) {
	for _, row := range values {
		member := author{
			ID:   row["mnumber"],
			Name: row["mname"],
			URL:  row["murl"],
		}
		authors[member.ID] = member
	}
}
// joint is a blog item joined with its blog and author. It is the data the
// per-entry template is executed with, and also one element of the index.
type joint struct {
// BlogName and BlogDesc are the name and description of the item's blog.
BlogName string
BlogDesc string
// Lang, CSS, FavIcon and FavIconType are copied from the flags of the same
// purpose.
Lang string
CSS string
// Index is the blog URL, always ending in "/".
Index string
FavIcon string
FavIconType string
// Date is the item time formatted with time.DateOnly.
Date string
Title string
// Time is the item time formatted with time.TimeOnly.
Time string
// Link is the path of the entry relative to Index, without file extension.
Link string
// Author and AuthorURL are the name and URL of the item's author.
Author string
AuthorURL string
// Body is the item body. Being template.HTML, it is emitted by
// html/template without escaping.
Body template.HTML
}
// join combines every item with its blog and author into the data used to
// render the site.
//
// Items whose author or blog is unknown are logged and left out. The
// blog-level fields of the result are taken from the first item that is kept.
// The link of an entry is "item/<year>/<month>/<url title>" when the item has
// a URL title and "item/<id>" otherwise.
func join(items []item, blogs map[string]blog, authors map[string]author) *allItems {
	result := &allItems{
		Lang:        *lang,
		CSS:         *css,
		FavIcon:     *favIcon,
		FavIconType: *favIconType,
		Entries:     make([]joint, 0, len(items)),
	}
	for _, it := range items {
		writer, found := authors[it.AuthorID]
		if !found {
			slog.Error("Failed to find author for item", "item", it, "author", it.AuthorID)
			continue
		}
		home, found := blogs[it.BlogID]
		if !found {
			slog.Error("Failed to find blog for item", "item", it, "blog", it.BlogID)
			continue
		}

		// Templates concatenate the base URL with relative paths, so make
		// sure it ends in a slash.
		base := home.URL
		if !strings.HasSuffix(base, "/") {
			base += "/"
		}

		// Nothing appended yet means this is the first item being kept.
		if len(result.Entries) == 0 {
			result.BlogName = home.Name
			result.BlogDesc = home.Desc
			result.Index = base
		}

		permalink := fmt.Sprintf("item/%s", it.ID)
		if it.URLTitle != "" {
			slug := strings.ReplaceAll(it.URLTitle, "_", "-")
			permalink = fmt.Sprintf("item/%s/%s", it.Time.Format("2006/01"), slug)
		}
		slog.Debug("link", "link", permalink, "urltitle", it.URLTitle, "title", it.Title)

		entry := joint{
			BlogName:    home.Name,
			BlogDesc:    home.Desc,
			Lang:        *lang,
			CSS:         *css,
			Index:       base,
			FavIcon:     *favIcon,
			FavIconType: *favIconType,
			Date:        it.Time.Format(time.DateOnly),
			Title:       it.Title,
			Time:        it.Time.Format(time.TimeOnly),
			Link:        permalink,
			Author:      writer.Name,
			AuthorURL:   writer.URL,
			Body:        template.HTML(it.Body),
		}
		result.Entries = append(result.Entries, entry)
	}
	return result
}
// generateURLTitle returns the URL slug of an item.
//
//   - An empty urlTitle yields an empty slug.
//   - Any urlTitle other than the literal "NULL" is used as is, with
//     underscores replaced by dashes.
//   - For "NULL" the slug is built from the lower-cased title: letters and
//     digits are kept, and every run of other characters that is followed by
//     a letter or digit becomes a single dash.
func generateURLTitle(urlTitle, title string) string {
	switch urlTitle {
	case "":
		return ""
	case "NULL":
		// No stored URL title: derive one from the title below.
	default:
		return strings.ReplaceAll(urlTitle, "_", "-")
	}

	var slug strings.Builder
	pendingDash := false
	for _, r := range strings.ToLower(title) {
		if !unicode.IsLetter(r) && !unicode.IsNumber(r) {
			pendingDash = true
			continue
		}
		if pendingDash {
			slug.WriteRune('-')
			pendingDash = false
		}
		slug.WriteRune(r)
	}
	return slug.String()
}
// writeToFile executes template t with data and writes the result to
// filename, creating the parent directories as needed and truncating any
// existing file.
//
// The returned error covers directory and file creation, template execution,
// flushing, and closing the file; a close error is joined with any earlier
// error.
func writeToFile(t *template.Template, filename string, data any) (err error) {
	dir := filepath.Dir(filename)
	if err := os.MkdirAll(dir, 0775); err != nil {
		return fmt.Errorf("failed to create dir %q for entry: %w", dir, err)
	}
	f, err := os.Create(filename)
	if err != nil {
		return fmt.Errorf("failed to create file %q for entry: %w", filename, err)
	}
	// err is a named result so that a failure to close the file is reported
	// even when everything before it succeeded.
	defer func() {
		if closeErr := f.Close(); closeErr != nil {
			err = errors.Join(err, fmt.Errorf("failed to close file: %w", closeErr))
		}
	}()

	slog.Debug("writing to file...", "filename", filename)
	// Template execution issues many small writes; buffer them instead of
	// handing each one to the file individually.
	w := bufio.NewWriter(f)
	if err := t.Execute(w, data); err != nil {
		return fmt.Errorf("failed to execute template: %w", err)
	}
	if err := w.Flush(); err != nil {
		return fmt.Errorf("failed to flush file %q: %w", filename, err)
	}
	return nil
}
// allItems is the data the index template is executed with: blog-level
// information plus every rendered entry.
type allItems struct {
// BlogName, BlogDesc and Index are taken from the blog of the first entry;
// Index is the blog URL, always ending in "/".
BlogName string
BlogDesc string
// Lang, CSS, FavIcon and FavIconType are copied from the flags of the same
// purpose.
Lang string
CSS string
Index string
FavIcon string
FavIconType string
// Entries holds the joined entries in the order they were given to join.
Entries []joint
}
// writeIndex parses the embedded template called name and renders index with
// it into "index.html" inside the output directory.
func writeIndex(name string, index *allItems) error {
	tmpl, parseErr := template.New(name).ParseFS(templates, name)
	if parseErr != nil {
		return fmt.Errorf("failed to parse index template: %w", parseErr)
	}
	return writeToFile(tmpl, filepath.Join(*out, "index.html"), index)
}
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta name="generator" content="nucleus-html" />
<meta http-equiv="Pragma" content="no-cache" />
<meta http-equiv="Cache-Control" content="no-cache, must-revalidate" />
<meta http-equiv="Expires" content="-1" />
<title>{{.BlogName}}</title>
<meta property="og:title" content="{{.BlogName}}"/>
<meta property="og:site_name" content="{{.BlogName}}"/>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta http-equiv="Content-Language" content="{{.Lang}}" />
<link rel="stylesheet" type="text/css" href="{{.Index}}{{.CSS}}" />
<link rel="top" title="Today" href="{{.Index}}" />
<link rel="shortcut icon" type="{{.FavIconType}}" href="{{.FavIcon}}" />
</head>
<body>
<!-- empty div to create header -->
<div id="topbar"></div>
<div id="content">
<!-- start content -->
<h1>{{.BlogName}}</h1>
<ul>
{{range .Entries}}
<li><i>{{.Date}}</i> <a href="{{.Index}}{{.Link}}">{{.Title}}</a></li>
{{end}}
</ul>
</div>
<div id="tagline"><p>{{.BlogDesc}}</p></div>
<div id="menu">
<h3>My Homepage</h3>
<ul>
<li><a href="https://wang.yuxuan.org/index.html">Home</a></li>
<li><a href="https://wang.yuxuan.org/personal.html">Personal &amp; Contact info.</a></li>
<li><a href="https://wang.yuxuan.org/cv.html">Curriculum Vitae</a></li>
<li><a href="https://wang.yuxuan.org/fuwa.html">Fuwa</a></li>
<li><a href="https://b.yuxuan.org/">Blog</a></li>
<li><a href="https://wang.yuxuan.org/blog/">Old Blog</a></li>
<li><a href="https://www.flickr.com/photos/fishywang/">Photos</a></li>
</ul>
<h3>Meta</h3>
<p class="others"><a href="https://validator.w3.org/check?uri=referer"><img
style="border:0;width:80px;height:15px"
src="/pix/w3cxhtml.png"
alt="Valid XHTML 1.0!" /></a></p>
<p class="others"><a href="https://jigsaw.w3.org/css-validator/check/referer">
<img style="border:0;width:80px;height:15px"
src="/pix/vcss.png"
alt="Valid CSS!" />
</a></p>
<p class="others"><a rel="license" href="https://creativecommons.org/licenses/by-nc-sa/3.0/"><img alt="Creative Commons License" title="Some rights reserved" style="border-width:0" src="https://licensebuttons.net/l/by-nc-sa/3.0/80x15.png"/></a></p>
<!-- <rdf:RDF xmlns="http://web.resource.org/cc/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
<Work rdf:about="">
<license rdf:resource="http://creativecommons.org/licenses/by-nc-sa/3.0/" />
</Work>
<License rdf:about="http://creativecommons.org/licenses/by-nc-sa/3.0/"><permits rdf:resource="http://web.resource.org/cc/Reproduction"/><permits rdf:resource="http://web.resource.org/cc/Distribution"/><requires rdf:resource="http://web.resource.org/cc/Notice"/><requires rdf:resource="http://web.resource.org/cc/Attribution"/><prohibits rdf:resource="http://web.resource.org/cc/CommercialUse"/><permits rdf:resource="http://web.resource.org/cc/DerivativeWorks"/><requires rdf:resource="http://web.resource.org/cc/ShareAlike"/></License></rdf:RDF> -->
<h3>Credits</h3>
<ul>
<li>This template was originally designed by <a href="http://blogtemplates.noipo.org">Martijn ten Napel</a></li>
</ul>
</div>
</body>
</html>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta name="generator" content="nucleus-html" />
<meta http-equiv="Pragma" content="no-cache" />
<meta http-equiv="Cache-Control" content="no-cache, must-revalidate" />
<meta http-equiv="Expires" content="-1" />
<title>{{.Title}}</title>
<meta property="og:title" content="{{.Title}}"/>
<meta property="og:site_name" content="{{.BlogName}}"/>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta http-equiv="Content-Language" content="{{.Lang}}" />
<link rel="stylesheet" type="text/css" href="{{.Index}}{{.CSS}}" />
<link rel="top" title="Today" href="{{.Index}}" />
<link rel="shortcut icon" type="{{.FavIconType}}" href="{{.FavIcon}}" />
</head>
<body>
<!-- empty div to create header -->
<div id="topbar"></div>
<div id="content">
<!-- start content -->
<h1>{{.BlogName}}</h1>
<h2>{{.Date}}</h2>
<article>
<h3>{{.Title}}</h3>
<div class="posts">
{{.Body}}
</div>
</article>
<blockquote class="postblock" title="post information">
<p>{{.Time}} by <a href="{{.AuthorURL}}">{{.Author}}</a> - <a href="{{.Index}}{{.Link}}">Permanent Link</a></p>
</blockquote>
</div>
<div id="tagline"><p>{{.BlogDesc}}</p></div>
<div id="menu">
<h3>My Homepage</h3>
<ul>
<li><a href="https://wang.yuxuan.org/index.html">Home</a></li>
<li><a href="https://wang.yuxuan.org/personal.html">Personal &amp; Contact info.</a></li>
<li><a href="https://wang.yuxuan.org/cv.html">Curriculum Vitae</a></li>
<li><a href="https://wang.yuxuan.org/fuwa.html">Fuwa</a></li>
<li><a href="https://b.yuxuan.org/">Blog</a></li>
<li><a href="https://wang.yuxuan.org/blog/">Old Blog</a></li>
<li><a href="https://www.flickr.com/photos/fishywang/">Photos</a></li>
</ul>
<h3>Meta</h3>
<p class="others"><a href="https://validator.w3.org/check?uri=referer"><img
style="border:0;width:80px;height:15px"
src="/pix/w3cxhtml.png"
alt="Valid XHTML 1.0!" /></a></p>
<p class="others"><a href="https://jigsaw.w3.org/css-validator/check/referer">
<img style="border:0;width:80px;height:15px"
src="/pix/vcss.png"
alt="Valid CSS!" />
</a></p>
<p class="others"><a rel="license" href="https://creativecommons.org/licenses/by-nc-sa/3.0/"><img alt="Creative Commons License" title="Some rights reserved" style="border-width:0" src="https://licensebuttons.net/l/by-nc-sa/3.0/80x15.png"/></a></p>
<!-- <rdf:RDF xmlns="http://web.resource.org/cc/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
<Work rdf:about="">
<license rdf:resource="http://creativecommons.org/licenses/by-nc-sa/3.0/" />
</Work>
<License rdf:about="http://creativecommons.org/licenses/by-nc-sa/3.0/"><permits rdf:resource="http://web.resource.org/cc/Reproduction"/><permits rdf:resource="http://web.resource.org/cc/Distribution"/><requires rdf:resource="http://web.resource.org/cc/Notice"/><requires rdf:resource="http://web.resource.org/cc/Attribution"/><prohibits rdf:resource="http://web.resource.org/cc/CommercialUse"/><permits rdf:resource="http://web.resource.org/cc/DerivativeWorks"/><requires rdf:resource="http://web.resource.org/cc/ShareAlike"/></License></rdf:RDF> -->
<h3>Credits</h3>
<ul>
<li>This template was originally designed by <a href="http://blogtemplates.noipo.org">Martijn ten Napel</a></li>
</ul>
</div>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment