Skip to content

Instantly share code, notes, and snippets.

@pauliusbaulius
Created February 14, 2021 21:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pauliusbaulius/1f279f55fbdeea4a492e448cc30ea427 to your computer and use it in GitHub Desktop.
shitty static site generator
import json
import os
import shutil
from datetime import datetime
from functools import wraps
from time import time
from typing import Optional
import jinja2
import requests
import tinify
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from jinja2 import Template
from markdown import Markdown
import collections
"""
STAGE USER INTERACTION
1. tweak global vars
2. add absolute paths to .md in POSTS list
"""
PATH_BLOG = "html"
PATH_MEDIA = os.path.join(PATH_BLOG, "media")
PATH_TEMPLATES = "static"
MAX_MEDIA_WIDTH = 500 # do not add px, vw, or any other css specification!
POSTS = []
"""
STAGE UTILS
here live utility functions
"""
def sizeof_fmt(num, suffix='B'):
    """Render a byte count as a human-readable string, e.g. 1536 -> '1.5KiB'."""
    # https://stackoverflow.com/questions/1094841/get-human-readable-version-of-file-size
    for unit in ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi'):
        if abs(num) < 1024.0:
            return f"{num:3.1f}{unit}{suffix}"
        num /= 1024.0
    # anything that survived the loop is in yobibyte territory
    return f"{num:.1f}Yi{suffix}"
def write_log(function: str, log: str, args: Optional[dict] = None):
    """Append one quoted-CSV row (utc time, pid, function, message, args) to the build log.

    Bug fix: the original used a mutable default argument (`args: dict = {}`);
    the default is now None and normalized here, so the logged text is unchanged.
    """
    args = {} if args is None else args
    with open(os.path.join(PATH_BLOG, "ssg.txt"), "a") as l:
        l.write(
            f'"{datetime.utcnow()}","{os.getpid()}","{function}","{log}","{args}"\n'
        )
def timer(f):
    """Decorator: log the wall-clock runtime of *f* in milliseconds via write_log."""
    @wraps(f)
    def wrapped(*args, **kwargs):
        started = time()
        value = f(*args, **kwargs)
        elapsed_ms = (time() - started) * 1000
        write_log(function=f.__name__, log="{:0.2f}ms".format(elapsed_ms))
        return value
    return wrapped
"""
STAGE PREPARATION
1. load secret env variables +
2. make required directories +
3. setup jinja2 templating +
4. prepare minified css for inlining +
"""
load_dotenv(".env") # secrets like api keys for tinypng.com
os.makedirs(PATH_BLOG, exist_ok=True)
os.makedirs(PATH_MEDIA, exist_ok=True)
JINJA2_LOADER = jinja2.FileSystemLoader(searchpath=PATH_TEMPLATES)
JINJA2_ENV = jinja2.Environment(loader=JINJA2_LOADER)
with open(os.path.join(PATH_BLOG, "ssg.txt"), "w") as l:
l.write("DATETIME_UTC,PID,FUNCTION,LOG,ARGS\n")
@timer
def minify_css():
    """ uses cssminifier.com api, is slow but least bloat.

    Reads static/style.css, posts it to the minifier, writes static/style.min.css.
    Bug fix: the original leaked the file handle from a bare open(); it is now
    closed via a context manager, and a non-2xx API response raises instead of
    silently writing an error page into the css file.
    """
    url = "https://cssminifier.com/raw"
    with open("static/style.css", "rb") as fr:
        response = requests.post(url, data={"input": fr.read()})
    response.raise_for_status()
    with open("static/style.min.css", "w") as fw:
        fw.write(response.text)
minify_css()
"""
STAGE BUILD
1. iterate POSTS to build html pages +
2. create index.html +
3. create about.html +
4. create tags.html
"""
class Post:
    """One blog post: parses a markdown file, copies its images, renders html.

    NOTE(review): this source was recovered from a paste with indentation
    stripped; block nesting below is the reconstruction in which `new_path`
    is always bound before use -- confirm against the original file.
    """

    def __init__(self, path_markdown):
        self.path_markdown = path_markdown
        self.name = self._generate_post_name()
        self.metadata = self._handle_metadata()
        self.html = self._convert_to_html()
        self.url = self.name + ".html"
        self.toc = None  # never populated; _extract_toc exists but is not called
        self.images = []  # absolute source paths of images referenced by the post
        self.new_images = []  # destination paths of copies under PATH_BLOG
        self._handle_images()
        self._add_extra_metadata()
        self._render_html()

    def __lt__(self, other):
        # inverted on purpose: sorted(posts) yields newest-first
        return self.metadata["date"] > other.metadata["date"]

    def _generate_post_name(self):
        """ takes your input path, gets filename, makes it lowercase, replaces spaces and appends html extension. """
        _, tail = os.path.split(self.path_markdown)
        return str(os.path.splitext(tail)[0]).replace(" ", "_").lower()

    @timer
    def _handle_metadata(self):
        """ extract yaml header from markdown file if exists and convert to python dict. """
        md = Markdown(extensions=["meta"])
        with open(self.path_markdown, "r") as fr:
            md.convert(fr.read())  # convert() populates md.Meta; the html is unused here
        metadata = md.Meta
        for k, v in metadata.items():
            if len(v) == 1:  # convert single item lists into string for aesthetics
                metadata[k] = "".join(v)
        return metadata

    @timer
    def _convert_to_html(self):
        """Convert the markdown body to html with the extensions the site relies on."""
        md = Markdown(
            extensions=[
                "fenced_code",
                "sane_lists",
                "smarty",
                "footnotes",
                "tables",
                "attr_list",
            ]
        )
        with open(self.path_markdown, "r") as fr:
            return md.convert(fr.read())

    @timer
    def _extract_toc(self):
        """Build a table of contents from the markdown headers (currently unused)."""
        md = Markdown(extensions=["toc"])
        with open(self.path_markdown, "r") as fr:
            md.convert(fr.read())  # convert() populates md.toc
        return md.toc

    @timer
    def _add_extra_metadata(self):
        """ some additional metadata is calculated here. """

        def get_filesize(path_markdown):
            # human-readable size of the source .md file
            return sizeof_fmt(os.path.getsize(self.path_markdown))

        def get_wc(path_markdown):
            # "word-count character-count" as a single string
            with open(self.path_markdown, "r") as fr:
                content = fr.read()
            return f"{len(content.split())} {len(content)}"

        def get_image_stats(path_markdown):
            # (translated comment) compressed sizes are only accurate on the
            # second build, after compress_images has run once :DD
            def _get_image_size(image):
                try:
                    return os.path.getsize(image)
                except FileNotFoundError:
                    return 0  # image referenced but missing; counted as zero bytes

            size_images_og = sum(_get_image_size(image) for image in self.images)
            size_images_compressed = sum(
                _get_image_size(image) for image in self.new_images
            )
            return {
                "original": sizeof_fmt(size_images_og),
                "compressed": sizeof_fmt(size_images_compressed),
                "difference": sizeof_fmt(size_images_og - size_images_compressed),
            }

        image_stats = get_image_stats(self.path_markdown)
        self.metadata["wc"] = get_wc(self.path_markdown)
        self.metadata["md_file_size"] = get_filesize(self.path_markdown)
        self.metadata["images_size_original"] = image_stats["original"]
        self.metadata["images_size_compressed"] = image_stats["compressed"]
        self.metadata["images_savings"] = image_stats["difference"]

    @timer
    def _handle_images(self):
        """ image handling pain in the ass. """
        # todo hash names, move to media dir, replace names in html.
        # todo rename handle_images and do all steps in here.
        soup = BeautifulSoup(self.html, "html5lib")
        for media in soup.find_all(["img", "source"]):
            media_path = media.get("src")
            if not media_path:
                # bug fix: <source> tags may carry srcset but no src; the
                # original crashed on os.path.join(head, None)
                continue
            head, _ = os.path.split(self.path_markdown)
            # absolute path to picture in your filesystem
            absolute_path = os.path.normpath(os.path.join(head, media_path))
            self.images.append(absolute_path)
            # check if image exists in webpage path before copying it
            image_filename = os.path.basename(absolute_path)
            if image_filename not in os.listdir(PATH_MEDIA):
                try:
                    # shutil.copy replaces existing file...
                    new_path = shutil.copy(absolute_path, PATH_MEDIA)
                    # do not need PATH_BLOG since files are in that path.
                    new_path = new_path.replace(PATH_BLOG, "")
                    # replace old links with new links!
                    self.html = str(self.html).replace(media_path, new_path)
                except FileNotFoundError:
                    # bug fix: arguments were previously passed in the wrong
                    # positional order (function="ERROR", log="copy_media")
                    write_log(
                        function="_handle_images",
                        log=f"[{media_path}] was not found",
                    )
            # normalize the link to "media/<filename>" (no-op if already replaced)
            new_path = os.path.join("/media/", image_filename)[1:]
            self.html = str(self.html).replace(media_path, new_path)
            self.new_images.append(os.path.join(PATH_BLOG, new_path))

    @timer
    def _render_html(self):
        """Render the post template and write PATH_BLOG/<name>.html."""
        template = JINJA2_ENV.get_template("post.html")
        output = template.render(
            metadata=json.dumps(
                self.metadata, indent=4
            ),  # prettifies metadata by converting to indented str
            toc=self.toc,
            content=self.html,
        )
        with open(os.path.join(PATH_BLOG, self.url), "w") as f:
            f.write(output)
# STAGE BUILD step 1: swap every markdown path in POSTS for its built Post object.
POSTS[:] = [Post(markdown_path) for markdown_path in POSTS]
@timer
def build_html(
    template: str,
    filename: str,
    content: Optional[dict] = None,
):
    """Render *template* with *content* and write the result to PATH_BLOG/filename.

    Bug fixes: the original used a mutable default argument (`content: dict = {}`)
    and shadowed the `template` parameter with the loaded template object.
    """
    jinja_template = JINJA2_ENV.get_template(template)
    output = jinja_template.render(content or {})
    with open(os.path.join(PATH_BLOG, filename), "w") as f:
        f.write(output)
@timer
def build_tags() -> dict:
    """ builds a dictionary of tag: [Post]; posts newest-first, tags alphabetical. """
    tags = collections.defaultdict(list)  # idiomatic grouping instead of if/else
    for post in sorted(POSTS):  # Post.__lt__ is inverted, so this is newest-first
        # NOTE(review): _handle_metadata collapses single-item lists to a plain
        # string, so if "tags" arrives as a string this iterates its characters
        # -- confirm the metadata shape against real posts.
        for tag in post.metadata["tags"]:
            tags[tag].append(post)
    return collections.OrderedDict(sorted(tags.items()))
# STAGE BUILD steps 2-4: render the listing pages from the now-built POSTS.
build_html(
    template="index.html", filename="index.html", content={"posts": sorted(POSTS)}
) # build index
build_html(template="about.html", filename="about.html") # build about
build_html(
    template="tags.html", filename="tags.html", content={"tags": build_tags()}
) # build tags
# ship changelog/readme alongside the generated pages
shutil.copyfile("CHANGELOG.txt", os.path.join(PATH_BLOG, "changelog.txt"))
shutil.copyfile("README.txt", os.path.join(PATH_BLOG, "readme.txt"))
"""
STAGE OPTIMIZE
1. add lazy loading to all html <img> attributes
2. compress images if needed, use a file to track whether images were compressed before or not
3. minify html to further reduce size
"""
@timer
def add_lazy_loading():
    """Add loading="lazy" to every <img>/<source> in the generated html pages.

    Bug fix: the original referenced an undefined name `html` (NameError on
    call) and never wrote anything back; this implements its own todo by
    rewriting every .html file under PATH_BLOG. Note the function is not
    called anywhere in this script yet.
    """
    for filename in os.listdir(PATH_BLOG):
        if not filename.endswith(".html"):
            continue
        path = os.path.join(PATH_BLOG, filename)
        with open(path, "r") as fr:
            soup = BeautifulSoup(fr.read(), "lxml")
        for media in soup.find_all(["img", "source"]):
            media.attrs["loading"] = "lazy"
        with open(path, "w") as fw:
            fw.write(str(soup))
@timer
def compress_images():
    """Shrink every png/jpg in PATH_MEDIA in place via the tinypng api.

    The api key is read from the "key" environment variable; images are
    scaled down to MAX_MEDIA_WIDTH and the size delta is logged.
    """
    tinify.key = os.environ.get("key")
    for filename in os.listdir(PATH_MEDIA):
        extension = os.path.splitext(filename)[1]  # get file extension
        if extension.lower() not in (".png", ".jpg", ".jpeg"):
            continue  # only minify images!
        path = os.path.join(PATH_MEDIA, filename)
        size_before = os.path.getsize(path)
        source = tinify.from_file(path)
        source.resize(method="scale", width=MAX_MEDIA_WIDTH).to_file(path)
        size_after = os.path.getsize(path)
        write_log(
            "compress_media",
            f"resized [{filename}] from {size_before}bytes to {size_after}bytes",
        )
@timer
def minify_html():
    """Placeholder: html minification is not implemented yet."""
    pass
    # todo https://stackoverflow.com/questions/5597094/compressminimize-html-from-python
#compress_images()
#TODO add ![[]] obisidian media handling! need to check type etc
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment