Skip to content

Instantly share code, notes, and snippets.

@pedro-psb
Created June 10, 2024 19:19
Show Gist options
  • Save pedro-psb/790e7150f32337306b452ce3e32b4e50 to your computer and use it in GitHub Desktop.
Save pedro-psb/790e7150f32337306b452ce3e32b4e50 to your computer and use it in GitHub Desktop.
Sanitize pulpproject.org posts so that they work with the mkdocs-material blog plugin
import re
from pathlib import Path
def read_files():
    """Collect the Markdown blog posts from the staging posts directory.

    Returns:
        A list of ``Path`` objects, one for every ``*.md`` entry found
        directly inside ``staging_docs/sections/blog/posts/`` (resolved
        relative to the current working directory). The pattern is not
        recursive, so sub-directories are not searched.
    """
    posts_dir = Path("staging_docs/sections/blog/posts/")
    markdown_posts = posts_dir.glob("*.md")
    return list(markdown_posts)
def _date_from_filename(name):
    """Return the zero-padded ``yyyy-mm-dd`` prefix of *name*, or ``None``.

    Accepts one- or two-digit month and day components (``2020-1-5-title.md``
    yields ``2020-01-05``). Names that do not start with a date return
    ``None`` so the caller never injects a malformed date.
    """
    match = re.match(r"(\d{4})-(\d{1,2})-(\d{1,2})(?!\d)", name)
    if match is None:
        return None
    year, month, day = match.groups()
    return f"{year}-{month.zfill(2)}-{day.zfill(2)}"


def _sanitize_post(raw, post_date):
    """Return the text of one post with the frontmatter fixes applied.

    Args:
        raw: Full text of the Markdown post, frontmatter included.
        post_date: ``yyyy-mm-dd`` string to inject as the post date, or
            ``None`` to leave the date untouched.
    """
    # Quote the first indented list item that is a bare float (e.g. "- 2.0")
    # so YAML reads it as a string. The item's own indentation is kept so the
    # surrounding sequence stays aligned, and the end-of-line lookahead keeps
    # values such as "2.0.1" or "2.0 beta" from being cut in half.
    raw = re.sub(
        r"\n(\s+)-\s*(\d+\.\d+)(?=[ \t]*$)",
        r'\n\1- "\2"',
        raw,
        count=1,
        flags=re.MULTILINE,
    )

    # Use a timezone-aware date: insert it right after the first "---" line.
    if post_date is not None and "- date:" not in raw:
        injected_header = f"---\ndate: {post_date}T20:55:50+00:00\n"
        # Skip when the header is already there so a re-run does not stack
        # duplicate "date:" keys.
        if injected_header not in raw:
            raw = re.sub(r"---\s*\n", injected_header, raw, count=1)

    # Add the excerpt separator after the first "---" line that is preceded
    # by a newline (the closing frontmatter delimiter when the file starts
    # with "---"). An existing separator is respected.
    if "<!-- more -->" not in raw:
        raw = re.sub(r"\n---\s*\n", "\n---\n<!-- more -->\n", raw, count=1)
    return raw


def fix_blog_posts():
    """Rewrite every post returned by ``read_files()`` in place.

    For each post:
      * derive the post date from the filename, adding a leading zero to a
        one-digit month or day (``yyyy-mm-d`` -> ``yyyy-mm-dd``);
      * quote the first float-looking list item, like ``- 2.0``, so it is
        parsed as a string;
      * insert ``date: <yyyy-mm-dd>T20:55:50+00:00`` after the first ``---``
        line, unless the text contains ``- date:`` or the filename carries no
        date;
      * insert the ``<!-- more -->`` excerpt separator after the frontmatter.

    Running the function again on already-processed posts does not insert the
    date or the separator a second time.

    NOTE(review): files are not renamed and existing ``date:`` lines are not
    deleted here; if the posts already carry a date key, confirm how it should
    be reconciled with the injected one.
    """
    for file in read_files():
        post_date = _date_from_filename(file.name)
        # Explicit UTF-8 so the result does not depend on the platform locale.
        raw = file.read_text(encoding="utf-8")
        file.write_text(_sanitize_post(raw, post_date), encoding="utf-8")
# Runs the sanitizer at module level: the posts are rewritten in place
# whenever this file is executed (or imported).
fix_blog_posts()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment