"""Blog migration tool (Nikola to Hugo).

Migrates .rst and .md posts, whose .rst-style metadata is either inline or in
a separate .meta file, to single .md files with TOML metadata.
"""
from os import walk,path
import pypandoc
import sys
from collections import namedtuple
import queue
def filter_rst_meta(lines):
    """Split *lines* into reST-style metadata lines and the remaining lines.

    A line counts as metadata when, ignoring surrounding whitespace, it starts
    with ``..``, contains a ``:`` and has non-blank text after the first ``:``
    (for example ``.. title: My post``).  The same test also matches reST
    directives that take an argument, such as ``.. image:: pic.png``.

    Args:
        lines: Iterable of text lines, with or without line terminators.

    Returns:
        A ``(head, rest)`` tuple of lists.  ``head`` holds the metadata lines
        stripped of surrounding whitespace.  ``rest`` holds every other line,
        in order, with only the line terminator removed so that indentation
        (significant in both reST and Markdown) is preserved.
    """
    head = []
    rest = []
    for line in lines:
        stripped = line.strip()
        _, colon, value = stripped.partition(":")
        if stripped.startswith("..") and colon and value.strip():
            head.append(stripped)
        else:
            rest.append(line.rstrip("\r\n"))
    return (head, rest)
def _toml_string(text):
    """Return *text* as a TOML basic string, escaping backslashes and quotes."""
    escaped = text.replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'


def convert_rst_meta_to_toml_meta(headlines):
    """Convert reST-style ``.. key: value`` lines into TOML front matter lines.

    Conversion rules:

    * ``tags`` / ``categories``: the comma separated value (optionally wrapped
      in ``[...]``) becomes a TOML array of strings; empty items are dropped.
    * ``thumbnail`` / ``image``: all values are collected into one ``images``
      array, because repeating a key is not valid TOML.
    * ``date``: only the first whitespace-separated token is kept.
    * any other key: the value becomes a quoted TOML string.

    Args:
        headlines: Iterable of metadata lines such as ``.. title: My post``.
            Lines without a ``:`` are ignored.

    Returns:
        A list of lines: ``+++``, the ``key = value`` entries, ``+++`` and a
        final empty string, so that ``"\\n".join(result)`` ends with a
        newline.  An empty list is returned when *headlines* yields no
        entries.
    """
    entries = []
    images = []
    for line in headlines:
        text = line.strip()
        # Drop only the leading ".." marker; str.strip(".. ") would also eat
        # trailing dots that belong to the value.
        if text.startswith(".."):
            text = text[2:]
        # Split on the first ":" only, so values may contain ":" or "=".
        key, colon, val = text.partition(":")
        if not colon:
            continue
        key = key.strip()
        val = val.strip()
        if key in ("tags", "categories"):
            # List values may be written with surrounding brackets.
            val = val.replace("[", "").replace("]", "")
            items = [_toml_string(i.strip()) for i in val.split(",") if i.strip()]
            entries.append(f"{key} = [{','.join(items)}]")
        elif key in ("thumbnail", "image"):
            # ".. image:: path" leaves a leading ":" on the value.
            images.append(_toml_string(val.strip(": ")))
        elif key == "date":
            tokens = val.split()
            if tokens:
                entries.append(f"{key} = {_toml_string(tokens[0])}")
        else:
            entries.append(f"{key} = {_toml_string(val)}")
    if images:
        entries.append(f"images = [{','.join(images)}]")
    if not entries:
        return []
    return ["+++", *entries, "+++", ""]
def process(fname, ctx):
    """Dispatch *fname* to the processor registered for its extension.

    Args:
        fname: Path of the file to process.
        ctx: Context object; ``ctx.processors`` maps a file extension
            (including the dot) to a callable taking ``(fname, lines, ctx)``.

    Files whose extension has no registered processor are reported on stdout
    and skipped without being opened.
    """
    _, ext = path.splitext(fname)
    if ext not in ctx.processors:
        print("skipping ", fname)
        # Without this return the lookup below raises KeyError.
        return
    with open(fname, "r", encoding="utf-8") as f:
        ctx.processors[ext](fname, f.readlines(), ctx)
def rst_processor(fname, lines, ctx):
    """Convert one .rst post to Markdown with converted metadata on top.

    Metadata lines are separated from the body with ``filter_rst_meta`` and
    converted with ``convert_rst_meta_to_toml_meta``; the remaining lines are
    converted from reST to Markdown by pandoc.

    Args:
        fname: Path of the .rst file; its extension-less form is the result key.
        lines: Lines of the file.
        ctx: Context object; the ``(base, text)`` result is put on ``ctx.q``.
    """
    head, rest = filter_rst_meta(lines)
    header = "\n".join(convert_rst_meta_to_toml_meta(head))
    # Make sure the last metadata line cannot fuse with the first body line.
    if header and not header.endswith("\n"):
        header += "\n"
    body = pypandoc.convert_text("\n".join(rest), "md", format="rst")
    base, _ = path.splitext(fname)
    ctx.q.put((base, header + body))
def meta_processor(fname, lines, ctx):
    """Merge a .meta file with its sibling .md file into one document.

    Args:
        fname: Path of the .meta file.
        lines: Lines of the .meta file; only its metadata lines are used.
        ctx: Context object; ``ctx.filenames`` must contain the sibling .md
            path, and the ``(base, text)`` result is put on ``ctx.q``.

    Raises:
        AttributeError: If the sibling .md file is not listed in
            ``ctx.filenames``.
    """
    base, _ = path.splitext(fname)
    md_fname = base + ".md"
    # Fail before doing any conversion work when the body file is missing.
    if md_fname not in ctx.filenames:
        raise AttributeError(f"File {md_fname} not found")
    head, _ = filter_rst_meta(lines)
    header = "\n".join(convert_rst_meta_to_toml_meta(head))
    # Make sure the last metadata line cannot fuse with the first body line.
    if header and not header.endswith("\n"):
        header += "\n"
    with open(md_fname, "r", encoding="utf-8") as f:
        body = f.read()
    ctx.q.put((base, header + body))
def md_processor(fname, lines, ctx):
    """Rewrite a .md post whose metadata is embedded as reST-style lines.

    A .md file that has a sibling .meta file listed in ``ctx.filenames`` is
    left alone here, because ``meta_processor`` merges that pair; queueing it
    twice would produce two results for the same output name.

    Args:
        fname: Path of the .md file; its extension-less form is the result key.
        lines: Lines of the file.
        ctx: Context object; the ``(base, text)`` result is put on ``ctx.q``.
    """
    base, _ = path.splitext(fname)
    if base + ".meta" in ctx.filenames:
        # will be picked up by meta_processor
        return
    head, rest = filter_rst_meta(lines)
    header = "\n".join(convert_rst_meta_to_toml_meta(head))
    # Make sure the last metadata line cannot fuse with the first body line.
    if header and not header.endswith("\n"):
        header += "\n"
    ctx.q.put((base, header + "\n".join(rest)))
# Shared state handed to every processor: the source file names, the
# extension -> processor mapping, and the queue that collects the results.
Context = namedtuple("Context", "filenames processors q")
def main():
    """Migrate every supported file in the source directory to the dest directory.

    Command line: ``<script> [source dir] [dest dir]``.  Only files directly
    inside the source directory are considered.  Each file is dispatched by
    extension (.rst, .meta, .md); every queued result is written to
    ``<dest dir>/<original base name>.md``.

    Exits with a message when arguments are missing or either directory does
    not exist.
    """
    # Both positional arguments are required; argv[0] is the program name.
    if len(sys.argv) < 3:
        sys.exit(f"usage : {sys.argv[0]} [source dir] [dest dir]")
    mypath, destpath = sys.argv[1], sys.argv[2]
    if not path.exists(mypath):
        sys.exit(f" Source path {mypath} does not exist")
    if not path.exists(destpath):
        sys.exit(f" Dest path {destpath} does not exist")

    # First item yielded by walk() is the top-level directory itself.
    _, _, names = next(walk(mypath), (None, None, []))
    filenames = [path.abspath(path.join(mypath, name)) for name in names]

    result_queue = queue.Queue()
    ctx = Context(
        filenames=filenames,
        processors={
            ".rst": rst_processor,
            ".meta": meta_processor,
            ".md": md_processor,
        },
        q=result_queue,
    )
    for f in filenames:
        process(f, ctx)

    while not result_queue.empty():
        base, text = result_queue.get()
        fname = path.join(destpath, path.basename(base)) + ".md"
        write_file(fname, text)
def write_file(fname, ls):
# print(f"----- {fname} ----\n{ls} \n\n")
with open(fname, "w") as f:
