Skip to content

Instantly share code, notes, and snippets.

Last active January 8, 2023 19:24
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dustinknopoff/0913e25d059f111f57045c904de25980 to your computer and use it in GitHub Desktop.
Save dustinknopoff/0913e25d059f111f57045c904de25980 to your computer and use it in GitHub Desktop.
This script expects to be run from the top-level directory of a Zola project: `deno run --allow-read=. --allow-write=. migrateToTaxonomies.ts`
import {
test as containsFrontmatter,
} from "";
import { walk } from "";
import { stringify } from "npm:yaml@2.1.3"
/**
 * Writes a post to `path` as a `---`-delimited front matter block followed by its body.
 *
 * @param path - File to write.
 * @param attrs - Front matter attributes, serialized with `stringify`.
 * @param body - Post content placed after the closing `---` delimiter.
 */
async function writeFile(path: string, attrs: { [key: string]: any }, body: string) {
  await Deno.writeTextFile(path, `---\n${stringify(attrs)}\n---\n\n${body}`)
}
// Front matter keys allowed to stay at the top level; any other key is relocated by the loop below.
const permittedTopLevelKeys = new Set(["title", "description", "updated", "weight", "draft", "slug", "path", "aliases", "in_search_index", "template", "taxonomies", "extra", "date"])
// Non-permitted keys listed here are moved under `taxonomies`; all other non-permitted keys go under `extra`.
const taxonomies = new Set(["tags"])
/**
 * Returns a new set holding the elements of `setA` that are not in `setB`.
 * Neither input set is modified.
 *
 * @param setA - Set to start from.
 * @param setB - Elements to exclude.
 * @returns A fresh `Set` containing `setA` minus `setB`.
 */
function difference<T>(setA: Set<T>, setB: Set<T>): Set<T> {
  const _difference = new Set(setA);
  for (const elem of setB) {
    // Remove every element that also appears in setB.
    _difference.delete(elem);
  }
  return _difference;
}
// Rewrite every file under ./content/articles (skipping paths that contain "_index")
// so that non-permitted top-level front matter keys live under `taxonomies` or `extra`.
for await (const entry of walk("./content/articles", { includeDirs: false })) {
  if (!entry.path.includes("_index")) {
    const str = await Deno.readTextFile(entry.path);
    let post;
    if (containsFrontmatter(str)) {
      post = extract(str);
    } else {
      // No front matter detected: treat the whole file as the body.
      post = { body: str, attrs: {} }
    }
    // Make sure both destination tables exist before moving keys into them.
    if (!post.attrs.extra) {
      post.attrs.extra = {}
    }
    if (!post.attrs.taxonomies) {
      post.attrs.taxonomies = {}
    }
    // Keys present in the front matter that are not permitted at the top level.
    const diff = difference(new Set(Object.keys(post.attrs)), permittedTopLevelKeys)
    if (diff.size > 0) {
      for (const elem of diff) {
        if (taxonomies.has(elem)) {
          post.attrs.taxonomies[elem] = post.attrs[elem]
        } else {
          post.attrs.extra[elem] = post.attrs[elem]
        }
        // The value now lives in its new location; drop the top-level copy.
        delete post.attrs[elem]
      }
    }
    // The file is rewritten even when no key had to be moved.
    await writeFile(entry.path, post.attrs, post.body)
  }
}
Copy link

Great! Thanks for sharing.
Sadly, it does not detect TOML front matter, which is delimited by +++ rather than --- (as described in the Zola documentation).

Copy link

It looks like there is a different path to import from for toml frontmatter

Copy link

Ah, it still uses --- as the delimiter though 😅

Copy link

jpcaruana commented Jan 7, 2023

Do you mean changing the export on L4? I had the same result. (I am still trying to understand Deno, as I am not very versed in JS.)

Copy link

And Zola behaves like Hugo here: it expects --- to delimit YAML:

Error: Failed to serve the site
Error: Error when parsing front matter of section `xxxxcontent/posts/2020/09/08/assassin-royal/`
Error: Reason: YAML deserialize error: Error("invalid type: string \"date = 2020-09-08T16:18:51+02:00 title = \\\"Lire le cycle de l'Assassin Royal, c'est compliqué\\\"\\n[taxonomies] tags = [\\\"livre\\\", \\\"un\\\", \\\"deux\\\", \\\"trois\\\", \\\"quatre\\\", \\\"cing\\\", \\\"six\\\", \\\"sept\\\", \\\"huit\\\", \\\"neuf\\\", \\\"dix\\\", \\\"etc...\\\"] categories = [\\\"test\\\", \\\"autre catégorie\\\", \\\"un\\\", \\\"deux\\\", \\\"trois\\\", \\\"quatre\\\", \\\"cing\\\", \\\"six\\\", \\\"sept\\\", \\\"huit\\\", \\\"neuf\\\", \\\"dix\\\", \\\"etc...\\\"]\\n[extra] twitter = \\\"\\\"\", expected struct PageFrontMatter", line: 2, column: 1)

Copy link

Copy link

looks like a bug to me


Copy link

dustinknopoff commented Jan 7, 2023

Here's an alternate version which overrides the std lib frontmatter to use +++ as the delimiters instead

NOTE: This will convert your frontmatter into YAML (which is still valid for Zola)

import {
    test as _test,
} from "";
import { parse } from "";
import { walk } from "";
import { stringify } from "npm:yaml@2.1.3"

/**
 * Splits `str` into front matter and body using `rx`, then parses the front matter.
 *
 * @param str - Full file contents.
 * @param rx - Extractor regex; it must match at index 0 of `str`.
 * @param parse - Parser applied to the trimmed front matter text.
 * @returns The raw front matter text, the remaining body, and the parsed attributes.
 * @throws TypeError if `rx` does not match at the very start of `str`.
 */
function _extract<T>(
    str: string,
    rx: RegExp,
    parse: Parser,
): Extract<T> {
    const match = rx.exec(str);
    if (!match || match.index !== 0) {
        throw new TypeError("Unexpected end of input");
    }
    // The front matter text is taken from the last capture group, with
    // leading/trailing whitespace stripped.
    const frontMatter = match[match.length - 1]?.replace(/^\s+|\s+$/g, "") || "";
    const attrs = parse(frontMatter) as T;
    // Everything outside the matched front matter block is the body.
    const body = str.replace(match[0], "");
    return { frontMatter, body, attrs };
}

/**
 * Determines the front matter format of `str` from its first line.
 *
 * @param str - Full file contents.
 * @param formats - Formats to try, in order; defaults to every key of
 *   `MAP_FORMAT_TO_RECOGNIZER_RX`.
 * @returns The first format whose recognizer regex matches the first line,
 *   or `Format.UNKNOWN` if none does.
 */
function recognize(str: string, formats?: Format[]): Format {
    if (!formats) {
        formats = Object.keys(MAP_FORMAT_TO_RECOGNIZER_RX) as Format[];
    }

    const [firstLine] = str.split(/(\r?\n)/);

    for (const format of formats) {
        if (format === Format.UNKNOWN) {
            // UNKNOWN has no recognizer regex; skip it.
            continue;
        }

        if (MAP_FORMAT_TO_RECOGNIZER_RX[format].test(firstLine)) {
            return format;
        }
    }

    return Format.UNKNOWN;
}

/**
 * Builds an `extract` function that supports the given formats.
 *
 * @param formats - Maps each supported format to the parser for its front matter text.
 * @returns A function that recognizes the format of a string and extracts
 *   its front matter; it throws `TypeError` when the format is unknown or
 *   has no parser in `formats`.
 */
function createExtractor(
    formats: Partial<Record<Format, Parser>>,
): Extractor {
    const formatKeys = Object.keys(formats) as Format[];

    return function extract<T>(str: string): Extract<T> {
        const format = recognize(str, formatKeys);
        const parser = formats[format];

        if (format === Format.UNKNOWN || !parser) {
            throw new TypeError(`Unsupported front matter format`);
        }

        return _extract(str, MAP_FORMAT_TO_EXTRACTOR_RX[format], parser);
    };
}

/** A front matter delimiter: one token used at both ends, or a `[begin, end]` pair. */
type Delimiter = string | [begin: string, end: string];

/** Returns the opening token of `delimiter`. */
function getBeginToken(delimiter: Delimiter): string {
    return Array.isArray(delimiter) ? delimiter[0] : delimiter;
}

/** Returns the closing token of `delimiter`. */
function getEndToken(delimiter: Delimiter): string {
    return Array.isArray(delimiter) ? delimiter[1] : delimiter;
}

/**
 * Builds the two regexes used for one front matter format.
 *
 * @param dv - Accepted delimiters, given as regex source text (already escaped).
 * @returns A pair `[recognizer, extractor]`: the recognizer matches a line that
 *   consists solely of a begin token; the extractor matches a whole front
 *   matter block and captures the text between the delimiters in its last group.
 */
function createRegExp(...dv: Delimiter[]): [RegExp, RegExp] {
    const beginPattern = "(" + dv.map(getBeginToken).join("|") + ")";
    const pattern = "^(" +
        "\\ufeff?" + // Maybe byte order mark
        beginPattern +
        "$([\\s\\S]+?)" +
        "^(?:" + dv.map(getEndToken).join("|") + ")\\s*" +
        "$" +
        (Deno.build.os === "windows" ? "\\r?" : "") +
        "(?:\\n)?)";

    return [
        new RegExp("^" + beginPattern + "$", "im"),
        new RegExp(pattern, "im"),
    ];
}

// TOML front matter is delimited either by `+++` lines or by a `= toml =` line.
const [RX_RECOGNIZE_TOML, RX_TOML] = createRegExp(
    ["\\+\\+\\+", "\\+\\+\\+"],
    "= toml =",
);

// Regex used by `recognize` to detect each format from the first line of a file.
const MAP_FORMAT_TO_RECOGNIZER_RX: Omit<
    Record<Format, RegExp>,
    Format.UNKNOWN
> = {
    [Format.TOML]: RX_RECOGNIZE_TOML,
};

// Regex used by `_extract` and `test` to match the whole front matter block of each format.
const MAP_FORMAT_TO_EXTRACTOR_RX: Omit<Record<Format, RegExp>, Format.UNKNOWN> =
    {
        [Format.TOML]: RX_TOML,
    };

// Extractor that only understands TOML front matter.
const extract = createExtractor({
    [Format.TOML]: parse as Parser,
});

/**
 * Reports whether `str` starts with a front matter block in one of `formats`.
 *
 * @param str - Full file contents.
 * @param formats - Formats to check; defaults to every key of
 *   `MAP_FORMAT_TO_EXTRACTOR_RX`.
 * @returns `true` if any format's extractor regex matches at index 0.
 * @throws TypeError if `formats` contains `Format.UNKNOWN`.
 */
function test(str: string, formats?: Format[]): boolean {
    if (!formats) {
        formats = Object.keys(MAP_FORMAT_TO_EXTRACTOR_RX) as Format[];
    }

    for (const format of formats) {
        if (format === Format.UNKNOWN) {
            throw new TypeError("Unable to test for unknown front matter format");
        }

        const match = MAP_FORMAT_TO_EXTRACTOR_RX[format].exec(str);
        // Front matter only counts when it sits at the very start of the input.
        if (match?.index === 0) {
            return true;
        }
    }

    return false;
}

/**
 * Writes a post to `path` as a `---`-delimited front matter block followed by its body.
 *
 * @param path - File to write.
 * @param attrs - Front matter attributes, serialized with `stringify`.
 * @param body - Post content placed after the closing `---` delimiter.
 */
async function writeFile(path: string, attrs: { [key: string]: any }, body: string) {
    await Deno.writeTextFile(path, `---\n${stringify(attrs)}\n---\n\n${body}`)
}

// Front matter keys allowed to stay at the top level; any other key is relocated by the loop below.
const permittedTopLevelKeys = new Set(["title", "description", "updated", "weight", "draft", "slug", "path", "aliases", "in_search_index", "template", "taxonomies", "extra", "date"])

// Non-permitted keys listed here are moved under `taxonomies`; all other non-permitted keys go under `extra`.
const taxonomies = new Set(["tags"])

/**
 * Returns a new set holding the elements of `setA` that are not in `setB`.
 * Neither input set is modified.
 *
 * @param setA - Set to start from.
 * @param setB - Elements to exclude.
 * @returns A fresh `Set` containing `setA` minus `setB`.
 */
function difference<T>(setA: Set<T>, setB: Set<T>): Set<T> {
    const _difference = new Set(setA);
    for (const elem of setB) {
        // Remove every element that also appears in setB.
        _difference.delete(elem);
    }
    return _difference;
}

// Rewrite matching files so that non-permitted top-level front matter keys
// live under `taxonomies` or `extra`.
// NOTE(review): this version walks "./" and only touches paths containing
// "sample" — presumably a leftover from testing; confirm before running on a real site.
for await (const entry of walk("./", { includeDirs: false })) {
    if (entry.path.includes("sample")) {
        const str = await Deno.readTextFile(entry.path);
        let post;
        if (test(str)) {
            post = extract(str);
        } else {
            // No front matter detected: treat the whole file as the body.
            post = { body: str, attrs: {} }
        }
        // Make sure both destination tables exist before moving keys into them.
        if (!post.attrs.extra) {
            post.attrs.extra = {}
        }
        if (!post.attrs.taxonomies) {
            post.attrs.taxonomies = {}
        }

        // Keys present in the front matter that are not permitted at the top level.
        const diff = difference(new Set(Object.keys(post.attrs)), permittedTopLevelKeys)
        if (diff.size > 0) {
            for (const elem of diff) {
                if (taxonomies.has(elem)) {
                    post.attrs.taxonomies[elem] = post.attrs[elem]
                } else {
                    post.attrs.extra[elem] = post.attrs[elem]
                }
                // The value now lives in its new location; drop the top-level copy.
                delete post.attrs[elem]
            }
        }
        // The file is rewritten even when no key had to be moved.
        await writeFile(entry.path, post.attrs, post.body)
    }
}

Copy link

Thanks for the upgrade. I still have issues with the script, as regular working posts fail.

error: Uncaught Error: Parse error on line 1, column 26: Unexpected character: "+"
      throw new TOMLParseError(message);
    at parse (
    at _extract (file:///xxxmigrate_taxonomies.ts:21:19)
    at extract (file:///xxx/migrate_taxonomies.ts:59:16)
    at file:///Usersjxxxmigrate_taxonomies.ts:153:20

I issued a PR on deno

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment