Skip to content

Instantly share code, notes, and snippets.

@dustinknopoff
Last active January 8, 2023 19:24
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dustinknopoff/0913e25d059f111f57045c904de25980 to your computer and use it in GitHub Desktop.
Save dustinknopoff/0913e25d059f111f57045c904de25980 to your computer and use it in GitHub Desktop.
This script expects to be run from the top-level directory of a Zola project. Run it with `deno run --allow-read=. --allow-write=. migrateToTaxonomies.ts`
import {
extract,
test as containsFrontmatter,
} from "https://deno.land/std@0.170.0/encoding/front_matter/any.ts";
import { walk } from "https://deno.land/std@0.170.0/fs/mod.ts";
import { stringify } from "npm:yaml@2.1.3"
/**
 * Writes a post to `path` as a `---`-delimited front matter block followed by its body.
 *
 * @param path Destination file path.
 * @param attrs Front matter attributes, serialized with `stringify`.
 * @param body Post content placed after the front matter block.
 */
async function writeFile(path: string, attrs: { [key: string]: any }, body: string) {
  const serialized = stringify(attrs);
  const contents = "---\n" + serialized + "\n---\n\n" + body;
  await Deno.writeTextFile(path, contents);
}
// Front matter keys that are left at the top level; the loop below relocates every other key.
const permittedTopLevelKeys = new Set(["title", "description", "updated", "weight", "draft", "slug", "path", "aliases", "in_search_index", "template", "taxonomies", "extra", "date"])
// Keys that are relocated under `taxonomies`; all other non-permitted keys go under `extra`.
const taxonomies = new Set(["tags"])
/**
 * Returns a new set holding the members of `setA` that are absent from `setB`.
 * Neither input set is modified; members keep the iteration order of `setA`.
 */
function difference<T>(setA: Set<T>, setB: Set<T>): Set<T> {
  const remaining = new Set<T>();
  setA.forEach((member) => {
    if (!setB.has(member)) {
      remaining.add(member);
    }
  });
  return remaining;
}
// Rewrite the front matter of every Markdown page under ./content/articles so that
// only the keys in `permittedTopLevelKeys` stay at the top level. Keys named in
// `taxonomies` move under `taxonomies`; every other key moves under `extra`.
//
// `exts` limits the walk to Markdown files. Without it, any other file in the tree
// (for example an image stored next to a post) would be read as text and overwritten
// with a front matter block.
for await (const entry of walk("./content/articles", { includeDirs: false, exts: [".md"] })) {
  // Skip `_index.*` files. The check is on the file name rather than the whole path,
  // so a directory whose name merely contains "_index" does not exclude its pages.
  if (entry.name.startsWith("_index.")) {
    continue;
  }
  console.log(entry.path);
  const str = await Deno.readTextFile(entry.path);
  let post;
  if (containsFrontmatter(str)) {
    post = extract(str);
  } else {
    // No front matter: treat the whole file as the body and start with no attributes.
    post = { body: str, attrs: {} };
  }
  // Make sure both destination tables exist before keys are moved into them.
  if (!post.attrs.extra) {
    post.attrs.extra = {};
  }
  if (!post.attrs.taxonomies) {
    post.attrs.taxonomies = {};
  }
  // Every top-level key that is not permitted has to be relocated.
  const diff = difference(new Set(Object.keys(post.attrs)), permittedTopLevelKeys);
  for (const elem of diff) {
    if (taxonomies.has(elem)) {
      post.attrs.taxonomies[elem] = post.attrs[elem];
    } else {
      post.attrs.extra[elem] = post.attrs[elem];
    }
    delete post.attrs[elem];
  }
  await writeFile(entry.path, post.attrs, post.body);
}
@jpcaruana
Copy link

Great! Thanks for sharing.
Sadly it does not detect TOML frontmatter contained within +++ and not --- (as described in https://gohugo.io/content-management/front-matter/#front-matter-formats)

@dustinknopoff
Copy link
Author

It looks like there is a different path to import from for toml frontmatter https://deno.land/std@0.171.0/encoding/front_matter/toml.ts

@dustinknopoff
Copy link
Author

Ah, it still uses --- as the delimiter though 😅

@jpcaruana
Copy link

jpcaruana commented Jan 7, 2023

Do you mean changing the import on L4 (https://gist.github.com/dustinknopoff/0913e25d059f111f57045c904de25980#file-migratetotaxonomies-ts-L4)? I got the same result. (I am trying to understand Deno, as I am not very versed in JS.)

@jpcaruana
Copy link

and zola behaves as hugo here, expects --- to be YAML:

Error: Failed to serve the site
Error: Error when parsing front matter of section `xxxxcontent/posts/2020/09/08/assassin-royal/index.md`
Error: Reason: YAML deserialize error: Error("invalid type: string \"date = 2020-09-08T16:18:51+02:00 title = \\\"Lire le cycle de l'Assassin Royal, c'est compliqué\\\"\\n[taxonomies] tags = [\\\"livre\\\", \\\"un\\\", \\\"deux\\\", \\\"trois\\\", \\\"quatre\\\", \\\"cing\\\", \\\"six\\\", \\\"sept\\\", \\\"huit\\\", \\\"neuf\\\", \\\"dix\\\", \\\"etc...\\\"] categories = [\\\"test\\\", \\\"autre catégorie\\\", \\\"un\\\", \\\"deux\\\", \\\"trois\\\", \\\"quatre\\\", \\\"cing\\\", \\\"six\\\", \\\"sept\\\", \\\"huit\\\", \\\"neuf\\\", \\\"dix\\\", \\\"etc...\\\"]\\n[extra] twitter = \\\"https://twitter.com/jpcaruana/status/1303356472705921026\\\"\", expected struct PageFrontMatter", line: 2, column: 1)

@jpcaruana
Copy link

@jpcaruana
Copy link

looks like a bug to me

denoland/deno_std#3094

@dustinknopoff
Copy link
Author

dustinknopoff commented Jan 7, 2023

Here's an alternate version which overrides the std lib frontmatter to use +++ as the delimiters instead

NOTE: This will convert your frontmatter into YAML (which is still valid for Zola)

import {
    Extractor,
    Extract,
    Format,
    Parser,
    test as _test,
} from "https://deno.land/std@0.171.0/encoding/front_matter/mod.ts";
import { parse } from "https://deno.land/std@0.171.0/encoding/toml.ts";
import { walk } from "https://deno.land/std@0.171.0/fs/mod.ts";
import { stringify } from "npm:yaml@2.1.3"

/**
 * Splits `str` into its leading front matter block and the remaining body.
 *
 * @param str Full document text.
 * @param rx Extractor expression. It must match at index 0; its last capture
 *   group is taken as the raw front matter text.
 * @param parse Parser applied to the trimmed front matter text.
 * @returns The trimmed front matter text, the body with the matched block
 *   removed, and the parsed attributes.
 * @throws {TypeError} If `rx` does not match at the very start of `str`.
 */
function _extract<T>(
    str: string,
    rx: RegExp,
    parse: Parser,
): Extract<T> {
    const found = rx.exec(str);
    if (found === null || found.index !== 0) {
        throw new TypeError("Unexpected end of input");
    }

    // The whole matched block sits at the start of `str`, so dropping that many
    // characters leaves exactly the body.
    const [block] = found;
    const rawFrontMatter = found[found.length - 1];
    const frontMatter = rawFrontMatter?.replace(/^\s+|\s+$/g, "") || "";
    const attrs = parse(frontMatter) as T;
    const body = str.slice(block.length);

    return { frontMatter, body, attrs };
}

/**
 * Detects which front matter format `str` opens with by testing its first
 * line against each format's recognizer expression.
 *
 * @param str Full document text.
 * @param formats Formats to try, in order. Defaults to every key of
 *   `MAP_FORMAT_TO_RECOGNIZER_RX`.
 * @returns The first format whose recognizer matches the first line, or
 *   `Format.UNKNOWN` when none does.
 */
function recognize(str: string, formats?: Format[]): Format {
    const candidates = formats ??
        (Object.keys(MAP_FORMAT_TO_RECOGNIZER_RX) as Format[]);

    // Only the first line is inspected, so stop splitting after one piece.
    const [rawFirstLine = ""] = str.split(/\r?\n/, 1);
    // The extractor expressions accept an optional leading byte order mark;
    // drop it here as well so that input accepted by `test()` is not rejected
    // by `extract()` as an unsupported format.
    const firstLine = rawFirstLine.replace(/^\ufeff/, "");

    for (const format of candidates) {
        if (format === Format.UNKNOWN) {
            continue;
        }

        if (MAP_FORMAT_TO_RECOGNIZER_RX[format].test(firstLine)) {
            return format;
        }
    }

    return Format.UNKNOWN;
}

/**
 * Builds an extractor bound to the given format-to-parser table.
 *
 * @param formats Parsers keyed by the front matter format they handle.
 * @returns A function that recognizes the format of a document and splits it
 *   into front matter, body and parsed attributes. It throws a `TypeError`
 *   when the format is unknown or has no parser in `formats`.
 */
function createExtractor(
    formats: Partial<Record<Format, Parser>>,
): Extractor {
    const supported = Object.keys(formats) as Format[];

    return function extract<T>(str: string): Extract<T> {
        const detected = recognize(str, supported);
        const parseFn = formats[detected];

        if (detected !== Format.UNKNOWN && parseFn) {
            const rx = MAP_FORMAT_TO_EXTRACTOR_RX[detected];
            return _extract(str, rx, parseFn);
        }

        throw new TypeError(`Unsupported front matter format`);
    };
}

/** Either a single token used to open and close a block, or a `[begin, end]` pair. */
type Delimiter = string | [begin: string, end: string];

/** Returns the opening token of `delimiter`. */
function getBeginToken(delimiter: Delimiter): string {
    if (Array.isArray(delimiter)) {
        const [begin] = delimiter;
        return begin;
    }
    return delimiter;
}

/** Returns the closing token of `delimiter`. */
function getEndToken(delimiter: Delimiter): string {
    if (Array.isArray(delimiter)) {
        const [, end] = delimiter;
        return end;
    }
    return delimiter;
}

/**
 * Builds the two regular expressions used for one front matter format.
 *
 * Delimiter tokens are inserted into the patterns verbatim, so any regex
 * metacharacters in them must already be escaped by the caller.
 *
 * @param dv Accepted delimiters; alternatives are joined with `|`.
 * @returns A `[recognizer, extractor]` pair. The recognizer matches a line
 *   consisting of an opening token; the extractor matches an opening token,
 *   the enclosed text and a closing token.
 */
function createRegExp(...dv: Delimiter[]): [RegExp, RegExp] {
    const openers = dv.map(getBeginToken).join("|");
    const closers = dv.map(getEndToken).join("|");
    const beginPattern = `(${openers})`;
    const optionalCarriageReturn = Deno.build.os === "windows" ? "\\r?" : "";

    const pattern = [
        "^(",
        "\\ufeff?", // Maybe byte order mark
        beginPattern,
        "$([\\s\\S]+?)",
        "^(?:",
        closers,
        ")\\s*",
        "$",
        optionalCarriageReturn,
        "(?:\\n)?)",
    ].join("");

    const recognizer = new RegExp(`^${beginPattern}$`, "im");
    const extractor = new RegExp(pattern, "im");
    return [recognizer, extractor];
}

// Recognizer and extractor expressions for TOML front matter. Two delimiter
// forms are accepted: `+++` on both sides (pre-escaped, since createRegExp
// inserts tokens verbatim) or the literal `= toml =`.
const [RX_RECOGNIZE_TOML, RX_TOML] = createRegExp(
    ["\\+\\+\\+", "\\+\\+\\+"],
    "= toml =",
);


// Expression used by `recognize` to identify a format from a document's first line.
// Only TOML is registered here.
const MAP_FORMAT_TO_RECOGNIZER_RX: Omit<
    Record<Format, RegExp>,
    Format.UNKNOWN
> = {
    [Format.TOML]: RX_RECOGNIZE_TOML,
};
// Expression used by `test` and the extractor to match a whole front matter block.
// Only TOML is registered here.
const MAP_FORMAT_TO_EXTRACTOR_RX: Omit<Record<Format, RegExp>, Format.UNKNOWN> =
{
    [Format.TOML]: RX_TOML,
};

// Extractor that handles TOML front matter only, parsed with the imported `parse`.
const extract = createExtractor({
    [Format.TOML]: parse as Parser,
});

/**
 * Reports whether `str` starts with a front matter block in one of `formats`.
 *
 * @param str Full document text.
 * @param formats Formats to try, in order. Defaults to every key of
 *   `MAP_FORMAT_TO_EXTRACTOR_RX`.
 * @returns `true` as soon as one format's extractor expression matches at
 *   index 0, otherwise `false`.
 * @throws {TypeError} If `Format.UNKNOWN` is reached in `formats` before a
 *   match is found.
 */
function test(str: string, formats?: Format[]): boolean {
    const candidates = formats ||
        (Object.keys(MAP_FORMAT_TO_EXTRACTOR_RX) as Format[]);

    return candidates.some((format) => {
        if (format === Format.UNKNOWN) {
            throw new TypeError("Unable to test for unknown front matter format");
        }

        const found = MAP_FORMAT_TO_EXTRACTOR_RX[format].exec(str);
        return found?.index === 0;
    });
}

/**
 * Writes a post to `path` as a `---`-delimited front matter block followed by its body.
 *
 * @param path Destination file path.
 * @param attrs Front matter attributes, serialized with `stringify`.
 * @param body Post content placed after the front matter block.
 */
async function writeFile(path: string, attrs: { [key: string]: any }, body: string) {
    const frontMatter = stringify(attrs);
    const pieces = ["---\n", frontMatter, "\n---\n\n", body];
    await Deno.writeTextFile(path, pieces.join(""));
}

// Front matter keys that are left at the top level; the loop below relocates every other key.
const permittedTopLevelKeys = new Set(["title", "description", "updated", "weight", "draft", "slug", "path", "aliases", "in_search_index", "template", "taxonomies", "extra", "date"])

// Keys that are relocated under `taxonomies`; all other non-permitted keys go under `extra`.
const taxonomies = new Set(["tags"])

/**
 * Computes the set difference `setA \ setB` as a fresh set.
 * Neither argument is modified; members keep the iteration order of `setA`.
 */
function difference<T>(setA: Set<T>, setB: Set<T>): Set<T> {
    const kept = [...setA].filter((candidate) => !setB.has(candidate));
    return new Set(kept);
}


// For every file under the current directory whose path contains "sample":
// read it, move each top-level front matter key that is not in
// `permittedTopLevelKeys` under `taxonomies` (for keys named in `taxonomies`)
// or `extra` (for all others), and write the file back via `writeFile`.
for await (const entry of walk("./", { includeDirs: false })) {
    if (!entry.path.includes("sample")) {
        continue;
    }

    console.log(entry.path);
    const str = await Deno.readTextFile(entry.path);

    // Files without recognizable front matter are treated as body-only.
    const post = test(str) ? extract(str) : { body: str, attrs: {} };

    // Make sure both destination tables exist before keys are moved into them.
    if (!post.attrs.extra) {
        post.attrs.extra = {}
    }
    if (!post.attrs.taxonomies) {
        post.attrs.taxonomies = {}
    }

    const topLevelKeys = new Set(Object.keys(post.attrs));
    const misplaced = difference(topLevelKeys, permittedTopLevelKeys);
    for (const key of misplaced) {
        const destination = taxonomies.has(key)
            ? post.attrs.taxonomies
            : post.attrs.extra;
        destination[key] = post.attrs[key];
        delete post.attrs[key];
    }

    await writeFile(entry.path, post.attrs, post.body)
}

@jpcaruana
Copy link

Thanks for the upgrade. I still have issues with the script, as regular working posts fail.

content/posts/2013/03/04/afpy.md
error: Uncaught Error: Parse error on line 1, column 26: Unexpected character: "+"
      throw new TOMLParseError(message);
            ^
    at parse (https://deno.land/std@0.171.0/encoding/_toml/parser.ts:890:13)
    at _extract (file:///xxxmigrate_taxonomies.ts:21:19)
    at extract (file:///xxx/migrate_taxonomies.ts:59:16)
    at file:///Usersjxxxmigrate_taxonomies.ts:153:20

I issued a PR on deno

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment