Skip to content

Instantly share code, notes, and snippets.

@andymatuschak
Last active May 9, 2021 05:08
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save andymatuschak/35be2a2041eda6773347a61ce75cb641 to your computer and use it in GitHub Desktop.
Save andymatuschak/35be2a2041eda6773347a61ce75cb641 to your computer and use it in GitHub Desktop.
parsing SRS prompts from Markdown
import mdast from "mdast";
import remarkParse from "remark-parse";
import remarkStringify from "remark-stringify";
import unified from "unified";
import unist from "unist";
import { clozeNodeType, ClozePromptNode } from "./index";
// TODO: don't match clozes inside code and html blocks
// Matches a cloze at the very start of the remaining input: an opening brace,
// a lazily matched non-empty body (capture group 1), and the first closing
// brace after it. `.` does not match line terminators, so a cloze cannot span
// multiple lines.
const clozeRegexp = /^{(.+?)}/;
/**
 * unified/remark plugin that adds `{cloze}` syntax.
 *
 * On the parser side it registers an inline tokenizer (named `clozePrompt`)
 * that runs just before the built-in `text` tokenizer and turns `{...}` into a
 * node of type `clozeNodeType` whose children are the inline-tokenized body.
 *
 * On the compiler side it registers `clozePromptCompiler` as the visitor for
 * that node type. The compiler registration is skipped when the processor has
 * no compiler with a `visitors` table attached, so the plugin can be used in
 * parse-only pipelines.
 */
export default function clozePlugin(this: unified.Processor) {
  /**
   * Inline tokenizer: consumes a cloze at the start of `value`, if there is
   * one, and returns the resulting node; returns `undefined` otherwise.
   */
  function clozeTokenizer(
    this: remarkParse.Parser & {
      tokenizeInline: (
        content: string,
        now: {
          line: number;
          column: number;
          offset: number;
        }
      ) => mdast.PhrasingContent[];
    },
    eat: remarkParse.Eat & {
      now: () => {
        line: number;
        column: number;
        offset: number;
      };
    },
    value: string
  ) {
    const match = clozeRegexp.exec(value);
    if (!match) {
      return undefined;
    }

    // The body starts one character after the current position: step over
    // the opening brace so child positions line up with the source.
    const now = eat.now();
    now.column += 1;
    now.offset += 1;

    const children = this.tokenizeInline(match[1], now);
    const output: ClozePromptNode = {
      type: clozeNodeType,
      children
    };
    return eat(match[0])(output);
  }

  // Tells the parser where the next potential cloze could begin.
  clozeTokenizer.locator = (value: string, fromIndex: number) => {
    return value.indexOf("{", fromIndex);
  };

  const parserPrototype = this.Parser.prototype as remarkParse.Parser;
  parserPrototype.inlineTokenizers.clozePrompt = clozeTokenizer as remarkParse.Tokenizer;
  // Insert ahead of "text" so the catch-all text tokenizer doesn't swallow
  // the braces first.
  parserPrototype.inlineMethods.splice(
    parserPrototype.inlineMethods.indexOf("text"),
    0,
    "clozePrompt"
  );

  // A processor used only for parsing has no Compiler; dereferencing
  // `this.Compiler.prototype` unconditionally would throw in that case.
  const compilerPrototype = (this.Compiler && this.Compiler.prototype) as
    | remarkStringify.Compiler
    | undefined;
  if (compilerPrototype && compilerPrototype.visitors) {
    compilerPrototype.visitors[clozeNodeType] = clozePromptCompiler as (
      node: unist.Node
    ) => string;
  }
}
/**
 * Compiler visitor for cloze nodes: concatenates the strings returned by
 * `this.all(node)` and wraps the result in a pair of braces.
 *
 * @param node The cloze node being stringified.
 * @returns The brace-delimited source text for the node.
 */
function clozePromptCompiler(
  this: remarkStringify.Compiler & {
    all: (node: unist.Node) => string[];
  },
  node: ClozePromptNode
): string {
  const renderedChildren = this.all(node);
  return "{" + renderedChildren.join("") + "}";
}
import unist from "unist";
import mdast from "mdast";
import parents, { NodeWithParent } from "unist-util-parents";
import { selectAll } from "unist-util-select";
import { backlinksNodeType } from "../backlinksPlugin";
import { JsonMap } from "../util/JSONTypes";
/** `type` tag carried by cloze nodes in the syntax tree (see `ClozePromptNode`). */
export const clozeNodeType = "incremental-thinking-cloze";
/** Syntax-tree node for a single cloze; its children are phrasing content. */
export interface ClozePromptNode extends unist.Node {
type: typeof clozeNodeType;
children: mdast.PhrasingContent[];
}
/** `type` tag carried by `ClozePrompt` values. */
export const clozePromptType = "cloze";
/** A cloze prompt: the block-level node that contains one or more cloze nodes. */
export interface ClozePrompt extends JsonMap {
type: typeof clozePromptType;
block: mdast.BlockContent & JsonMap; // Except note that PhrasingContent can include type ClozePromptNode.
}
/** `type` tag carried by question/answer nodes in the syntax tree (see `QAPromptNode`). */
export const qaPromptNodeType = "incremental-thinking-QA";
/** Syntax-tree node pairing a question subtree with an answer subtree. */
export interface QAPromptNode extends unist.Node {
type: typeof qaPromptNodeType;
question: mdast.Parent;
answer: mdast.Parent;
}
/** `type` tag carried by `QAPrompt` values. */
export const qaPromptType = "qaPrompt";
/** A question/answer prompt: the question and answer subtrees of a `QAPromptNode`. */
export interface QAPrompt extends JsonMap {
type: typeof qaPromptType;
question: mdast.Parent & JsonMap;
answer: mdast.Parent & JsonMap;
}
/** Any prompt kind; discriminate on the `type` field. */
export type Prompt = ClozePrompt | QAPrompt;
/**
 * Collects every prompt found in `tree`.
 *
 * Cloze prompts come first: each cloze node is mapped to its nearest
 * block-content ancestor, and each such block yields exactly one prompt no
 * matter how many cloze nodes it contains. QA prompts follow, one per QA
 * node. Prompt nodes with a backlinks node among their ancestors are ignored.
 *
 * @param tree Root of the syntax tree to search.
 * @returns Cloze prompts followed by QA prompts, each group in selection order.
 */
export function findAllPrompts(tree: unist.Node): Prompt[] {
  const annotatedTree = parents(tree);

  const seenBlocks: Set<mdast.BlockContent> = new Set();
  const clozeResults: ClozePrompt[] = [];
  const clozeMatches = selectAll(
    clozeNodeType,
    annotatedTree
  ) as NodeWithParent[];
  for (const clozeNode of clozeMatches) {
    // Climb until we reach a block-level ancestor (or run out of ancestors).
    let enclosing: NodeWithParent | null = clozeNode.parent;
    while (enclosing && !isBlockContent(enclosing)) {
      enclosing = enclosing.parent;
    }
    if (!enclosing) {
      continue;
    }
    if (promptNodeHasUnsupportedParent(clozeNode)) {
      continue;
    }
    if (seenBlocks.has(enclosing)) {
      // This block already produced a prompt for an earlier cloze.
      continue;
    }
    seenBlocks.add(enclosing);
    clozeResults.push({
      type: "cloze",
      block: enclosing as mdast.BlockContent & JsonMap
    });
  }

  const qaResults: QAPrompt[] = [];
  for (const candidate of selectAll(qaPromptNodeType, annotatedTree)) {
    if (promptNodeHasUnsupportedParent(candidate as NodeWithParent)) {
      continue;
    }
    const qaNode = candidate as QAPromptNode;
    qaResults.push({
      type: "qaPrompt",
      question: qaNode.question as mdast.Parent & JsonMap,
      answer: qaNode.answer as mdast.Parent & JsonMap
    });
  }

  return [...clozeResults, ...qaResults];
}
/**
 * Lists the cloze nodes found by selecting `clozeNodeType` within the
 * prompt's block.
 *
 * @param clozePrompt The prompt whose block should be searched.
 * @returns The matching cloze nodes, in selection order.
 */
export function getClozeNodesInClozePrompt(
  clozePrompt: ClozePrompt
): ClozePromptNode[] {
  const { block } = clozePrompt;
  const matches = selectAll(clozeNodeType, block);
  return matches as ClozePromptNode[];
}
/**
 * Reports whether any ancestor of `promptNode` is a backlinks node.
 *
 * @param promptNode A node carrying parent links.
 * @returns `true` if an ancestor has type `backlinksNodeType`, else `false`.
 */
function promptNodeHasUnsupportedParent(promptNode: NodeWithParent): boolean {
  for (
    let ancestor = promptNode.parent;
    ancestor;
    ancestor = ancestor.parent
  ) {
    if (ancestor.type === backlinksNodeType) {
      return true;
    }
  }
  return false;
}
/** Node `type` values that `isBlockContent` accepts as block content. */
const blockTypes: ReadonlySet<string> = new Set<string>([
  "paragraph",
  "heading",
  "thematicBreak",
  "blockquote",
  "list",
  "table",
  "html",
  "code"
]);

/**
 * Type guard: narrows `node` to block content when its `type` is one of the
 * entries in `blockTypes`.
 */
function isBlockContent(node: unist.Node): node is mdast.BlockContent {
  const { type } = node;
  return blockTypes.has(type);
}
import mdast from "mdast";
import remarkStringify from "remark-stringify";
import unified from "unified";
import unist from "unist";
import parents, { NodeWithParent } from "unist-util-parents";
import { selectAll } from "unist-util-select";
import { QAPromptNode, qaPromptNodeType } from "./index";
// TODO: don't match QA prompts inside code and html blocks
/**
 * unified/remark plugin for "Q. ... / A. ..." prompts.
 *
 * Returns `extractQAPromptNodes` as the transformer, which rewrites matching
 * paragraphs into QA prompt nodes. When the processor has a compiler with a
 * `visitors` table, `qaPromptCompiler` is also registered for those nodes;
 * the registration is skipped otherwise so that parse/transform-only
 * pipelines (no compiler attached) don't crash on `this.Compiler.prototype`.
 */
export default function qaPromptPlugin(this: unified.Processor) {
  const compilerPrototype = (this.Compiler && this.Compiler.prototype) as
    | remarkStringify.Compiler
    | undefined;
  if (compilerPrototype && compilerPrototype.visitors) {
    compilerPrototype.visitors[qaPromptNodeType] = qaPromptCompiler as (
      node: unist.Node
    ) => string;
  }
  return extractQAPromptNodes;
}
/**
 * Compiler visitor for QA prompt nodes. Stringification is not implemented:
 * every call throws.
 *
 * @throws Error with the message "Unimplemented", unconditionally.
 */
function qaPromptCompiler(
  this: remarkStringify.Compiler & {
    all: (node: unist.Node) => string[];
  },
  node: QAPromptNode
): string {
  const unimplemented = new Error("Unimplemented");
  throw unimplemented;
}
// Literal text that opens the question part of a QA prompt.
const questionPrefix = "Q. ";
// Literal text that opens the answer part of a QA prompt.
const answerPrefix = "A. ";
// The prefix contains ".", which is a wildcard in a regular expression. It
// must be escaped before being interpolated, otherwise "\nAt " or "\nAs "
// would be mistaken for the start of an answer.
const escapedAnswerPrefix = answerPrefix.replace(
  /[.*+?^${}()|[\]\\]/g,
  "\\$&"
);
// Matches a newline immediately followed by the literal answer prefix, i.e.
// the point where a single paragraph switches from question to answer.
// (No flags: the pattern has no anchors, so multiline mode would be a no-op.)
const answerSplitRegexp = new RegExp("\n" + escapedAnswerPrefix);
/**
 * Transformer that rewrites "Q. ... / A. ..." paragraphs into QA prompt nodes,
 * mutating the tree in place.
 *
 * Two layouts are recognised:
 *
 * 1. Two adjacent paragraphs: the first consists of a single text node
 *    starting with the question prefix, the second begins with a text node
 *    starting with the answer prefix. Both paragraphs are replaced by one QA
 *    prompt node.
 * 2. One paragraph beginning with a text node that starts with the question
 *    prefix and containing, in some text node, a newline followed by the
 *    answer prefix. The paragraph is split at that point and replaced by one
 *    QA prompt node.
 *
 * In both layouts the prefixes are stripped from the resulting text.
 * Candidates that don't fit either layout are left untouched.
 *
 * @param node Root of the tree to transform.
 * @returns The same `node`, after mutation.
 * @throws Error if a matched answer paragraph cannot be found among its
 *   container's children (an inconsistent tree).
 */
function extractQAPromptNodes(node: unist.Node): unist.Node {
  const nodeWithParents = parents(node);

  // ---- Pass 1: question and answer in adjacent paragraphs. ----
  const answerNodes = selectAll(
    `paragraph>text[value^='${answerPrefix}']`,
    nodeWithParents
  ) as NodeWithParent[];
  for (const answerNode of answerNodes) {
    const answerParagraphWrapper = answerNode.parent;
    const answerContainerWrapper =
      answerParagraphWrapper && answerParagraphWrapper.parent;
    if (!answerParagraphWrapper || !answerContainerWrapper) {
      // A paragraph with no container has no preceding sibling to pair with.
      continue;
    }

    // `.node` is the original (unwrapped) node; all mutation happens there.
    const parent = answerContainerWrapper.node;
    const answerParagraphIndex = parent.children.indexOf(
      answerParagraphWrapper.node
    );
    if (answerParagraphIndex === -1) {
      throw new Error(
        `Unexpected QA prompt answer node: ${JSON.stringify(
          answerNode,
          null,
          "\t"
        )}`
      );
    }
    if (answerParagraphIndex === 0) {
      // Nothing precedes this paragraph, so it can't answer a question. This
      // is ordinary input (e.g. a list item starting with "A. Smith ..."),
      // not an error.
      continue;
    }

    const answerParagraphNode = parent.children[
      answerParagraphIndex
    ] as mdast.Paragraph;
    if (answerParagraphNode.children[0] !== answerNode.node) {
      // The selector matches any text child; only a paragraph that *begins*
      // with the answer prefix counts. Without this check the prefix would be
      // stripped from the wrong node.
      continue;
    }

    const questionParagraphNode = parent.children[
      answerParagraphIndex - 1
    ] as mdast.Paragraph;
    if (questionParagraphNode.type !== "paragraph") {
      continue;
    }
    const questionTextNode = questionParagraphNode.children[0] as mdast.Text;
    if (
      questionParagraphNode.children.length !== 1 ||
      questionTextNode.type !== "text" ||
      !questionTextNode.value.startsWith(questionPrefix)
    ) {
      continue;
    }

    // Now we'll strip the prefixes off.
    questionTextNode.value = questionTextNode.value.slice(
      questionPrefix.length
    );
    const answerTextNode = answerParagraphNode.children[0] as mdast.Text;
    answerTextNode.value = answerTextNode.value.slice(answerPrefix.length);

    const qaPromptNode: QAPromptNode = {
      type: qaPromptNodeType,
      question: questionParagraphNode,
      answer: answerParagraphNode
    };
    parent.children.splice(answerParagraphIndex - 1, 2, qaPromptNode);
  }

  // ---- Pass 2: question and answer inside a single paragraph. ----
  const questionNodes = selectAll(
    `paragraph>text[value^='${questionPrefix}']`,
    nodeWithParents
  ) as NodeWithParent[];
  for (const questionNode of questionNodes) {
    const questionParagraphWrapper = questionNode.parent;
    const questionContainerWrapper =
      questionParagraphWrapper && questionParagraphWrapper.parent;
    if (!questionParagraphWrapper || !questionContainerWrapper) {
      continue;
    }
    const paragraphNode = questionParagraphWrapper.node as mdast.Paragraph;
    const paragraphContainer = questionContainerWrapper.node as unist.Parent;

    const paragraphIndex = paragraphContainer.children.indexOf(paragraphNode);
    if (paragraphIndex === -1) {
      // Pass 1 already folded this paragraph into a QA prompt node. Splicing
      // at index -1 would overwrite the container's last child instead.
      continue;
    }
    if (paragraphNode.children[0] !== questionNode.node) {
      // Only paragraphs that begin with the question prefix qualify; the
      // prefix is stripped from the first child below.
      continue;
    }

    const splitNodeIndex = paragraphNode.children.findIndex(
      child =>
        child.type === "text" &&
        answerSplitRegexp.test((child as mdast.Text).value)
    );
    if (splitNodeIndex === -1) {
      continue;
    }
    const splitNode = paragraphNode.children[splitNodeIndex] as mdast.Text;
    const match = answerSplitRegexp.exec(splitNode.value);
    if (!match) {
      continue;
    }
    const preSplitString = splitNode.value.slice(0, match.index);
    const postSplitString = splitNode.value.slice(match.index);

    const questionPhrasingNodes = paragraphNode.children.slice(
      0,
      splitNodeIndex
    );
    // Shallow copy: element 0 is `splitNode` itself.
    const answerPhrasingNodes = paragraphNode.children.slice(splitNodeIndex);
    if (preSplitString !== "") {
      // We've gotta split that node: text before the newline belongs to the
      // question.
      questionPhrasingNodes.push({
        type: "text",
        value: preSplitString
      });
    }

    const leadingQuestionText = questionPhrasingNodes[0] as mdast.Text;
    leadingQuestionText.value = leadingQuestionText.value.slice(
      questionPrefix.length
    );
    // The answer begins at the newline; drop it together with the prefix.
    splitNode.value = postSplitString.slice(
      answerPrefix.length + 1 // add 1 for the newline
    );

    const qaPromptNode: QAPromptNode = {
      type: qaPromptNodeType,
      question: { type: "paragraph", children: questionPhrasingNodes },
      answer: { type: "paragraph", children: answerPhrasingNodes }
    };
    paragraphContainer.children.splice(paragraphIndex, 1, qaPromptNode);
  }

  return node;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment