Skip to content

Instantly share code, notes, and snippets.

@mindplay-dk
Last active May 25, 2021 11:18
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mindplay-dk/5167cf7e123064c287c65174593a52ab to your computer and use it in GitHub Desktop.
Save mindplay-dk/5167cf7e123064c287c65174593a52ab to your computer and use it in GitHub Desktop.
Non-validating code-golfed-to-shit SAX-style HTML 5 (ish) parser in < 0.5k
/**
 * One parsing rule: a pattern tried against the remaining input, paired with
 * a handler that receives the pattern's first capture group and returns the
 * rules to try next.
 */
type Matcher = [RegExp, (value: string) => Matcher[]];

/** An element node: tag name, attribute map, and ordered children. */
type VNode = {
  name: string;
  attrs: Record<string, any>;
  children: Array<VChild>;
};

/** A child of a `VNode`: either a nested element or a text string. */
type VChild = string | VNode;
/**
 * Creates a small, non-validating, SAX-style parser for HTML-like markup.
 *
 * The parser is a state machine: each state is a list of `Matcher` rules, the
 * first rule whose pattern matches the remaining input wins, and its handler
 * returns the rules of the next state.
 *
 * @param onStart Called with the tag name and collected attributes when a
 *   start tag is closed by `>` or `/>`.
 * @param onEnd Called with the tag name for each `</name>`, and immediately
 *   after `onStart` for self-closed tags and for the void elements listed in
 *   `voidElements`.
 * @param onText Called with each run of characters up to the next `<`.
 * @returns A function that parses one input string and fires the callbacks.
 *   Non-string input is currently ignored. The matcher state is kept between
 *   calls; only the read offset is reset.
 * @throws A string of the form `error at <offset> in <input>` when no rule
 *   matches at the current offset (a string, not an `Error`, is thrown).
 */
const Parser = (
  onStart: { (name: string, attrs: VNode["attrs"]): void },
  onEnd: { (name: string): void },
  onText: { (text: string): void }
) => {
  // Name and attributes of the start tag currently being read.
  let _tagName: string;
  let _attrs: VNode["attrs"];

  // Elements that get an immediate onEnd even without a trailing "/".
  const voidElements =
    /^(area|base|br|col|command|embed|hr|img|input|link|meta|param|source|track|wbr)$/;

  // State used between tags: start tag, end tag, or text.
  const tagParser: Matcher[] = [
    [/^<([\w\-]+)/, tagName => {
      _tagName = tagName;
      _attrs = {};
      return attrParser;
    }],
    [/^<\/([\w\-]+)>/, tagName => {
      onEnd(tagName);
      return tagParser;
    }],
    [/^([^<]+)/, text => {
      onText(text);
      return tagParser;
    }]
  ];

  // State used inside a start tag: a quoted attribute, or the tag's end.
  const attrParser: Matcher[] = [
    [/^([\w\-]+)\s*=/, attrName => ([
      [/^(["'])/, quote => ([
        // The value runs up to the matching closing quote character.
        [quote === '"' ? /^([^"]*)"/ : /^([^']*)'/, value => {
          _attrs[attrName] = value;
          return attrParser;
        }]]
      )]]
    )],
    // TODO add support for void attributes
    [/^(\/?)>/, closed => {
      onStart(_tagName, _attrs);
      if (closed || voidElements.test(_tagName)) {
        onEnd(_tagName);
      }
      return tagParser;
    }]
  ];

  let parser: Matcher[] = tagParser;

  // Skips a run of whitespace and stays in the current state. It is put in
  // front of every state reached after the first matched token.
  const skipWhitespace: Matcher = [/^\s+/, () => parser];

  let offset = 0;

  return (input: any) => {
    if (typeof input === "string") {
      offset = 0;
      next:
      // Run to the very end of the input so a final one-character token
      // (e.g. a trailing "a" or "!") is not silently dropped.
      while (offset < input.length) {
        const rest = input.slice(offset);
        for (const matcher of parser) {
          const match = matcher[0].exec(rest);
          if (match) {
            //console.log("matched ", matcher[0], " -> ", match);
            const following = matcher[1](match[1]);
            // The whitespace rule hands back the already-wrapped current
            // state; reuse it instead of wrapping again, otherwise the rule
            // list grows by one entry for every whitespace run skipped.
            parser = following[0] === skipWhitespace
              ? following
              : [skipWhitespace, ...following];
            offset += match[0].length;
            continue next;
          }
        }
        throw `error at ${offset} in ${input}`;
      }
    } else {
      // ...
    }
  };
};
// TEST: build a parser whose callbacks log every event, then run it over a
// small sample document.
const parser = Parser(
  (name, attrs) => console.log("start", name, attrs),
  (name) => console.log("end", name),
  // Log the text that was parsed (the callback previously logged `name`,
  // which is not a parameter of this callback).
  (text) => console.log("text", text)
);
parser(`
<div id="foo">
Hello:
<a href="/foo" class="bar">Link</a>
<br/>
Hi!
</div>
`)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment