-
-
Save mindplay-dk/5167cf7e123064c287c65174593a52ab to your computer and use it in GitHub Desktop.
Non-validating code-golfed-to-shit SAX-style HTML 5 (ish) parser in < 0.5k
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * One transition of the parser's state machine: a pattern to try at the
 * current input position, paired with a handler that receives the pattern's
 * first capture group and returns the matchers that form the next state.
 */
type Matcher = [RegExp, { (value: string): Matcher[] }];

/** An element node: its tag name, its attributes, and its ordered children. */
type VNode = {
  name: string;
  /** Attribute values keyed by attribute name. */
  attrs: { [attrName: string]: any };
  children: VChild[];
};

/** A child of a {@link VNode}: either a nested element or a run of text. */
type VChild = VNode | string;
/**
 * Creates a small, non-validating, SAX-style parser for HTML-like markup.
 *
 * The parser is a state machine: each state is a list of matchers, the first
 * matcher whose pattern matches at the current position wins, and its handler
 * returns the next state. Whitespace between tokens is skipped, so text runs
 * are reported without their leading whitespace (trailing whitespace is kept)
 * and whitespace-only text is never reported. Quoted attribute values are
 * captured verbatim.
 *
 * @param onStart Called when an opening tag is complete, with the tag name
 *   and the attributes collected for it.
 * @param onEnd Called for every closing tag, and immediately after `onStart`
 *   for self-closed tags (`<x/>`) and for void elements such as `<br>`.
 * @param onText Called for each run of text between tags.
 * @returns A function that parses a string and fires the callbacks. Inputs
 *   that are not strings are ignored. The state machine's current state is
 *   kept between calls.
 * @throws Error when no matcher of the current state matches the input.
 */
const Parser = (
  onStart: { (name: string, attrs: VNode["attrs"]): void },
  onEnd: { (name: string): void },
  onText: { (text: string): void }
) => {
  // Hoisted so the patterns are compiled once rather than on every token.
  const WHITESPACE = /^\s+/;
  const VOID_ELEMENTS = /^(area|base|br|col|command|embed|hr|img|input|link|meta|param|source|track|wbr)$/;

  // Name and attributes of the opening tag currently being parsed.
  let currentTag = "";
  let currentAttrs: VNode["attrs"] = {};

  // State: between tags. Recognizes an opening tag, a closing tag, or text.
  const tagParser: Matcher[] = [
    [/^<([\w\-]+)/, tagName => {
      currentTag = tagName;
      currentAttrs = {};
      return attrParser;
    }],
    [/^<\/([\w\-]+)>/, tagName => {
      onEnd(tagName);
      return tagParser;
    }],
    [/^([^<]+)/, text => {
      onText(text);
      return tagParser;
    }]
  ];

  // State: after `name=`. The whole quoted value is matched in one step so
  // that whitespace skipping can never eat characters inside the quotes.
  const valueParser = (attrName: string): Matcher[] => {
    const assign = (value: string): Matcher[] => {
      currentAttrs[attrName] = value;
      return attrParser;
    };
    return [
      [/^"([^"]*)"/, assign],
      [/^'([^']*)'/, assign]
    ];
  };

  // State: inside an opening tag. Recognizes `name=` or the end of the tag.
  const attrParser: Matcher[] = [
    [/^([\w\-]+)\s*=/, attrName => valueParser(attrName)],
    // TODO add support for void attributes
    [/^(\/?)>/, closed => {
      onStart(currentTag, currentAttrs);
      if (closed === "/" || VOID_ELEMENTS.test(currentTag)) {
        onEnd(currentTag);
      }
      return tagParser;
    }]
  ];

  // Current state; deliberately kept across calls of the returned function.
  let state: Matcher[] = tagParser;

  return (input: any) => {
    if (typeof input !== "string") {
      // Only string input is handled; anything else is ignored.
      return;
    }

    let offset = 0;

    next:
    while (offset < input.length) {
      const rest = input.slice(offset);

      // Skip whitespace between tokens without changing state.
      const blank = WHITESPACE.exec(rest);
      if (blank) {
        offset += blank[0].length;
        continue;
      }

      for (const [pattern, handler] of state) {
        const match = pattern.exec(rest);
        if (match) {
          state = handler(match[1] ?? "");
          offset += match[0].length;
          continue next;
        }
      }

      throw new Error(`error at ${offset} in ${input}`);
    }
  };
};
// TEST:
// Demo run: logs every start, end, and text event for a small document.
const parser = Parser(
  (name, attrs) => console.log("start", name, attrs),
  (name) => console.log("end", name),
  // Log the text that was received; `name` is not a parameter of this callback.
  (text) => console.log("text", text)
);

parser(`
<div id="foo">
  Hello:
  <a href="/foo" class="bar">Link</a>
  <br/>
  Hi!
</div>
`);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment