Skip to content

Instantly share code, notes, and snippets.

@ToJans
Last active February 17, 2022 16:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ToJans/7edd74935025fd03c2735656e1d45669 to your computer and use it in GitHub Desktop.
Save ToJans/7edd74935025fd03c2735656e1d45669 to your computer and use it in GitHub Desktop.
Minimalistic example of tokenizing and parsing JavaScript.
/**
 * Token patterns, keyed by token category.
 *
 * Every pattern is anchored with `^` and is meant to be matched against the
 * start of the remaining input. The tokenizer walks the keys in declaration
 * order and takes the first non-empty match, so ORDER MATTERS: more specific
 * patterns must come before the patterns that would otherwise shadow them
 * (e.g. `comment` before `operator`, `number` before `dot`).
 */
const TokenExpressions = {
  // `// line comment` (possibly empty) or `/* block comment */`. The block form
  // is non-greedy and may contain any character, including `(`, `*` and `/`.
  comment: /^(\/\/[^\n]*|\/\*[\s\S]*?\*\/)/,
  // Double- or single-quoted string on one line; a backslash escapes the next
  // character, so `"a\\"` ends at its own closing quote.
  strings: /^("(\\[\s\S]|[^"\\\n])*"|'(\\[\s\S]|[^'\\\n])*')/,
  // Template literal (may be empty, may span lines, may contain escaped backticks).
  multiline_strings: /^`(\\[\s\S]|[^`\\])*`/,
  whitespace: /^\s+/,
  brace: /^[()]/,
  curlyBrace: /^[{}]/,
  array: /^[\[\]]/,
  comma: /^,/,
  // Two-character operators are listed first so `>=` is not split into `>` and `=`.
  operator: /^(>=|<=|\+|-|\*|\/|=|>|<|&|\||%|!|\^)/,
  // Requires at least one digit; placed before `dot` so `.5` is one number token.
  number: /^(\d+(\.\d+)?|\.\d+)/,
  dot: /^\./,
  // Note: the `\n` alternative is shadowed by `whitespace`, which is tried earlier.
  endOfStatement: /^(;|\n)/,
  // Word characters plus `$`, so names such as `$style` stay in one token.
  identifier: /^[\w$]+/,
  // Catch-all: any single character that is neither whitespace nor a word character.
  token: /^[^\s\w]/
};
// Name of a token category: one of the keys of `TokenExpressions`.
type TokenType = keyof typeof TokenExpressions;
// A lexed token as a `[category, matched text]` pair.
type Token = [TokenType, string];
// A parse-tree entry: either a plain token or a `["group", children]` pair
// whose second element holds the nested entries.
type ParseNodeItem = Token | ["group", ParseNodeItem[]];
/**
 * Lazily splits `src` into tokens.
 *
 * At each step the patterns in `TokenExpressions` are tried in declaration
 * order against the start of the remaining input; the first non-empty match
 * is yielded as `[category, matched text]` and removed from the input.
 *
 * The two comments at the top of the body are sample data: this file feeds
 * `tokenize.toString()` back into the tokenizer as its demo input.
 *
 * @param src Source text to tokenize.
 * @returns A generator yielding one token at a time.
 * @throws Error when no pattern matches the remaining input; the message
 *   contains (at most) the first 30 characters of that input.
 */
function* tokenize(src: string): Generator<Token, void, unknown> {
  /* some intermediate comment
  blah
  */
  // do all kinds of stuff: & a+2
  // Hoisted: the list of categories does not change while tokenizing.
  const types = Object.keys(TokenExpressions) as TokenType[];
  while (src.length) {
    let item: Token | null = null;
    for (const type of types) {
      const res = src.match(TokenExpressions[type]);
      // Ignore empty matches, otherwise the input would never shrink.
      if (res && res[0].length) {
        item = [type, res[0]];
        break;
      }
    }
    if (!item) {
      // Show the START of the unparsable input, truncated to 30 characters.
      const preview = src.length > 30 ? src.slice(0, 30) + "..." : src;
      throw new Error(`unable to parse ****\n${preview}\n***`);
    }
    src = src.slice(item[1].length);
    yield item;
  }
}
/**
 * Builds a nested tree from a flat token stream by grouping bracketed runs.
 *
 * An opening `(`, `[` or `{` starts a new `["group", children]` entry whose
 * children begin with the opening token; the matching closing token is the
 * last child of that group. Bracket kinds are not checked against each other:
 * any closing bracket closes the innermost open group.
 *
 * Malformed input is tolerated rather than crashing:
 * - a closing bracket with no open group is kept as a plain token at the
 *   current level;
 * - groups that are never closed simply end at the end of the input.
 *
 * @param tokens Tokens as `[category, text]` pairs.
 * @returns The top-level list of tokens and groups.
 */
function parse(tokens: Iterable<[TokenType, string]>): ParseNodeItem[] {
  const root: ParseNodeItem[] = [];
  // Enclosing child lists, innermost last; `node` is the list being filled.
  const stack: ParseNodeItem[][] = [];
  let node = root;
  for (const token of tokens) {
    switch (token[0]) {
      case "array":
      case "brace":
      case "curlyBrace":
        if (/^[\[({]/.test(token[1])) {
          // Opening bracket: descend into a fresh group.
          const children: ParseNodeItem[] = [token];
          node.push(["group", children]);
          stack.push(node);
          node = children;
        } else {
          // Closing bracket: finish the current group and climb back up.
          node.push(token);
          const parent = stack.pop();
          if (parent) {
            node = parent;
          }
        }
        break;
      default:
        node.push(token);
    }
  }
  // Always return the outermost list, even if some groups were left open.
  return root;
}
/** Replaces the characters that are special in HTML text and attributes with entities. */
function escapeHTML(text: string): string {
  const entities: Record<string, string> = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return text.replace(/[&<>"']/g, (ch) => entities[ch]);
}

/**
 * Renders a parse tree as nested `<span>` markup for visual debugging.
 *
 * Every entry becomes `<span class="CATEGORY">…</span>`. A plain token's text
 * is HTML-escaped so that source characters such as `<`, `>` and `&` show up
 * literally instead of being interpreted as markup. A group additionally wraps
 * its recursively rendered children in an inner `<span class="group">`.
 *
 * @param node List of tokens and groups to render.
 * @returns The HTML fragment; an empty string for an empty list.
 */
function debugHTML(node: ParseNodeItem[]): string {
  let outStr = "";
  for (const n of node) {
    outStr += `<span class="${n[0]}">`;
    if (n[0] === "group") {
      outStr += `<span class="group">${debugHTML(n[1])}</span>`;
    } else {
      outStr += escapeHTML(n[1]);
    }
    outStr += "</span>";
  }
  return outStr;
}
// Demo driver: tokenizes the source text of `tokenize` itself, groups the
// tokens into a tree, logs the tree, and builds (but does not attach) a
// stylesheet and a container element for the HTML rendering of that tree.
// Spread into an array literal rather than `new Array(...)`, whose meaning
// changes when it receives a single numeric argument.
const tokenized = [...tokenize(tokenize.toString())];
// The tree and its HTML are computed once and reused below.
const tree = parse(tokenized);
console.log(tree);
// NOTE(review): the stylesheet text is kept exactly as written. It repeats a
// few selectors (`.comment`, `.root > span > span`, and the selector with
// eight nested spans); where the same selector sets the same property more
// than once, only the last declaration takes effect.
const $style = document.createElement("style");
$style.setAttribute("type","text/css");
$style.innerHTML = `
.root span {
margin: 0em;
padding: 0.1em;
display: inline;
line-height: 2;
white-space:pre;
font-family: Arial, Helvetica, sans-serif;
}
.identifier {
color: #456789
}
.brace, .curlyBrace, .array {
color: #789456;
}
.operator {
font-weight: bold;
}
.comment {
color: #999;
display: inline-block;
}
.comment {
color: #999;
display: inline-block;
}
.multiline_strings {
font-style: italic;
font-weight:bold;
}
.group {
display: inline-block;
border: 1px gray dashed;
padding: 1em;
margin: 1em;
}
.group:first-child > span:first-child,
.group:first-child > span:last-child
{
color: #783241;
font-weight:bold;
}
.root > span {
background-color: #eef;
}
.root > span > span {
background-color: #eff;
}
.root > span > span {
background-color: #fef;
}
.root > span > span > span {
background-color: #ffe;
}
.root > span > span > span > span{
background-color: #fee;
}
.root > span > span > span > span > span{
background-color: #ccf;
}
.root > span > span > span > span > span > span {
background-color: #fcc;
}
.root > span > span > span > span > span > span > span {
background-color: #cfc;
}
.root > span > span > span > span > span > span > span > span{
background-color: #ffc;
}
.root > span > span > span > span > span > span > span > span{
background-color: #cff;
}
.root > span > span > span > span > span > span > span > span{
background-color: #fcf;
}
.root > span > span > span > span > span > span > span > span > span {
background-color: #9ff;
}
.root > span > span > span > span > span > span > span > span > span > span {
background-color: #ff9;
}
.root > span > span > span > span > span > span > span > span > span > span > span {
background-color: #f9f;
}
`;
// Attaching the stylesheet to the page is left disabled, as in the original.
//document.head.append($style);
const html = debugHTML(tree);
const $div = document.createElement("div");
$div.classList.add("root");
$div.innerHTML = html;
// Attaching the rendered tree to the page is left disabled, as in the original.
//document.body.append($div);
console.log(html);
@ToJans
Copy link
Author

ToJans commented Feb 17, 2022

This is the output it generates:
image

@ToJans
Copy link
Author

ToJans commented Feb 17, 2022

Updated, now looks like this:
image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment