Skip to content

Instantly share code, notes, and snippets.

@TarVK
Created August 11, 2020 23:41
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TarVK/3782aca1299a0fc15032941343fbf973 to your computer and use it in GitHub Desktop.
Save TarVK/3782aca1299a0fc15032941343fbf973 to your computer and use it in GitHub Desktop.
Wrote a very simple arithmetic parser in javascript to test the general techique
/*******************
* All helper code *
*******************/
/**
* Tokenizes the input text using the given rules
* @param {string} text The text to tokenize
* @param {{[key: string]: RegExp}} rules The rules
* @returns {{type: string, value: string, index: number}[]} The tokens
*/
function tokenize(text, rules) {
const ruleNames = Object.keys(rules);
let index = 0;
const tokens = [];
outer: while (text.length > 0) {
for (let i = 0; i < ruleNames.length; i++) {
const ruleName = ruleNames[i];
const rule = rules[ruleName];
// Try to match the rule
const match = rule.exec(text);
if (match && match.index == 0) {
// If the rule matches, store the token
tokens.push({ type: ruleName, value: match[0], index });
// Remove the text, and continue tokenizing the remaining text
text = text.substring(match[0].length);
index += match[0].length;
continue outer;
}
}
// If no rule matches the text, throw some error
throw Error("Unexpected token " + text[0] + " at index " + index);
}
return tokens;
}
// The tokens that are analyzed
let tokens;
/**
* Consumes a token
* @param {string} token The token to consume
* @throws If the expected token wasn't found
* @returns {string} The value of the token
*/
function consume(token) {
const firstToken = tokens.shift(); // Get the first token
if (!firstToken || firstToken.type != token)
throw Error(
"Unexpected token, found: " + firstToken.type + " but expected: " + token
);
return firstToken.value;
}
/**
* Checks whether the first token is of the given type
* @param {string} token The token that is expected
* @returns {boolean} Whether the expected token was found
*/
function peek(token) {
return tokens[0] && tokens[0].type == token;
}
/**
* Combines peek and consume, consuming a token only if matched, without throwing an error if not
* @param {string} token The token that is expected
* @returns {false|string} Whether the expected token was found
*/
function optConsume(token) {
const matched = tokens[0] && tokens[0].type == token;
if (matched) {
return consume(token);
}
return false;
}
/***********************************
* All the lexer and grammar rules *
***********************************/
const lexer = {
lBracket: /\(/,
rBracket: /\)/,
value: /\d*\.?\d+/,
add: /\+/,
sub: /\-/,
mul: /\*/,
div: /\//,
};
function expression() {
let res = term();
let loop = true;
do {
if (optConsume("add")) {
res += term();
} else if (optConsume("sub")) {
res -= term();
} else {
loop = false;
}
} while (loop);
return res;
}
function term() {
let res = factor();
let loop = true;
do {
if (optConsume("mul")) {
res *= factor();
} else if (optConsume("div")) {
res /= factor();
} else {
loop = false;
}
} while (loop);
return res;
}
function factor() {
let res;
if (peek("value")) {
res = parseFloat(consume("value"));
} else {
consume("lBracket");
res = expression();
consume("rBracket");
}
return res;
}
tokens = tokenize("3*8/2*(2+2+3)", lexer);
let result = expression();
console.log(result);
@TarVK
Copy link
Author

TarVK commented Aug 12, 2020

Also made a more complete js expressions parser.
Unfortunately, due to the lack of a syntax tree creation step, true || console.log("test") will still log test instead of canceling

/*******************
 * All helper code *
 *******************/
/**
 * @typedef {Object} Token
 * @property {string} type The type of token
 * @property {string} value The textual value of the token
 * @property {number} index The index of the token in the input
 */

/**
 * Tokenizes the input text using the given rules
 * @param {string} text The text to tokenize
 * @param {{[key: string]: RegExp}} rules The rules
 * @returns {Token[]} The tokens
 */
function tokenize(text, rules) {
  const ruleNames = Object.keys(rules);
  let index = 0;
  const tokens = [];
  outer: while (text.length > 0) {
    for (let i = 0; i < ruleNames.length; i++) {
      const ruleName = ruleNames[i];
      const rule = rules[ruleName];

      // Try to match the rule
      const match = rule.exec(text);
      if (match && match.index == 0) {
        // If the rule matches, store the token
        tokens.push({ type: ruleName, value: match[0], index });

        // Remove the text, and continue tokenizing the remaining text
        text = text.substring(match[0].length);
        index += match[0].length;
        continue outer;
      }
    }

    // If no rule matches the text, throw some error
    throw Error("Unexpected token " + text[0] + " at index " + index);
  }
  return tokens;
}

/**
 * Removes the given token type from the list of tokens
 * @param {Token[]} tokens The input tokens
 * @param {string} rem The token type to remove
 * @returns {Token[]} The tokens with the given type removed
 */
function removeTokens(tokens, rem) {
  return tokens.filter(({ type }) => type != rem);
}

// The tokens that are analyzed
let tokens;
// Variables accessible to the text being evaluated
let vars;

/**
 * Consumes a token
 * @param {string} token The token to consume
 * @throws If the expected token wasn't found
 * @returns {string} The value of the token
 */
function consume(token) {
  const firstToken = tokens.shift(); // Get the first token
  if (!firstToken || firstToken.type != token)
    throw Error(
      "Unexpected token, expected '" +
        token +
        "' but found '" +
        firstToken.type +
        "' at index " +
        firstToken.index
    );
  return firstToken.value;
}

/**
 * Checks whether the first token is of the given type
 * @param {string} token The token that is expected
 * @returns {boolean} Whether the expected token was found
 */
function peek(token) {
  return tokens[0] && tokens[0].type == token;
}

/**
 * Combines peek and consume, consuming a token only if matched, without throwing an error if not
 * @param {string} token The token that is expected
 * @returns {false|string} Whether the expected token was found
 */
function optConsume(token) {
  const matched = tokens[0] && tokens[0].type == token;
  if (matched) {
    return consume(token);
  }
  return false;
}

/** A replacement for the undefined constant, to not clash with js undefined */
const Undefined = Symbol("undefined");

/***********************************
 * All the lexer and grammar rules *
 ***********************************/
const lexer = {
  leftBracket: /\(/,
  rightBracket: /\)/,
  leftSquareBracket: /\[/,
  rightSquareBracket: /\]/,
  leftCurlyBracket: /\{/,
  rightCurlyBracket: /\}/,
  colon: /\:/,
  questionMark: /\?/,
  number: /\d*\.?\d+/,
  boolean: /(true|false)(?!\w)/,
  var: /\w+/,
  add: /\+/,
  subtract: /\-/,
  multiply: /\*/,
  divide: /\//,
  dot: /\./,
  comma: /\,/,
  space: /\s+/,
  string: /(\"(\\\\|\\\"|[^"])*\")|(\'(\\\\|\\\'|[^'])*\')/,
  lessThanEqual: /<=/,
  greaterThanEqual: />=/,
  lessThan: /</,
  greaterThan: />/,
  strictEqual: /===/,
  equal: /==/,
  strictNotEqual: /!==/,
  notEqual: /!=/,
  not: /!/,
  or: /\|\|/,
  and: /&&/,
};

function expression() {
  let res = logic();
  if (optConsume("questionMark")) {
    let ifTrue = expression();
    consume("colon");
    let ifFalse = expression();
    res = res ? ifTrue : ifFalse;
  }
  return res;
}

function logic() {
  let res = clause();
  let loop = true;
  do {
    if (optConsume("or")) {
      const c = clause(); // Can't break out if a value is truthy :(
      res = res || c;
    } else {
      loop = false;
    }
  } while (loop);
  return res;
}

function clause() {
  let res = subClause();
  let loop = true;
  do {
    if (optConsume("and")) {
      const c = subClause(); // Can't break out if a value is falsy :(
      res = res && c;
    } else {
      loop = false;
    }
  } while (loop);
  return res;
}

function subClause() {
  let res = comparee();
  let loop = true;
  do {
    if (optConsume("lessThan")) {
      res = res < comparee();
    } else if (optConsume("lessThanEqual")) {
      res = res <= comparee();
    } else if (optConsume("greaterThan")) {
      res = res > comparee();
    } else if (optConsume("greaterThanEqual")) {
      res = res >= comparee();
    } else if (optConsume("equal")) {
      res = res == comparee();
    } else if (optConsume("notEqual")) {
      res = res != comparee();
    } else if (optConsume("strictEqual")) {
      res = res === comparee();
    } else if (optConsume("strictNotEqual")) {
      res = res !== comparee();
    } else {
      loop = false;
    }
  } while (loop);
  return res;
}

function comparee() {
  let res = term();
  let loop = true;
  do {
    if (optConsume("add")) {
      res += term();
    } else if (optConsume("subtract")) {
      res -= term();
    } else {
      loop = false;
    }
  } while (loop);
  return res;
}

function term() {
  let res = factor();
  let loop = true;
  do {
    if (optConsume("multiply")) {
      res *= factor();
    } else if (optConsume("divide")) {
      res /= factor();
    } else {
      loop = false;
    }
  } while (loop);
  return res;
}

function factor() {
  let res;

  let invert = undefined;
  while (optConsume("not")) invert = !invert;

  if (peek("number")) {
    res = parseFloat(consume("number"));
  } else if (peek("var")) {
    res = vars[consume("var")];
  } else if (peek("string")) {
    res = string();
  } else if (peek("boolean")) {
    res = consume("boolean") == "true";
  } else if (peek("leftSquareBracket")) {
    res = array();
  } else if (peek("leftCurlyBracket")) {
    res = object();
  } else {
    consume("leftBracket");
    res = expression();
    consume("rightBracket");
  }

  if (invert) res = !res;
  if (invert == false) res = !!res; // invert defaults to undefined

  return manyAccessors(res);
}

function manyAccessors(value) {
  let res = value;
  let loop = true;
  do {
    let newRes = Undefined;
    if (newRes == Undefined) newRes = optProperty(res);
    if (newRes == Undefined) newRes = optIndexer(res);
    if (newRes == Undefined) newRes = optCall(res);

    if (newRes != Undefined) {
      res = newRes;
    } else {
      loop = false;
    }
  } while (loop);

  return res;
}

function optProperty(value) {
  if (optConsume("dot")) {
    let res = value[consume("var")];
    return res instanceof Function ? res.bind(value) : res;
  }
  return Undefined;
}

function optIndexer(value) {
  if (optConsume("leftSquareBracket")) {
    const res = value[expression()];
    consume("rightSquareBracket");
    return res;
  }
  return Undefined;
}

function optCall(value) {
  if (optConsume("leftBracket")) {
    let args = [];
    while (!peek("rightBracket")) {
      if (args.length > 0) consume("comma");
      args.push(expression());
    }
    let res = value(...args);
    consume("rightBracket");
    return res;
  }
  return Undefined;
}

function array() {
  let res = [];
  consume("leftSquareBracket");
  while (!peek("rightSquareBracket")) {
    if (res.length > 0) consume("comma");
    res.push(expression());
  }
  consume("rightSquareBracket");
  return res;
}

function object() {
  let res = {};
  let first = true;
  consume("leftCurlyBracket");
  while (!peek("rightCurlyBracket")) {
    if (!first) consume("comma");

    let name;
    if (optConsume("leftSquareBracket")) {
      name = expression();
      consume("rightSquareBracket");
    } else if (peek("string")) {
      name = string();
    } else {
      name = consume("var");
    }
    consume("colon");

    res[name] = expression();
    first = false;
  }
  consume("rightCurlyBracket");
  return res;
}

function string() {
  let stringInp = consume("string");
  return stringInp.substring(1, stringInp.length - 1).replace(/\\(.)/g, "$1");
}

vars = {
  console: console,
  someObj: {
    someVal: 56,
    someAr: [34, "yes"],
  },
};
tokens = tokenize(
  `console.log({
    sum: someObj.someAr[0] + someObj.someVal, 
    toString1: 2+1.toString(), 
    toString2: (2+2).toString() + " \\\\\\"hoi", 
    logic: 34 > 19 && !false, 
    negation: !34,
    doubleNegation: !!34,
    dynamicIndexing: {
      [23+"test"]: false
    },
    array: [34, 45*7],
    inlineIf: true ? false ? "notThis" : "this": "notThisEither",
    "potatoes": 3
  })`,
  lexer
);
tokens = removeTokens(tokens, "space");
let result = expression();
console.log(result);

@TarVK
Copy link
Author

TarVK commented Aug 12, 2020

And finally a version that creates an AST first and evaluates after,
allowing for false && console.log('not logged') to work properly.

/*******************
 * All helper code *
 *******************/
/**
 * @typedef {Object} Token
 * @property {string} type The type of token
 * @property {string} value The textual value of the token
 * @property {number} index The index of the token in the input
 */

/**
 * Tokenizes the input text using the given rules
 * @param {string} text The text to tokenize
 * @param {{[key: string]: RegExp}} rules The rules
 * @returns {Token[]} The tokens
 */
function tokenize(text, rules) {
  const ruleNames = Object.keys(rules);
  let index = 0;
  const tokens = [];
  outer: while (text.length > 0) {
    for (let i = 0; i < ruleNames.length; i++) {
      const ruleName = ruleNames[i];
      const rule = rules[ruleName];

      // Try to match the rule
      const match = rule.exec(text);
      if (match && match.index == 0) {
        // If the rule matches, stORe the token
        tokens.push({ type: ruleName, value: match[0], index });

        // Remove the text, AND continue tokenizing the remaining text
        text = text.substring(match[0].length);
        index += match[0].length;
        continue outer;
      }
    }

    // If no rule matches the text, throw some error
    throw Error("Unexpected token " + text[0] + " at index " + index);
  }
  return tokens;
}

/**
 * @typedef {Object} ASTNode
 * @property {string} type The node type
 * @property {(ASTNode|string)[]} children The children of the node
 */
/**
 * Walks the given AST tree AND obtains a result using the given rules
 * @param {ASTNode|Token} tree The tree to walk
 * @param {{[type: string]: (children: any[], map)=>any}} rules The rules to apply fOR each node to obtain a result
 * @returns {any} The result obtained from the tree
 */
function walkTree(tree, rules) {
  if ("value" in tree) return tree.value;
  const rule = rules[tree.type];
  return rule
    ? rule(tree.children, (node) => walkTree(node, rules))
    : tree.children;
}

/**
 * Creates an AST node
 * @param {string} type The type
 * @param {(ASTNode|Token)[]} children The children
 * @returns {ASTNode} The new node
 */
function node(type, children) {
  return {
    type,
    children,
  };
}

/**
 * Removes the given token type from the list of tokens
 * @param {Token[]} tokens The input tokens
 * @param {string} rem The token type to remove
 * @returns {Token[]} The tokens with the given type removed
 */
function removeTokens(tokens, rem) {
  return tokens.filter(({ type }) => type != rem);
}

// The tokens that are analyzed
let tokens;
// Variables accessible to the text being evaluated
let vars;

/**
 * Consumes a token
 * @param {string} token The token to consume
 * @throws If the expected token wasn't found
 * @returns {string} The value of the token
 */
function consume(token) {
  const firstToken = tokens.shift(); // Get the first token
  if (!firstToken || firstToken.type != token)
    throw Error(
      "Unexpected token, expected '" +
        token +
        "' but found '" +
        firstToken.type +
        "' at index " +
        firstToken.index
    );
  return firstToken;
}

/**
 * Checks whether the first token is of the given type
 * @param {string} token The token that is expected
 * @returns {boolean} Whether the expected token was found
 */
function peek(token) {
  return tokens[0] && tokens[0].type == token;
}

/**
 * Combines peek and consume, consuming a token only if matched, without throwing an error if not
 * @param {string} token The token that is expected
 * @returns {false|string} Whether the expected token was found
 */
function optConsume(token) {
  const matched = tokens[0] && tokens[0].type == token;
  if (matched) {
    return consume(token);
  }
  return false;
}

/** A replacement fOR the undefined constant, to not clash with js undefined */
const Undefined = Symbol("undefined");

/***********************************
 * All the lexer and grammar rules *
 ***********************************/
const lexer = {
  LEFTBRACKET: /\(/,
  RIGHTBRACKET: /\)/,
  LEFTSQUAREBRACKET: /\[/,
  RIGHTSQUAREBRACKET: /\]/,
  LEFTCURLYBRACKET: /\{/,
  RIGHTCURLYBRACKET: /\}/,
  COLON: /\:/,
  QUESTIONMARK: /\?/,
  NUMBER: /\d*\.?\d+/,
  BOOLEAN: /(true|false)(?!\w)/,
  LITERAL: /\w+/,
  ADD: /\+/,
  SUBTRACT: /\-/,
  MULTIPLY: /\*/,
  DIVIDE: /\//,
  DOT: /\./,
  COMMA: /\,/,
  SPACE: /\s+/,
  STRING: /(\"(\\\\|\\\"|[^"])*\")|(\'(\\\\|\\\'|[^'])*\')/,
  LESSTHANEQUAL: /<=/,
  GREATERTHANEQUAL: />=/,
  LESSTHAN: /</,
  GREATERTHAN: />/,
  STRICTEQUAL: /===/,
  EQUAL: /==/,
  STRICTNOTEQUAL: /!==/,
  NOTEQUAL: /!=/,
  NOT: /!/,
  OR: /\|\|/,
  AND: /&&/,
};

function expression() {
  let res = logic();
  if (optConsume("QUESTIONMARK")) {
    const t = expression();
    consume("COLON");
    const f = expression();
    res = node("inlineIf", [res, t, f]);
  }
  return res;
}

function logic() {
  let res = clause();
  let loop = true;
  do {
    if (optConsume("OR")) {
      res = node("or", [res, clause()]);
    } else {
      loop = false;
    }
  } while (loop);
  return res;
}

function clause() {
  let res = subClause();
  let loop = true;
  do {
    if (optConsume("AND")) {
      res = node("and", [res, subClause()]);
    } else {
      loop = false;
    }
  } while (loop);
  return res;
}

function subClause() {
  let res = comparee();
  outer: while (true) {
    let options = [
      "LESSTHAN",
      "LESSTHANEQUAL",
      "GREATERTHAN",
      "GREATERTHANEQUAL",
      "EQUAL",
      "NOTEQUAL",
      "STRICTEQUAL",
      "STRICTNOTEQUAL",
    ];
    for (let option of options) {
      if (optConsume(option)) {
        res = node(option.toLowerCase(), [res, comparee()]);
        continue outer;
      }
    }
    break outer;
  }
  return res;
}

function comparee() {
  let res = term();
  let loop = true;
  do {
    if (optConsume("ADD")) {
      res = node("add", [res, term()]);
    } else if (optConsume("SUBTRACT")) {
      res = node("subtract", [res, term()]);
    } else {
      loop = false;
    }
  } while (loop);
  return res;
}

function term() {
  let res = factor();
  let loop = true;
  do {
    if (optConsume("MULTIPLY")) {
      res = node("multiply", [res, factor()]);
    } else if (optConsume("DIVIDE")) {
      res = node("divide", [res, factor()]);
    } else {
      loop = false;
    }
  } while (loop);
  return res;
}

function factor() {
  let res;

  let invert = undefined;
  while (optConsume("NOT")) invert = !invert;

  if (peek("NUMBER")) {
    res = node("number", [consume("NUMBER")]);
  } else if (peek("LITERAL")) {
    res = node("var", [consume("LITERAL")]);
  } else if (peek("STRING")) {
    res = node("string", [consume("STRING")]);
  } else if (peek("BOOLEAN")) {
    res = node("boolean", [consume("BOOLEAN")]);
  } else if (peek("LEFTSQUAREBRACKET")) {
    res = array();
  } else if (peek("LEFTCURLYBRACKET")) {
    res = object();
  } else {
    consume("LEFTBRACKET");
    res = expression();
    consume("RIGHTBRACKET");
  }

  if (invert) res = node("not", [res]);
  if (invert == false) res = node("not", [node("NOT", [res])]); // invert defaults to undefined

  return manyAccessors(res);
}

function manyAccessors(value) {
  let res = value;
  let loop = true;
  do {
    let newRes = Undefined;
    if (newRes == Undefined) newRes = optProperty(res);
    if (newRes == Undefined) newRes = optIndexer(res);
    if (newRes == Undefined) newRes = optCall(res);

    if (newRes != Undefined) {
      res = newRes;
    } else {
      loop = false;
    }
  } while (loop);

  return res;
}

function optProperty(value) {
  if (optConsume("DOT")) {
    return node("property", [value, consume("LITERAL")]);
  }
  return Undefined;
}

function optIndexer(value) {
  if (optConsume("LEFTSQUAREBRACKET")) {
    const res = node("property", [value, expression()]);
    consume("RIGHTSQUAREBRACKET");
    return res;
  }
  return Undefined;
}

function optCall(value) {
  if (optConsume("LEFTBRACKET")) {
    let args = [];
    while (!peek("RIGHTBRACKET")) {
      if (args.length > 0) consume("COMMA");
      args.push(expression());
    }
    let res = node("call", [value, args]);
    consume("RIGHTBRACKET");
    return res;
  }
  return Undefined;
}

function array() {
  let res = [];
  consume("LEFTSQUAREBRACKET");
  while (!peek("RIGHTSQUAREBRACKET")) {
    if (res.length > 0) consume("COMMA");
    res.push(expression());
  }
  consume("RIGHTSQUAREBRACKET");
  return node("array", [res]);
}

function object() {
  let res = [];
  let first = true;
  consume("LEFTCURLYBRACKET");
  while (!peek("RIGHTCURLYBRACKET")) {
    if (!first) consume("COMMA");

    let name;
    if (optConsume("LEFTSQUAREBRACKET")) {
      name = expression();
      consume("RIGHTSQUAREBRACKET");
    } else if (peek("STRING")) {
      name = node("string", [consume("STRING")]);
    } else {
      name = consume("LITERAL");
    }
    consume("COLON");

    res.push([name, expression()]);
    first = false;
  }
  consume("RIGHTCURLYBRACKET");
  return node("object", [res]);
}

const visitor = {
  inlineIf: ([c, t, f], r) => (r(c) ? r(t) : r(f)),
  or: ([a, b], r) => r(a) || r(b),
  and: ([a, b], r) => r(a) && r(b),
  not: ([a], r) => !r(a),
  lessthan: ([a, b], r) => r(a) < r(b),
  lessthanequal: ([a, b], r) => r(a) <= r(b),
  greaterthan: ([a, b], r) => r(a) > r(b),
  greaterthanequal: ([a, b], r) => r(a) >= r(b),
  equal: ([a, b], r) => r(a) == r(b),
  strictequal: ([a, b], r) => r(a) === r(b),
  notequal: ([a, b], r) => r(a) != r(b),
  strictnotequal: ([a, b], r) => r(a) !== r(b),
  add: ([a, b], r) => r(a) + r(b),
  subtract: ([a, b], r) => r(a) - r(b),
  multiply: ([a, b], r) => r(a) * r(b),
  divide: ([a, b], r) => r(a) / r(b),
  literal: ([v]) => v,
  property: ([v, p], r) => {
    const value = r(v);
    const prop = value[r(p)];
    return prop instanceof Function ? prop.bind(value) : prop;
  },
  call: ([v, args], r) => r(v)(...args.map((arg) => r(arg))),
  string: ([v], r) => {
    const stringInp = r(v);
    return stringInp.substring(1, stringInp.length - 1).replace(/\\(.)/g, "$1");
  },
  number: ([v], r) => parseFloat(r(v)),
  boolean: ([v], r) => r(v) == "true",
  var: ([v], r) => vars[r(v)],
  array: ([items], r) => items.map((item) => r(item)),
  object: ([props], r) => {
    const res = {};
    props.forEach(([key, value]) => (res[r(key)] = r(value)));
    return res;
  },
};

/******************************************
 * Usages of lexer + parser + interpreter *
 ******************************************/
vars = {
  console: console,
  someObj: {
    someVal: 56,
    someAr: [34, "yes"],
  },
};
tokens = tokenize(
  `console.log({
    sum: someObj.someAr[0] + someObj.someVal, 
    toString1: 2+1.toString(), 
    toString2: (2+2).toString() + " \\\\\\"hoi", 
    logic: 34 > 19 && !false, 
    negation: !34,
    doubleNegation: !!34,
    dynamicIndexing: {
      [23+"test"]: false
    },
    array: [34, 45*7],
    inlineIf: true ? false ? "notThis" : "this": "NotThisEither",
    AndBreak: false && console.log('noCall'),
    noAndBreak: true && console.log('call'),
    "potatoes": 3
  })`,
  lexer
);
tokens = removeTokens(tokens, "SPACE");
let result = walkTree(expression(), visitor);
console.log(result);

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment