wylieconlon/parser.js

## parser.js
const TOKENS = {
  START: 'start',
  END: 'end',
};

function tokenize(code) {
  let current = 0;
  let startOfToken;
  let tokens = [];

  // Match the first character of any token
  while (current < code.length) {
    startOfToken = current;
    switch(code[current]) {
      case '(':
        tokens.push(TOKENS.START);
        current++;
        break;
      case ')':
        tokens.push(TOKENS.END);
        current++;
        break;
      case '"':
      case '\'':
        _consumeString(code[current]);
        break;
      case (code[current].match(/[0-9]/) || {}).input:
        _consumeNumber();
        break;
      case (code[current].match(/\S/) || {}).input:
        // Non white space character
        _consumeOperator();
        break;
      default:
        // All white space is ignored between tokens
        current++;
        break;
    }
  }

  function _consumeOperator() {
    while (/\S/.test(code[current]) && current < code.length) {
      current++;
    }
    tokens.push(code.substring(startOfToken, current));
  }

  function _consumeString(stringMarker) {
    startOfToken++;
    current++;
    while (code[current] !== stringMarker && current < code.length) {
      current++;
    }
    const stringToken = '"' + code.substring(startOfToken, current) + '"';
    tokens.push(stringToken);
    current++;
  }

  function _consumeNumber() {
    // This is not valid for exponentiated numbers such as 0.5e10
    while (/[0-9\.]/.test(code[current]) && current < code.length) {
      current++;
    }
    tokens.push(parseFloat(code.substring(startOfToken, current)));
  }

  return tokens;
}

function parse(code) {
  const tokens = tokenize(code);

  let globalIndex = 0;
  let errors = [];

  let result = _consumeExpression();

  /**
   * expression:
   *   start
   *   operator
   *   (repeated) literal or expression
   *   end
   */
  function _consumeExpression() {
    let ast = [];

    if (tokens[globalIndex] !== TOKENS.START) {
      errors.push('Missing opening parentheses');
    }

    globalIndex++;
    if (globalIndex < tokens.length) {
      const operator = tokens[globalIndex];

      if (typeof operator !== 'string') {
        errors.push('Invalid operator ' + operator);
      } else if (operator === TOKENS.START || operator === TOKENS.END) {
        errors.push('Invalid operator ' + operator);
      } else if (operator.indexOf('"') === 0) {
        errors.push('Invalid operator ' + operator);
      } else {
        ast.push(operator);
      }

      globalIndex++;
    }

    while (globalIndex < tokens.length) {
      if (tokens[globalIndex] === TOKENS.START) {
        const expression = _consumeExpression();
        if (expression.length > 0) {
          ast.push(expression);
        }
      } else if (tokens[globalIndex] === TOKENS.END) {
        // Early return if we find an end token
        return ast;
      } else {
        ast.push(tokens[globalIndex]);
      }

      globalIndex++;
    }

    errors.push('Missing closing parentheses');
  }

  if (errors.length) {
    throw new Error(errors.join('\n'));
  }

  return result;
}

module.exports = {
  parse,
  tokenize
};

## test.js
const assert = require('assert');

const { tokenize, parse } = require('../parser.js');

describe('Tokenizing list expressions', function() {
  it('handles a simple numeric expression', function() {
    const tokens = tokenize('(+ 1 2)');

    assert.deepEqual(tokens, ['start', '+', 1, 2, 'end']);
  });

  it('handles a simple expression with string literals', function() {
    const tokens = tokenize('(list "boba fett" "jabba the hutt")');

    assert.deepEqual(tokens, ['start', 'list', '"boba fett"', '"jabba the hutt"', 'end']);
  });

  it('handles a nested expression', function() {
    const tokens = tokenize('(- (+ 1 2) 5)');

    assert.deepEqual(tokens, ['start', '-', 'start', '+', 1, 2, 'end', 5, 'end']);
  });

  it('ignores whitespaces', function() {
    const tokens = tokenize('\n\n(\n  + \n1\n2)\n\n');

    assert.deepEqual(tokens, ['start', '+', 1, 2, 'end']);
  });
});

describe('Parsing tokens into AST', function() {
  it('handles a simple expression', function() {
    const ast = parse('(+ 1 2)');

    assert.deepEqual(ast, ['+', 1, 2]);
  });

  it('handles a nested expression', function() {
    const ast = parse('(- (+ 1 2) 5)');

    assert.deepEqual(ast, ['-', ['+', 1, 2], 5]);
  });

  it('throws an error when the operator is a number', function() {
    assert.throws(function() {
      parse('(5)');
    }, /Invalid operator 5/);
  });

  it('throws an error when the operator is a string', function() {
    assert.throws(function() {
      parse('("Not an operator")');
    }, /Invalid operator "Not an operator"/);
  });

  it('throws an error when an expression is missing opening parens', function() {
    assert.throws(function() {
      parse('5)');
    }, /Missing opening parentheses/);
  });

  it('throws an error when an expression is missing closing parens', function() {
    assert.throws(function() {
      parse('((list)');
    }, /Missing closing parentheses/);
  });
});
	const TOKENS = {
	START: 'start',
	END: 'end',
	};

	function tokenize(code) {
	let current = 0;
	let startOfToken;
	let tokens = [];

	// Match the first character of any token
	while (current < code.length) {
	startOfToken = current;
	switch(code[current]) {
	case '(':
	tokens.push(TOKENS.START);
	current++;
	break;
	case ')':
	tokens.push(TOKENS.END);
	current++;
	break;
	case '"':
	case '\'':
	_consumeString(code[current]);
	break;
	case (code[current].match(/[0-9]/) \|\| {}).input:
	_consumeNumber();
	break;
	case (code[current].match(/\S/) \|\| {}).input:
	// Non white space character
	_consumeOperator();
	break;
	default:
	// All white space is ignored between tokens
	current++;
	break;
	}
	}

	function _consumeOperator() {
	while (/\S/.test(code[current]) && current < code.length) {
	current++;
	}
	tokens.push(code.substring(startOfToken, current));
	}

	function _consumeString(stringMarker) {
	startOfToken++;
	current++;
	while (code[current] !== stringMarker && current < code.length) {
	current++;
	}
	const stringToken = '"' + code.substring(startOfToken, current) + '"';
	tokens.push(stringToken);
	current++;
	}

	function _consumeNumber() {
	// This is not valid for exponentiated numbers such as 0.5e10
	while (/[0-9\.]/.test(code[current]) && current < code.length) {
	current++;
	}
	tokens.push(parseFloat(code.substring(startOfToken, current)));
	}

	return tokens;
	}

	function parse(code) {
	const tokens = tokenize(code);

	let globalIndex = 0;
	let errors = [];

	let result = _consumeExpression();

	/**
	* expression:
	* start
	* operator
	* (repeated) literal or expression
	* end
	*/
	function _consumeExpression() {
	let ast = [];

	if (tokens[globalIndex] !== TOKENS.START) {
	errors.push('Missing opening parentheses');
	}

	globalIndex++;
	if (globalIndex < tokens.length) {
	const operator = tokens[globalIndex];

	if (typeof operator !== 'string') {
	errors.push('Invalid operator ' + operator);
	} else if (operator === TOKENS.START \|\| operator === TOKENS.END) {
	errors.push('Invalid operator ' + operator);
	} else if (operator.indexOf('"') === 0) {
	errors.push('Invalid operator ' + operator);
	} else {
	ast.push(operator);
	}

	globalIndex++;
	}

	while (globalIndex < tokens.length) {
	if (tokens[globalIndex] === TOKENS.START) {
	const expression = _consumeExpression();
	if (expression.length > 0) {
	ast.push(expression);
	}
	} else if (tokens[globalIndex] === TOKENS.END) {
	// Early return if we find an end token
	return ast;
	} else {
	ast.push(tokens[globalIndex]);
	}

	globalIndex++;
	}

	errors.push('Missing closing parentheses');
	}

	if (errors.length) {
	throw new Error(errors.join('\n'));
	}

	return result;
	}

	module.exports = {
	parse,
	tokenize
	};
	const assert = require('assert');

	const { tokenize, parse } = require('../parser.js');

	describe('Tokenizing list expressions', function() {
	it('handles a simple numeric expression', function() {
	const tokens = tokenize('(+ 1 2)');

	assert.deepEqual(tokens, ['start', '+', 1, 2, 'end']);
	});

	it('handles a simple expression with string literals', function() {
	const tokens = tokenize('(list "boba fett" "jabba the hutt")');

	assert.deepEqual(tokens, ['start', 'list', '"boba fett"', '"jabba the hutt"', 'end']);
	});

	it('handles a nested expression', function() {
	const tokens = tokenize('(- (+ 1 2) 5)');

	assert.deepEqual(tokens, ['start', '-', 'start', '+', 1, 2, 'end', 5, 'end']);
	});

	it('ignores whitespaces', function() {
	const tokens = tokenize('\n\n(\n + \n1\n2)\n\n');

	assert.deepEqual(tokens, ['start', '+', 1, 2, 'end']);
	});
	});

	describe('Parsing tokens into AST', function() {
	it('handles a simple expression', function() {
	const ast = parse('(+ 1 2)');

	assert.deepEqual(ast, ['+', 1, 2]);
	});

	it('handles a nested expression', function() {
	const ast = parse('(- (+ 1 2) 5)');

	assert.deepEqual(ast, ['-', ['+', 1, 2], 5]);
	});

	it('throws an error when the operator is a number', function() {
	assert.throws(function() {
	parse('(5)');
	}, /Invalid operator 5/);
	});

	it('throws an error when the operator is a string', function() {
	assert.throws(function() {
	parse('("Not an operator")');
	}, /Invalid operator "Not an operator"/);
	});

	it('throws an error when an expression is missing opening parens', function() {
	assert.throws(function() {
	parse('5)');
	}, /Missing opening parentheses/);
	});

	it('throws an error when an expression is missing closing parens', function() {
	assert.throws(function() {
	parse('((list)');
	}, /Missing closing parentheses/);
	});
	});