DmitrySoshnikov/LL-parser.js

## 2 changes: 1 addition & 1 deletion LL-parser.js
@@ -5,7 +5,7 @@

     * MIT Style license
 * MIT Style license

     *
 *

     * Often one can see manually written LL parsers implemented as
 * Often one can see manually written LL parsers implemented as

     * recursive decent. Approach in this diff is a classical parse table
 * recursive decent. Approach in this diff is a classical parse table

     * recursive descent. Approach in this diff is a classical parse table
 * recursive descent. Approach in this diff is a classical parse table

     * state machine.
 * state machine.

     *
 *

     *  LL parser consists of:
 *  LL parser consists of:


## 2 changes: 1 addition & 1 deletion LL-parser.js
@@ -122,7 +122,7 @@ function buildTable(grammar, source) {

      return {
  return {

        'S': {'(': 2, 'a': 1},
    'S': {'(': 2, 'a': 1},

        'F': {'a': 3}
    'F': {'a': 3}

      }
  }

      };
  };

    }
}


    var productionNumbers = [];
var productionNumbers = [];


## 2 changes: 1 addition & 1 deletion LL-parser.js
@@ -67,7 +67,7 @@

     *
 *

     * S -> ( S + F ) -> ( F + F ) -> ( a + F ) -> ( a + a )
 * S -> ( S + F ) -> ( F + F ) -> ( a + F ) -> ( a + a )

     *
 *

     * We see that each time the *left most* non-terminal is derived. Hence, the
 * We see that each time the *left most* non-terminal is derived. Hence, the

     * We see that each time the *left most* non-terminal is replaced. Hence, the
 * We see that each time the *left most* non-terminal is replaced. Hence, the

     * name of the parser: LL - scan source from Left to right, and apply the
 * name of the parser: LL - scan source from Left to right, and apply the

     * Left most derivation.
 * Left most derivation.

     */
 */


## 18 changes: 10 additions & 8 deletions LL-parser.js
@@ -10,9 +10,9 @@

     *
 *

     *  LL parser consists of:
 *  LL parser consists of:

     *
 *

     * - input buffer (source code)
 * - input buffer (source code)

     * - stack
 * - stack

     * - parsing table (state machine)
 * - parsing table (state machine)

     * 1. input buffer (source code)
 * 1. input buffer (source code)

     * 2. stack
 * 2. stack

     * 3. parsing table (state machine)
 * 3. parsing table (state machine)

     *
 *

     * Parsing table:
 * Parsing table:

     *
 *
@@ -21,15 +21,17 @@

     *   on top of the stack.
 *   on top of the stack.

     *
 *

     *   - Rows in the table are non-terminals
 *   - Rows in the table are non-terminals

     *   - Cols are terminals
 *   - Cols are terminals

     *   - Columns are terminals
 *   - Columns are terminals

     *
 *

     * Parsing algorithm:
 * Parsing algorithm:

     *
 *

     *   - if the top of the stack is *terminal*, then just
 *   - if the top of the stack is *terminal*, then just

     *     discard it from the stack, and move cursor further.
 *     discard it from the stack, and move cursor further.


     *   - if the top of the stack is *terminal*, and matches current symbol in
 *   - if the top of the stack is *terminal*, and matches current symbol in

     *     buffer, then just discard it from the stack, and move cursor further.
 *     buffer, then just discard it from the stack, and move cursor further.

     *     (if doesn't match -- parse error).
 *     (if doesn't match -- parse error).

     *
 *

     *   - Else (it must be a non-terminal), replace it with an
 *   - Else (it must be a non-terminal), replace it with an

     *     alternative production, corresponding to the production number.
 *     alternative production, corresponding to the production number.

     *     (if no production -- parse error).
 *     (if no production -- parse error).

     *
 *

     * $ - is a special symbol used to mark bottom of the stack
 * $ - is a special symbol used to mark bottom of the stack

     *     and end of the buffer.
 *     and end of the buffer.
@@ -139,7 +141,7 @@ function parseFromTable(source, table) {

        var current = source[cursor];
    var current = source[cursor];

        var top = stack.shift();
    var top = stack.shift();

        // Terminal is on the stack, just advance.
    // Terminal is on the stack, just advance.

        if (isTerminal(top, table)) {
    if (isTerminal(top, table)) {

        if (isTerminal(top, table) && top === current) {
    if (isTerminal(top, table) && top === current) {

          // We already shifted the symbol from the stack,
      // We already shifted the symbol from the stack,

          // so just advance the cursor.
      // so just advance the cursor.

          cursor++;
      cursor++;


## 2 changes: 1 addition & 1 deletion LL-parser.js
@@ -160,7 +160,7 @@ function getProduction(table, top, current) {

      var nextProductionNumber = table[top][current];
  var nextProductionNumber = table[top][current];


      if (!nextProductionNumber) {
  if (!nextProductionNumber) {

        throw Error('Parse error: unexpected token ', current);
    throw Error('Parse error: unexpected token ', current);

        throw Error('Parse error, unexpected token: ' + current);
    throw Error('Parse error, unexpected token: ' + current);

      }
  }


      var nextProduction = grammar[nextProductionNumber];
  var nextProduction = grammar[nextProductionNumber];


## 3 changes: 3 additions & 0 deletions LL-parser.js
@@ -138,12 +138,15 @@ function parseFromTable(source, table) {

      for (var cursor = 0; cursor < source.length;) {
  for (var cursor = 0; cursor < source.length;) {

        var current = source[cursor];
    var current = source[cursor];

        var top = stack.shift();
    var top = stack.shift();

        // Terminal is on the stack, just advance.
    // Terminal is on the stack, just advance.

        if (isTerminal(top, table)) {
    if (isTerminal(top, table)) {

          // We already shifted the symbol from the stack,
      // We already shifted the symbol from the stack,

          // so just advance the cursor.
      // so just advance the cursor.

          cursor++;
      cursor++;

          continue;
      continue;

        }
    }

        // Else, it's a non-terminal, do derivation (replace it
    // Else, it's a non-terminal, do derivation (replace it

        // in the stack with corresponding production).
    // in the stack with corresponding production).

        stack.unshift.apply(stack, getProduction(table, top, current));
    stack.unshift.apply(stack, getProduction(table, top, current));

      }
  }

      console.log('Accepted. Productions:', productionNumbers.join(', '), '\n');
  console.log('Accepted. Productions:', productionNumbers.join(', '), '\n');


## 8 changes: 8 additions & 0 deletions LL-parser.js
@@ -60,6 +60,14 @@

     *   | S  2  -  1  -  - |
 *   | S  2  -  1  -  - |

     *   | F  -  -  3  -  - |
 *   | F  -  -  3  -  - |

     *   +------------------+
 *   +------------------+

     *
 *

     * The production rules which are applied to parse `(a + a)` are: 2, 1, 3, 3:
 * The production rules which are applied to parse `(a + a)` are: 2, 1, 3, 3:

     *
 *

     * S -> ( S + F ) -> ( F + F ) -> ( a + F ) -> ( a + a )
 * S -> ( S + F ) -> ( F + F ) -> ( a + F ) -> ( a + a )

     *
 *

     * We see that each time the *left most* non-terminal is derived. Hence, the
 * We see that each time the *left most* non-terminal is derived. Hence, the

     * name of the parser: LL - scan source from Left to right, and apply the
 * name of the parser: LL - scan source from Left to right, and apply the

     * Left most derivation.
 * Left most derivation.

     */
 */


## 10 changes: 5 additions & 5 deletions LL-parser.js
@@ -95,12 +95,12 @@ function printGrammar(grammar) {

     * Builds a state-machine table where table[non-terminal][terminal]
 * Builds a state-machine table where table[non-terminal][terminal]

     * coordinates determine which next production rule to apply.
 * coordinates determine which next production rule to apply.

     */
 */

    function buildTable(grammar) {
function buildTable(grammar) {

    function buildTable(grammar, source) {
function buildTable(grammar, source) {

      // For now we assume a correct table was already built from
  // For now we assume a correct table was already built from

      // the grammar for us. We'll cover how to build it automatically
  // the grammar for us. We'll cover how to build it automatically

      // in the next lessons (see "first" and "follow" sets topic).
  // in the next lessons (see "first" and "follow" sets topic).

      // We encode only valid rules here and skip all other (they
  // We encode only valid rules here and skip all other (they

      // return `undefined` meaning a parse error).
  // return `undefined` meaning a parse error).

      // the grammar and source for us. We'll cover how to build it
  // the grammar and source for us. We'll cover how to build it

      // automatically in the next lessons (see "first" and "follow"
  // automatically in the next lessons (see "first" and "follow"

      // sets topic). We encode only valid rules here and skip all other
  // sets topic). We encode only valid rules here and skip all other

      // (they return `undefined` meaning a parse error).
  // (they return `undefined` meaning a parse error).

      //
  //

      // +------------------+
  // +------------------+

      // |    (  )  a  +  $ |
  // |    (  )  a  +  $ |


## 2 changes: 1 addition & 1 deletion LL-parser.js
@@ -80,7 +80,7 @@ var grammar = {

    var stack = ['S', '$'];
var stack = ['S', '$'];


    function parse(source) {
function parse(source) {

      return parseFromTable(source, buildTable(grammar));
  return parseFromTable(source, buildTable(grammar));

      return parseFromTable(source, buildTable(grammar, source));
  return parseFromTable(source, buildTable(grammar, source));

    }
}


    function printGrammar(grammar) {
function printGrammar(grammar) {


## 10 changes: 5 additions & 5 deletions LL-parser.js
@@ -5,10 +5,10 @@

     * MIT Style license
 * MIT Style license

     *
 *

     * Often one can see manually written LL parsers implemented as
 * Often one can see manually written LL parsers implemented as

     * recursive decent. Approach in this diff uses a state machine
 * recursive decent. Approach in this diff uses a state machine

     * tracking a parse table.
 * tracking a parse table.

     * recursive decent. Approach in this diff is a classical parse table
 * recursive decent. Approach in this diff is a classical parse table

     * state machine.
 * state machine.

     *
 *

     *  LL parser consits of:
 *  LL parser consits of:

     *  LL parser consists of:
 *  LL parser consists of:

     *
 *

     * - input buffer (source code)
 * - input buffer (source code)

     * - stack
 * - stack
@@ -29,14 +29,14 @@

     *     discard it from the stack, and move cursor further.
 *     discard it from the stack, and move cursor further.


     *   - Else (it must be a non-terminal), replace it with an
 *   - Else (it must be a non-terminal), replace it with an

     *     alternaive production, corresponding to the production number.
 *     alternaive production, corresponding to the production number.

     *     alternative production, corresponding to the production number.
 *     alternative production, corresponding to the production number.

     *
 *

     * $ - is a special symbol used to mark bottom of the stack
 * $ - is a special symbol used to mark bottom of the stack

     *     and end of the buffer.
 *     and end of the buffer.

     *
 *

     * S - is a start symbol.
 * S - is a start symbol.

     *
 *

     * At the begginign stack is:
 * At the begginign stack is:

     * At the beginning stack is:
 * At the beginning stack is:

     *
 *

     * [S, $]
 * [S, $]

     *
 *


## 177 changes: 177 additions & 0 deletions LL-parser.js
@@ -0,0 +1,177 @@

    /**
/**

     * = LL parser =
 * = LL parser =

     *
 *

     * by Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
 * by Dmitry Soshnikov <dmitry.soshnikov@gmail.com>

     * MIT Style license
 * MIT Style license

     *
 *

     * Often one can see manually written LL parsers implemented as
 * Often one can see manually written LL parsers implemented as

     * recursive decent. Approach in this diff uses a state machine
 * recursive decent. Approach in this diff uses a state machine

     * tracking a parse table.
 * tracking a parse table.

     *
 *

     *  LL parser consits of:
 *  LL parser consits of:

     *
 *

     * - input buffer (source code)
 * - input buffer (source code)

     * - stack
 * - stack

     * - parsing table (state machine)
 * - parsing table (state machine)

     *
 *

     * Parsing table:
 * Parsing table:

     *
 *

     *   Table is used to get the next production number to apply, based on current
 *   Table is used to get the next production number to apply, based on current

     *   symbol from the buffer, and the symbol (terminal or non-terminal)
 *   symbol from the buffer, and the symbol (terminal or non-terminal)

     *   on top of the stack.
 *   on top of the stack.

     *
 *

     *   - Rows in the table are non-terminals
 *   - Rows in the table are non-terminals

     *   - Cols are terminals
 *   - Cols are terminals

     *
 *

     * Parsing algorithm:
 * Parsing algorithm:

     *
 *

     *   - if the top of the stack is *terminal*, then just
 *   - if the top of the stack is *terminal*, then just

     *     discard it from the stack, and move cursor further.
 *     discard it from the stack, and move cursor further.


     *   - Else (it must be a non-terminal), replace it with an
 *   - Else (it must be a non-terminal), replace it with an

     *     alternaive production, corresponding to the production number.
 *     alternaive production, corresponding to the production number.

     *
 *

     * $ - is a special symbol used to mark bottom of the stack
 * $ - is a special symbol used to mark bottom of the stack

     *     and end of the buffer.
 *     and end of the buffer.

     *
 *

     * S - is a start symbol.
 * S - is a start symbol.

     *
 *

     * At the begginign stack is:
 * At the begginign stack is:

     *
 *

     * [S, $]
 * [S, $]

     *
 *

     * Example:
 * Example:

     *
 *

     * Grammar:
 * Grammar:

     *
 *

     *   1. S -> F
 *   1. S -> F

     *   2. S -> (S + F)
 *   2. S -> (S + F)

     *   3. F -> a
 *   3. F -> a

     *
 *

     * Input:
 * Input:

     *
 *

     *   (a + a)
 *   (a + a)

     *
 *

     * Parse table:
 * Parse table:

     *
 *

     *   +------------------+
 *   +------------------+

     *   |    (  )  a  +  $ |
 *   |    (  )  a  +  $ |

     *   +------------------+
 *   +------------------+

     *   | S  2  -  1  -  - |
 *   | S  2  -  1  -  - |

     *   | F  -  -  3  -  - |
 *   | F  -  -  3  -  - |

     *   +------------------+
 *   +------------------+

     */
 */


    /**
/**

     * Our grammar representation. Key is a production number from
 * Our grammar representation. Key is a production number from

     * the grammar, the value is: 0 - LHS, 1 - RHS of the production.
 * the grammar, the value is: 0 - LHS, 1 - RHS of the production.

     */
 */

    var grammar = {
var grammar = {

      1: ['S', 'F'],       // 1. S -> F
  1: ['S', 'F'],       // 1. S -> F

      2: ['S', '(S + F)'], // 2. S -> (S + F)
  2: ['S', '(S + F)'], // 2. S -> (S + F)

      3: ['F', 'a'],       // 3. F -> a
  3: ['F', 'a'],       // 3. F -> a

    };
};


    /**
/**

     * Initial stack: bottom is the "end of the stack" ($),
 * Initial stack: bottom is the "end of the stack" ($),

     * and the start symbol ('S' in our case) is there too.
 * and the start symbol ('S' in our case) is there too.

     */
 */

    var stack = ['S', '$'];
var stack = ['S', '$'];


    function parse(source) {
function parse(source) {

      return parseFromTable(source, buildTable(grammar));
  return parseFromTable(source, buildTable(grammar));

    }
}


    function printGrammar(grammar) {
function printGrammar(grammar) {

      console.log('Grammar:\n');
  console.log('Grammar:\n');

      for (var k in grammar) {
  for (var k in grammar) {

        console.log('  ' + k + '.', grammar[k][0], '->', grammar[k][1]);
    console.log('  ' + k + '.', grammar[k][0], '->', grammar[k][1]);

      }
  }

      console.log('');
  console.log('');

    }
}


    /**
/**

     * Builds a state-machine table where table[non-terminal][terminal]
 * Builds a state-machine table where table[non-terminal][terminal]

     * coordinates determine which next production rule to apply.
 * coordinates determine which next production rule to apply.

     */
 */

    function buildTable(grammar) {
function buildTable(grammar) {

      // For now we assume a correct table was already built from
  // For now we assume a correct table was already built from

      // the grammar for us. We'll cover how to build it automatically
  // the grammar for us. We'll cover how to build it automatically

      // in the next lessons (see "first" and "follow" sets topic).
  // in the next lessons (see "first" and "follow" sets topic).

      // We encode only valid rules here and skip all other (they
  // We encode only valid rules here and skip all other (they

      // return `undefined` meaning a parse error).
  // return `undefined` meaning a parse error).

      //
  //

      // +------------------+
  // +------------------+

      // |    (  )  a  +  $ |
  // |    (  )  a  +  $ |

      // +------------------+
  // +------------------+

      // | S  2  -  1  -  - |
  // | S  2  -  1  -  - |

      // | F  -  -  3  -  - |
  // | F  -  -  3  -  - |

      // +------------------+
  // +------------------+

      //
  //

      return {
  return {

        'S': {'(': 2, 'a': 1},
    'S': {'(': 2, 'a': 1},

        'F': {'a': 3}
    'F': {'a': 3}

      }
  }

    }
}


    var productionNumbers = [];
var productionNumbers = [];


    /**
/**

     * Parses a source using parse table.
 * Parses a source using parse table.

     * Doesn't build a parse tree yet, but just checks a source
 * Doesn't build a parse tree yet, but just checks a source

     * string for acceptance (prints production rules appled in case
 * string for acceptance (prints production rules appled in case

     * of successful parse, or throws on parse errors).
 * of successful parse, or throws on parse errors).

     */
 */

    function parseFromTable(source, table) {
function parseFromTable(source, table) {

      printGrammar(grammar);
  printGrammar(grammar);

      console.log('Source:', source);
  console.log('Source:', source);

      source = source.replace(/\s+/g, '');
  source = source.replace(/\s+/g, '');

      for (var cursor = 0; cursor < source.length;) {
  for (var cursor = 0; cursor < source.length;) {

        var current = source[cursor];
    var current = source[cursor];

        var top = stack.shift();
    var top = stack.shift();

        if (isTerminal(top, table)) {
    if (isTerminal(top, table)) {

          // We already shifted the symbol from the stack,
      // We already shifted the symbol from the stack,

          // so just advance the cursor.
      // so just advance the cursor.

          cursor++;
      cursor++;

          continue;
      continue;

        }
    }

        stack.unshift.apply(stack, getProduction(table, top, current));
    stack.unshift.apply(stack, getProduction(table, top, current));

      }
  }

      console.log('Accepted. Productions:', productionNumbers.join(', '), '\n');
  console.log('Accepted. Productions:', productionNumbers.join(', '), '\n');

    }
}


    function isTerminal(symbol, table) {
function isTerminal(symbol, table) {

      return !table.hasOwnProperty(symbol);
  return !table.hasOwnProperty(symbol);

    }
}


    function getProduction(table, top, current) {
function getProduction(table, top, current) {

      var nextProductionNumber = table[top][current];
  var nextProductionNumber = table[top][current];


      if (!nextProductionNumber) {
  if (!nextProductionNumber) {

        throw Error('Parse error: unexpected token ', current);
    throw Error('Parse error: unexpected token ', current);

      }
  }


      var nextProduction = grammar[nextProductionNumber];
  var nextProduction = grammar[nextProductionNumber];


      productionNumbers.push(nextProductionNumber);
  productionNumbers.push(nextProductionNumber);


      // Return an array of symbols from a production, e.g.
  // Return an array of symbols from a production, e.g.

      // '(', 'S', '+', 'F', ')' for '(S + F)', since
  // '(', 'S', '+', 'F', ')' for '(S + F)', since

      // each symbol should be pushed onto the stack.
  // each symbol should be pushed onto the stack.

      return nextProduction[1].split(/\s*/);
  return nextProduction[1].split(/\s*/);

    }
}


    // Test:
// Test:

    parse('(a + a)');
parse('(a + a)');


    // Output:
// Output:


    // Grammar:
// Grammar:

    //
//

    //   1. S -> F
//   1. S -> F

    //   2. S -> (S + F)
//   2. S -> (S + F)

    //   3. F -> a
//   3. F -> a

    //
//

    // Source: (a + a)
// Source: (a + a)

    // Accepted. Productions: 2, 1, 3, 3
// Accepted. Productions: 2, 1, 3, 3