GerHobbelt/grammar-extract.jison

## grammar-extract.jison
%options ranges
%options backtrack_lexer

/*
 * lexical grammar
 * ===============
 *
 * This section defines the lexer rules for our formula parser. The rules are checked from top to bottom, so order is important
 * here!
 *
 * [...]
 */

%lex

/*
 * Remember that in `jison`, when the `lexer.option.flex` has not been set (i.e. we get default behaviour),
 * we get a hit on the first matching regex, so the order of the tokenization regexes below is
 * very important.
 *
 * `option.flex` would perform an exhaustive scan of all regexes, thus trying to find the
 * longest match every time. We do not want that in our lexical scanner!
 */

%{
    /*
     * This chunk is included in the lexer action code at the very start of that method.
     *
     * `YY_START` is defined then, `YYSTATE` is not! `yy` and `yy_` are also available here.
     */

    // Scratch variables shared by the rule actions; the rules shown in this file
    // use `s`, `s2` and `rv`.
    var s, s2, s3;
    var rv, rv2, e_offset, col, row, len, value;
    var match, match2;

    // Debug trace of the lexer state.
    // NOTE(review): this call is not guarded by a `typeof console` check, unlike the
    // one in the `start_parsing` parser action -- confirm `console` always exists here.
    console.log("lexer action: ", yy, yy_, this, yytext, YY_START, $avoiding_name_collisions);

    // Shorthand used by the rule actions to reach the parser helper methods
    // (`getSymbol4Function()`, `getNextCommentIndex()`, `dedupQuotedString()`).
    var parser = yy.parser;
%}


/*
 * WARNING
 * -------
 *
 * When you use these regex 'macros' below, be aware that JISON surrounds them with () braces
 * to ensure they always act as a single element.
 *
 * Hence, for example, JISON transforms the lexer regex
 *
 *      ({ID}(\.{ID})*)(\s*\()
 *
 * into this JS regex
 *
 *      /^(?:(([a-zA-Z_][a-zA-Z0-9_]*)(\.([a-zA-Z_][a-zA-Z0-9_]*))*)(\s*\())/
 *
 * which will return more `matches[]` elements than you would expect from the lexer regex itself
 * as the regex element
 *
 *      {ID}
 *
 * itself expands to a (...)-surrounded regex element
 *
 *      ([a-zA-Z_][a-zA-Z0-9_]*)
 *
 * therefore placing the part matching
 *
 *      (\s*\()
 *
 * at `matches[]` index `[5]` rather than the originally expected `[3]`, so that input
 *
 *      MIN(x, y)
 *
 * will have the example regex match the part
 *
 *      MIN(
 *
 * as intended, while producing a `this.matches[]` array with the following content:
 *
 *      this.matches = [
 *          "MIN(",
 *          "MIN",
 *          "MIN",
 *          undefined,
 *          undefined,
 *          "("
 *      ]
 *
 * (note the `undefined` entries at `[3] `and `[4]` in there!), while input
 *
 *      A.B.C(x)
 *
 * will have the example regex match the part
 *
 *      A.B.C(
 *
 * as intended, while producing a `this.matches[]` array with the following content:
 *
 *      this.matches = ["A.B.C(", "A.B.C", "A", ".C", "C", "("]
 */

/*
 * ID:        a plain identifier: a letter or underscore, followed by any number of
 *            letters, digits or underscores.
 *
 * DOTTED_ID: an identifier which may contain dots in its interior: it starts with a
 *            letter or underscore and, when longer than one character, ends with a
 *            letter, digit or underscore. Note that the pattern does not reject
 *            consecutive dots (e.g. `A..B` matches).
 */
ID              [a-zA-Z_][a-zA-Z0-9_]*
DOTTED_ID       [a-zA-Z_]([a-zA-Z0-9_.]*[a-zA-Z0-9_])?


%%


// Recognize any function ID, with optional dotted sections, as a string which is followed by a `(` open brace, e.g. `A.DIST(`

{DOTTED_ID}(\s*\()
        %{
            /*
             * Summary: matches `<dotted identifier><optional whitespace>(`, pushes the
             * whitespace + `(` part back into the input and looks the identifier up in
             * the function symbol table. Produces a `FUNCTION` token when the lookup
             * succeeds and an `error` token otherwise.
             *
             * lookup this blurb: it MUST be a (possibly namespaced) function identifier
             * (e.g. `SUM`, `namespace.user_defined_function42`).
             *
             * Note that this is really another kind of lexical hack, just not the well-known
             * `yacc` / `lex` 'feedback' one, as here we include a part of the GRAMMAR KNOWLEDGE
             * in the lexer itself:
             *
             * since we 'know' now that the blurb `\1` is followed by an open brace `(`, we
             * can be certain that this is a function identifier and nothing else
             * that may have the same 'name', e.g. constant `E` or `PI`.
             *
             * > ### Note
             * >
             * > the braces in the regex are there so we can easily grab that bit,
             * > and in particular that very last bit: it will ALWAYS be pushed back
             * > into the lexer queue as that bit is our 'additional look-ahead' at
             * > work!
             */
            console.log("looking up function identifier token (+ look-ahead) in symbol table: ", yytext, this, this.matches);
            /*
             * **WARNING**: take heed of the comment further above regarding the `ID` etc.
             * lexer regex 'macros' and JISON's behaviour regarding those!
             *
             * The `{DOTTED_ID}` macro expands to a parenthesized group which itself contains
             * one nested group, so it occupies `this.matches[1]` (the whole identifier) and
             * `this.matches[2]` (its optional tail).
             *
             * Hence we should be able to pick up the `(` at the end at `this.matches[3]`!
             */
            // push the look-ahead (any whitespace plus the `(`) back so it is lexed again on its own:
            this.unput(this.matches[3]);
            // the identifier text without the look-ahead part:
            s = this.matches[1];
            rv = parser.getSymbol4Function(s);
            if (rv) {
                // known function: wrap the symbol definition in an opcode node
                yytext = (new ASTopcode(rv))
                    .setLocationInfo(yylloc)
                    .setCommentsIndex(parser.getNextCommentIndex())
                    .setLexedText(s);
                // ASTopcode(FKW_FUNCTION | ...)
                return 'FUNCTION';
            }

            // when we get here, the blurb didn't match anything sensible...
            yytext = (new ASTerror(FERR_EXPECTED_FUNCTION_NAME, "Expected a (possibly namespaced) function name."))
                .setLocationInfo(yylloc)
                .setCommentsIndex(parser.getNextCommentIndex())
                .setLexedText(s);
            return 'error';
        %}

[...]

"||"
        %{
            // Boolean OR operator: replace the lexed text by an opcode node which carries
            // the operator flags, the location info, the current comment index and the
            // original lexed text.
            yytext = (new ASTopcode(FKW_BOOLEAN_OR_OPERATOR | FT_BOOLEAN | FU_DERIVED))
                .setLocationInfo(yylloc)
                .setCommentsIndex(parser.getNextCommentIndex())
                .setLexedText(yytext);
            return 'BOOLEAN_OR_OPERATOR';
        %}

[...]

"\u201c"([^\u201d]*)"\u201d"
        %{                                                  /* “string” */
            // String delimited by typographic double quotes (U+201C ... U+201D).
            // `this.matches[1]` is the text between the delimiters; it cannot contain the
            // closing U+201D quote, so no quote de-duplication is applied here.
            s = this.matches[1];
            yytext = (new ASTvalue(s, FKW_VALUE | FT_STRING | FU_STRING))
                .setNotationAttributes(FKA_DELIMITERS_201C)
                .setLocationInfo(yylloc)
                .setCommentsIndex(parser.getNextCommentIndex());
            return 'STRING';
        %}

[...]

/*
 * Any input which starts with a string marker is assumed to be a string entirely.
 * Hence these two full-line regexes must come before the 'detect string anywhere in the input'
 * regexes which come after these. Those latter regexes will help us parse statements like
 *
 *       'CONCAT("THE YEAR", " ", "2013")'
 *
 *
 * Regex notes
 * -----------
 *
 *       (.*?)
 *
 *  is written like that, i.e. as a NON-greedy regex atom, to ensure that the
 *  optional `'?` / `"?` following it is actually filled when the string terminates
 *  with such a quote. Would the `.*` expression have been greedy, then the regex
 *  engine would legally ignore the following `'?` / `"?` completely as those quotes
 *  would have matched the previous `.*` already, while still producing a legal
 *  match for the quoted string, e.g. `'hello world'` would then produce a
 *
 *       \1 == "hello world'"            (note the trailing quote)
 *
 *  while we want the regex to 'strip' the outer quotes, if there are any.
 */

"'"(.*?)"'"?$
        %{
            // Input which starts with a single quote: everything up to the `$` anchor is
            // taken as one string. `this.matches[1]` holds the text after the opening quote,
            // minus the closing quote when there is one.
            s = this.matches[1];
            // collapse doubled quotes (`''`) inside the string content:
            s2 = parser.dedupQuotedString(s, "'");
            yytext = (new ASTvalue(s2, FKW_VALUE | FT_STRING | FU_STRING))
                .setNotationAttributes(FKA_DELIMITERS_SINGLEQUOTE)
                .setLocationInfo(yylloc)
                .setCommentsIndex(parser.getNextCommentIndex());
            return 'STRING';
        %}

'"'(.*?)'"'?$
        %{
            // Input which starts with a double quote: everything up to the `$` anchor is
            // taken as one string. `this.matches[1]` holds the text after the opening quote,
            // minus the closing quote when there is one.
            s = this.matches[1];
            // collapse doubled quotes (`""`) inside the string content:
            s2 = parser.dedupQuotedString(s, '"');
            yytext = (new ASTvalue(s2, FKW_VALUE | FT_STRING | FU_STRING))
                .setNotationAttributes(FKA_DELIMITERS_DOUBLEQUOTE)
                .setLocationInfo(yylloc)
                .setCommentsIndex(parser.getNextCommentIndex());
            return 'STRING';
        %}

"'"([^']*("''"[^']*)*)"'"
        %{
            // Single-quoted string anywhere in the input; a quote inside the string is
            // written as a doubled quote (`''`). `this.matches[1]` is the content between
            // the outer quotes, with the doubled quotes still in it.
            s = this.matches[1];
            // collapse doubled quotes (`''`) inside the string content:
            s2 = parser.dedupQuotedString(s, "'");
            yytext = (new ASTvalue(s2, FKW_VALUE | FT_STRING | FU_STRING))
                .setNotationAttributes(FKA_DELIMITERS_SINGLEQUOTE)
                .setLocationInfo(yylloc)
                .setCommentsIndex(parser.getNextCommentIndex());
            return 'STRING';
        %}

'"'([^"]*('""'[^"]*)*)'"'
        %{
            // Double-quoted string anywhere in the input; a quote inside the string is
            // written as a doubled quote (`""`). `this.matches[1]` is the content between
            // the outer quotes, with the doubled quotes still in it.
            s = this.matches[1];
            // collapse doubled quotes (`""`) inside the string content:
            s2 = parser.dedupQuotedString(s, '"');
            yytext = (new ASTvalue(s2, FKW_VALUE | FT_STRING | FU_STRING))
                .setNotationAttributes(FKA_DELIMITERS_DOUBLEQUOTE)
                .setLocationInfo(yylloc)
                .setCommentsIndex(parser.getNextCommentIndex());
            return 'STRING';
        %}


\s+
        /*: skip whitespace */

<<EOF>>
        %{
            // End of input: hand the parser an explicit EOF token object which carries
            // the location info, the current comment index and the lexed text.
            yytext = (new lexerToken(FKA_EOF))
                .setLocationInfo(yylloc)
                .setCommentsIndex(parser.getNextCommentIndex())
                .setLexedText(yytext);
            return 'EOF';
        %}

.
        %{
            // Catch-all rule: as the last rule in the list it only fires for a single
            // character which none of the rules above accepted. The character is wrapped
            // in an error node and reported through the `error` token.
            yytext = (new ASTerror(FERR_UNSUPPORTED_INPUT, "Don't know what to do with this: it's unsupported input."))
                .setLocationInfo(yylloc)
                .setCommentsIndex(parser.getNextCommentIndex())
                .setLexedText(yytext);
            return 'error';
        %}


/lex


%token NUMBER INTEGER_NUMBER
%token STRING
%token TRUE FALSE
%token CONSTANT
/*
 * functions all produce the `FUNCTION` lexer token:
 * parameter list validation is performed in the static analysis phase during parsing.
 */
%token FUNCTION

[...]


%start start_parsing


%{
    /*
     * This chunk is included in the parser code, before the lexer definition section and after the parser has been defined.
     *
     * WARNING:
     *
     * Meanwhile, keep in mind that all the parser actions, which will execute inside the `parser.performAction()` function,
     * will have a `this` pointing to `$$`.
     *
     * If you want to access the lexer and/or parser, these are accessible inside the parser rule action code via
     * the `yy.lexer` and `yy.parser` dereferences respectively.
     */

    // Debug trace of whatever `this` is at the point where this chunk gets included.
    // NOTE(review): not guarded by a `typeof console` check, unlike the `start_parsing` action.
    console.log("parser object definition: ", this);
%}

%% /* language grammar */

start_parsing
    : init_phase do_the_work EOF
        {
            // `$2` is the semantic value produced by `do_the_work`; it is logged
            // (when a `console` is available) and returned from the action, which
            // per Jison convention makes it the result of the parse.
            if (typeof console !== 'undefined') {
                console.log($2);
            }
            return $2;
        }
    ;

init_phase
    : /* epsilon */
        {
            /*
             * The 'init phase' is always reduced for every parse invocation.
             *
             * At this point in time, nothing has happened yet: no token has
             * been lexed, no real statement has been parsed yet.
             *
             * The grammar has been constructed such that this rule can be
             * resolved without any look-ahead, thanks to a 'default action'.
             *
             * Work done here: copy the inline comment mode to the lexer options,
             * reset the comment store and make sure the symbol tables exist.
             */
            //yy.lexer.options.backtrack_lexer = true;
            //yy.lexer.options.ranges = true;             // required for the inline comments to work as the start location is tracked via the `yylloc.range[]`
            // take the mode from `yy` when the caller has set one; any falsy value becomes 0:
            yy.lexer.options.inline_comment_mode = yy.inline_comment_mode || 0;

            // and make sure the comments store is prepped:
            yy.parser.clearComments();

            // and init the symbol tables if the caller didn't do so already (we do this in order to prevent undesirable crashes)
            if (typeof yy.parser._symbol2token_lookup_table === "undefined") {
                yy.parser.initSymbolTable([]);
            }

            [...]
        }
    ;


[...]


/*
 * And here endeth the parser proper
 * ---------------------------------
 *
 * This concludes the grammar rules definitions themselves.
 * What follows is a chunk of support code that JISON will include in the generated parser.
 */


%%


/*
 * This chunk is included in the parser object code,
 * following the 'init' code block that may be set in `%{ ... %}` at the top of this
 * grammar definition file.
 */


[...]


/*
 * Remove duplicated quotes (of the form `quote_str + quote_str`, e.g. `""`) from the input string.
 *
 * Every non-overlapping doubled quote, scanning left to right, is collapsed into
 * a single `quote_str`.
 *
 * - str:       the string content to process (without its outer delimiters)
 * - quote_str: the quote character (or string) which appears doubled inside `str`
 *
 * Returns the processed string.
 */
parser.dedupQuotedString = function(str, quote_str) {
    // `str.replace(string, ...)` would only replace the FIRST occurrence, leaving
    // later doubled quotes untouched; split + join handles all of them and treats
    // `quote_str` literally (no regex or `$` replacement pattern semantics).
    return str.split(quote_str + quote_str).join(quote_str);
};


/*
 * Return the definition structure for the given symbol name, or FALSE when the symbol is unknown.
 *
 * The definition structure contains these elements:
 *
 * - token
 * - defined_value
 */

parser.getSymbol4Function = function(name) {
    // Function symbols are looked up by their upper-cased name.
    var key = name.toUpperCase();
    var functionTable = this._symbol2token_lookup_table.functions;

    // Unknown symbols (or falsy table entries) are reported as `false`.
    return functionTable[key] || false;
};

parser.getSymbol4DefinedConstant = function(name) {
    // Constant symbols are looked up by their upper-cased name.
    var key = name.toUpperCase();
    var constantTable = this._symbol2token_lookup_table.constants;

    // Unknown symbols (or falsy table entries) are reported as `false`.
    return constantTable[key] || false;
};


[...]


/*
 * Initialize the symbol lookup tables.
 *
 * Replaces `this._symbol2token_lookup_table` by a fresh structure, registers the
 * predefined constants and functions and then the caller's `custom_symbols`.
 *
 * - custom_symbols: passed straight on to `addSymbols()` (the parser's init phase
 *                   passes an empty array)
 *
 * Returns the result of the final `addSymbols()` call.
 */
parser.initSymbolTable = function(custom_symbols) {
    // start from scratch: any previously registered symbols are dropped
    this._symbol2token_lookup_table = {
        functions: {},                  // hash table ~ dictionary
        constants: {},                  // hash table ~ dictionary
        [...]
    };

    // first set up the default symbols: constants, etc.; once that is done, register the custom symbols:
    this.addSymbols(predefined_formula_constants, FSC_PREDEFINED_CONSTANT);
    this.addSymbols(predefined_formula_functions, FSC_FUNCTION);

    return this.addSymbols(custom_symbols);
};


/*
 * Register one or more symbols in the symbol lookup tables:
 *
 * - symbols:          the symbol definitions to register
 * - default_category: optional; `initSymbolTable()` passes `FSC_PREDEFINED_CONSTANT` /
 *                     `FSC_FUNCTION` here and omits it for custom symbols
 *
 * Returns `this`, so calls can be chained.
 */
parser.addSymbols = function(symbols, default_category) {
[...]
    return this;
};


[...]


/*
 * Reset the comments' store to an empty list.
 *
 * Returns `this`, so calls can be chained.
 */
parser.clearComments = function() {
    var self = this;

    // always start out with a brand new array
    self.comments = [];

    return self;
};


/*
 * Produce the index of the next free slot in the comment store.
 *
 * That is the current number of stored comments, or 0 when the store
 * is empty or has not been set up yet.
 */
parser.getNextCommentIndex = function() {
    var store = this.comments;

    // no store yet: behave as if it is empty
    if (typeof store === "undefined") {
        return 0;
    }

    return store.length > 0 ? store.length : 0;
};
	%options ranges
	%options backtrack_lexer

	/*
	* lexical grammar
	* ===============
	*
	* This section defines the lexer rules for our formula parser. The rules are checked from top to bottom, so order is important
	* here!
	*
	* [...]
	*/

	%lex

	/*
	* Remember that in `jison`, when the `lexer.option.flex` has not been set (i.e. we get default behaviour),
	* we get a hit on the first matching regex, so the order of the tokenization regexes below is
	* very important.
	*
	* `option.flex` would perform an exhaustive scan of all regexes, thus trying to find the
	* longest match every time. We do not want that in our lexical scanner!
	*/

	%{
	/*
	* This chunk is included in the lexer action code at the very start of that method.
	*
	* `YY_START` is defined then, `YYSTATE` is not! `yy` and `yy_` are also available here.
	*/

	// Scratch variables shared by the rule actions; the rules shown in this file
	// use `s`, `s2` and `rv`.
	var s, s2, s3;
	var rv, rv2, e_offset, col, row, len, value;
	var match, match2;

	// Debug trace of the lexer state.
	// NOTE(review): this call is not guarded by a `typeof console` check, unlike the
	// one in the `start_parsing` parser action -- confirm `console` always exists here.
	console.log("lexer action: ", yy, yy_, this, yytext, YY_START, $avoiding_name_collisions);

	// Shorthand used by the rule actions to reach the parser helper methods
	// (`getSymbol4Function()`, `getNextCommentIndex()`, `dedupQuotedString()`).
	var parser = yy.parser;
	%}


	/*
	* WARNING
	* -------
	*
	* When you use these regex 'macros' below, be aware that JISON surrounds them with () braces
	* to ensure they always act as a single element.
	*
	* Hence, for example, JISON transforms the lexer regex
	*
	* ({ID}(\.{ID})*)(\s*\()
	*
	* into this JS regex
	*
	* /^(?:(([a-zA-Z_][a-zA-Z0-9_]*)(\.([a-zA-Z_][a-zA-Z0-9_]*))*)(\s*\())/
	*
	* which will return more `matches[]` elements than you would expect from the lexer regex itself
	* as the regex element
	*
	* {ID}
	*
	* itself expands to a (...)-surrounded regex element
	*
	* ([a-zA-Z_][a-zA-Z0-9_]*)
	*
	* therefore placing the part matching
	*
	* (\s*\()
	*
	* at `matches[]` index `[5]` rather than the originally expected `[3]`, so that input
	*
	* MIN(x, y)
	*
	* will have the example regex match the part
	*
	* MIN(
	*
	* as intended, while producing a `this.matches[]` array with the following content:
	*
	* this.matches = [
	* "MIN(",
	* "MIN",
	* "MIN",
	* undefined,
	* undefined,
	* "("
	* ]
	*
	* (note the `undefined` entries at `[3] `and `[4]` in there!), while input
	*
	* A.B.C(x)
	*
	* will have the example regex match the part
	*
	* A.B.C(
	*
	* as intended, while producing a `this.matches[]` array with the following content:
	*
	* this.matches = ["A.B.C(", "A.B.C", "A", ".C", "C", "("]
	*/

	/*
	* ID:        a plain identifier: a letter or underscore, followed by any number of
	*            letters, digits or underscores.
	*
	* DOTTED_ID: an identifier which may contain dots in its interior: it starts with a
	*            letter or underscore and, when longer than one character, ends with a
	*            letter, digit or underscore. Note that the pattern does not reject
	*            consecutive dots (e.g. `A..B` matches).
	*/
	ID [a-zA-Z_][a-zA-Z0-9_]*
	DOTTED_ID [a-zA-Z_]([a-zA-Z0-9_.]*[a-zA-Z0-9_])?



	%%


	// Recognize any function ID, with optional dotted sections, as a string which is followed by a `(` open brace, e.g. `A.DIST(`

	{DOTTED_ID}(\s*\()
	%{
	/*
	* Summary: matches `<dotted identifier><optional whitespace>(`, pushes the
	* whitespace + `(` part back into the input and looks the identifier up in
	* the function symbol table. Produces a `FUNCTION` token when the lookup
	* succeeds and an `error` token otherwise.
	*
	* lookup this blurb: it MUST be a (possibly namespaced) function identifier
	* (e.g. `SUM`, `namespace.user_defined_function42`).
	*
	* Note that this is really another kind of lexical hack, just not the well-known
	* `yacc` / `lex` 'feedback' one, as here we include a part of the GRAMMAR KNOWLEDGE
	* in the lexer itself:
	*
	* since we 'know' now that the blurb `\1` is followed by an open brace `(`, we
	* can be certain that this is a function identifier and nothing else
	* that may have the same 'name', e.g. constant `E` or `PI`.
	*
	* > ### Note
	* >
	* > the braces in the regex are there so we can easily grab that bit,
	* > and in particular that very last bit: it will ALWAYS be pushed back
	* > into the lexer queue as that bit is our 'additional look-ahead' at
	* > work!
	*/
	console.log("looking up function identifier token (+ look-ahead) in symbol table: ", yytext, this, this.matches);
	/*
	* WARNING: take heed of the comment further above regarding the `ID` etc.
	* lexer regex 'macros' and JISON's behaviour regarding those!
	*
	* The `{DOTTED_ID}` macro expands to a parenthesized group which itself contains
	* one nested group, so it occupies `this.matches[1]` (the whole identifier) and
	* `this.matches[2]` (its optional tail).
	*
	* Hence we should be able to pick up the `(` at the end at `this.matches[3]`!
	*/
	// push the look-ahead (any whitespace plus the `(`) back so it is lexed again on its own:
	this.unput(this.matches[3]);
	// the identifier text without the look-ahead part:
	s = this.matches[1];
	rv = parser.getSymbol4Function(s);
	if (rv) {
	// known function: wrap the symbol definition in an opcode node
	yytext = (new ASTopcode(rv))
	.setLocationInfo(yylloc)
	.setCommentsIndex(parser.getNextCommentIndex())
	.setLexedText(s);
	// ASTopcode(FKW_FUNCTION | ...)
	return 'FUNCTION';
	}

	// when we get here, the blurb didn't match anything sensible...
	yytext = (new ASTerror(FERR_EXPECTED_FUNCTION_NAME, "Expected a (possibly namespaced) function name."))
	.setLocationInfo(yylloc)
	.setCommentsIndex(parser.getNextCommentIndex())
	.setLexedText(s);
	return 'error';
	%}

	[...]

	"||"
	        %{
	            // Boolean OR operator: replace the lexed text by an opcode node which carries
	            // the operator flags, the location info, the current comment index and the
	            // original lexed text.
	            // (The flags are combined with the bitwise OR operator `|`; a backslash-escaped
	            // pipe is not valid JavaScript.)
	            yytext = (new ASTopcode(FKW_BOOLEAN_OR_OPERATOR | FT_BOOLEAN | FU_DERIVED))
	                .setLocationInfo(yylloc)
	                .setCommentsIndex(parser.getNextCommentIndex())
	                .setLexedText(yytext);
	            return 'BOOLEAN_OR_OPERATOR';
	        %}

	[...]

	"\u201c"([^\u201d]*)"\u201d"
	        %{                                                  /* “string” */
	            // String delimited by typographic double quotes (U+201C ... U+201D).
	            // `this.matches[1]` is the text between the delimiters; it cannot contain the
	            // closing U+201D quote, so no quote de-duplication is applied here.
	            s = this.matches[1];
	            // the value flags are combined with the bitwise OR operator `|`:
	            yytext = (new ASTvalue(s, FKW_VALUE | FT_STRING | FU_STRING))
	                .setNotationAttributes(FKA_DELIMITERS_201C)
	                .setLocationInfo(yylloc)
	                .setCommentsIndex(parser.getNextCommentIndex());
	            return 'STRING';
	        %}

	[...]

	/*
	* Any input which starts with a string marker is assumed to be a string entirely.
	* Hence these two full-line regexes must come before the 'detect string anywhere in the input'
	* regexes which come after these. Those latter regexes will help us parse statements like
	*
	* 'CONCAT("THE YEAR", " ", "2013")'
	*
	*
	* Regex notes
	* -----------
	*
	* (.*?)
	*
	* is written like that, i.e. as a NON-greedy regex atom, to ensure that the
	* optional `'?` / `"?` following it is actually filled when the string terminates
	* with such a quote. Would the `.*` expression have been greedy, then the regex
	* engine would legally ignore the following `'?` / `"?` completely as those quotes
	* would have matched the previous `.*` already, while still producing a legal
	* match for the quoted string, e.g. `'hello world'` would then produce a
	*
	* \1 == "hello world'" (note the trailing quote)
	*
	* while we want the regex to 'strip' the outer quotes, if there are any.
	*/

	"'"(.*?)"'"?$
	        %{
	            // Input which starts with a single quote: everything up to the `$` anchor is
	            // taken as one string. `this.matches[1]` holds the text after the opening quote,
	            // minus the closing quote when there is one.
	            s = this.matches[1];
	            // collapse doubled quotes (`''`) inside the string content:
	            s2 = parser.dedupQuotedString(s, "'");
	            // the value flags are combined with the bitwise OR operator `|`:
	            yytext = (new ASTvalue(s2, FKW_VALUE | FT_STRING | FU_STRING))
	                .setNotationAttributes(FKA_DELIMITERS_SINGLEQUOTE)
	                .setLocationInfo(yylloc)
	                .setCommentsIndex(parser.getNextCommentIndex());
	            return 'STRING';
	        %}

	'"'(.*?)'"'?$
	        %{
	            // Input which starts with a double quote: everything up to the `$` anchor is
	            // taken as one string. `this.matches[1]` holds the text after the opening quote,
	            // minus the closing quote when there is one.
	            s = this.matches[1];
	            // collapse doubled quotes (`""`) inside the string content:
	            s2 = parser.dedupQuotedString(s, '"');
	            // the value flags are combined with the bitwise OR operator `|`:
	            yytext = (new ASTvalue(s2, FKW_VALUE | FT_STRING | FU_STRING))
	                .setNotationAttributes(FKA_DELIMITERS_DOUBLEQUOTE)
	                .setLocationInfo(yylloc)
	                .setCommentsIndex(parser.getNextCommentIndex());
	            return 'STRING';
	        %}

	"'"([^']*("''"[^']*)*)"'"
	        %{
	            // Single-quoted string anywhere in the input; a quote inside the string is
	            // written as a doubled quote (`''`). The `*` quantifiers on both `[^']` classes
	            // are required: without them neither the empty string `''` nor any run of more
	            // than one ordinary character would match.
	            // `this.matches[1]` is the content between the outer quotes, with the doubled
	            // quotes still in it.
	            s = this.matches[1];
	            // collapse doubled quotes (`''`) inside the string content:
	            s2 = parser.dedupQuotedString(s, "'");
	            // the value flags are combined with the bitwise OR operator `|`:
	            yytext = (new ASTvalue(s2, FKW_VALUE | FT_STRING | FU_STRING))
	                .setNotationAttributes(FKA_DELIMITERS_SINGLEQUOTE)
	                .setLocationInfo(yylloc)
	                .setCommentsIndex(parser.getNextCommentIndex());
	            return 'STRING';
	        %}

	'"'([^"]*('""'[^"]*)*)'"'
	        %{
	            // Double-quoted string anywhere in the input; a quote inside the string is
	            // written as a doubled quote (`""`). The `*` quantifiers on both `[^"]` classes
	            // are required: without them neither the empty string `""` nor any run of more
	            // than one ordinary character would match.
	            // `this.matches[1]` is the content between the outer quotes, with the doubled
	            // quotes still in it.
	            s = this.matches[1];
	            // collapse doubled quotes (`""`) inside the string content:
	            s2 = parser.dedupQuotedString(s, '"');
	            // the value flags are combined with the bitwise OR operator `|`:
	            yytext = (new ASTvalue(s2, FKW_VALUE | FT_STRING | FU_STRING))
	                .setNotationAttributes(FKA_DELIMITERS_DOUBLEQUOTE)
	                .setLocationInfo(yylloc)
	                .setCommentsIndex(parser.getNextCommentIndex());
	            return 'STRING';
	        %}


	\s+
	/*: skip whitespace */

	<<EOF>>
	%{
	// End of input: hand the parser an explicit EOF token object which carries
	// the location info, the current comment index and the lexed text.
	yytext = (new lexerToken(FKA_EOF))
	.setLocationInfo(yylloc)
	.setCommentsIndex(parser.getNextCommentIndex())
	.setLexedText(yytext);
	return 'EOF';
	%}

	.
	%{
	// Catch-all rule: as the last rule in the list it only fires for a single
	// character which none of the rules above accepted. The character is wrapped
	// in an error node and reported through the `error` token.
	yytext = (new ASTerror(FERR_UNSUPPORTED_INPUT, "Don't know what to do with this: it's unsupported input."))
	.setLocationInfo(yylloc)
	.setCommentsIndex(parser.getNextCommentIndex())
	.setLexedText(yytext);
	return 'error';
	%}




	/lex




	%token NUMBER INTEGER_NUMBER
	%token STRING
	%token TRUE FALSE
	%token CONSTANT
	/*
	* functions all produce the `FUNCTION` lexer token:
	* parameter list validation is performed in the static analysis phase during parsing.
	*/
	%token FUNCTION

	[...]



	%start start_parsing




	%{
	/*
	* This chunk is included in the parser code, before the lexer definition section and after the parser has been defined.
	*
	* WARNING:
	*
	* Meanwhile, keep in mind that all the parser actions, which will execute inside the `parser.performAction()` function,
	* will have a `this` pointing to `$$`.
	*
	* If you want to access the lexer and/or parser, these are accessible inside the parser rule action code via
	* the `yy.lexer` and `yy.parser` dereferences respectively.
	*/

	// Debug trace of whatever `this` is at the point where this chunk gets included.
	// NOTE(review): not guarded by a `typeof console` check, unlike the `start_parsing` action.
	console.log("parser object definition: ", this);
	%}

	%% /* language grammar */

	start_parsing
	: init_phase do_the_work EOF
	{
	// `$2` is the semantic value produced by `do_the_work`; it is logged
	// (when a `console` is available) and returned from the action, which
	// per Jison convention makes it the result of the parse.
	if (typeof console !== 'undefined') {
	console.log($2);
	}
	return $2;
	}
	;

	init_phase
	    : /* epsilon */
	        {
	            /*
	             * The 'init phase' is always reduced for every parse invocation.
	             *
	             * At this point in time, nothing has happened yet: no token has
	             * been lexed, no real statement has been parsed yet.
	             *
	             * The grammar has been constructed such that this rule can be
	             * resolved without any look-ahead, thanks to a 'default action'.
	             *
	             * Work done here: copy the inline comment mode to the lexer options,
	             * reset the comment store and make sure the symbol tables exist.
	             */
	            //yy.lexer.options.backtrack_lexer = true;
	            //yy.lexer.options.ranges = true;             // required for the inline comments to work as the start location is tracked via the `yylloc.range[]`
	            // take the mode from `yy` when the caller has set one; any falsy value becomes 0
	            // (this needs the plain logical OR operator `||`):
	            yy.lexer.options.inline_comment_mode = yy.inline_comment_mode || 0;

	            // and make sure the comments store is prepped:
	            yy.parser.clearComments();

	            // and init the symbol tables if the caller didn't do so already (we do this in order to prevent undesirable crashes)
	            if (typeof yy.parser._symbol2token_lookup_table === "undefined") {
	                yy.parser.initSymbolTable([]);
	            }

	            [...]
	        }
	    ;


	[...]


	/*
	* And here endeth the parser proper
	* ---------------------------------
	*
	* This concludes the grammar rules definitions themselves.
	* What follows is a chunk of support code that JISON will include in the generated parser.
	*/


	%%




	/*
	* This chunk is included in the parser object code,
	* following the 'init' code block that may be set in `%{ ... %}` at the top of this
	* grammar definition file.
	*/


	[...]


	/*
	* Remove duplicated quotes (of the form `quote_str + quote_str`, e.g. `""`) from the input string.
	*
	* Every non-overlapping doubled quote, scanning left to right, is collapsed into
	* a single `quote_str`.
	*
	* - str:       the string content to process (without its outer delimiters)
	* - quote_str: the quote character (or string) which appears doubled inside `str`
	*
	* Returns the processed string.
	*/
	parser.dedupQuotedString = function(str, quote_str) {
		// `str.replace(string, ...)` would only replace the FIRST occurrence, leaving
		// later doubled quotes untouched; split + join handles all of them and treats
		// `quote_str` literally (no regex or `$` replacement pattern semantics).
		return str.split(quote_str + quote_str).join(quote_str);
	};


	/*
	* Return the definition structure for the given symbol name, or FALSE when the symbol is unknown.
	*
	* The definition structure contains these elements:
	*
	* - token
	* - defined_value
	*/

	parser.getSymbol4Function = function(name) {
		// Function symbols are looked up by their upper-cased name.
		name = name.toUpperCase();
		var rv = this._symbol2token_lookup_table.functions[name];
		// Unknown symbols (or falsy table entries) are reported as `false`;
		// this needs the plain logical OR operator `||`.
		return rv || false;
	};

	parser.getSymbol4DefinedConstant = function(name) {
		// Constant symbols are looked up by their upper-cased name.
		name = name.toUpperCase();
		var rv = this._symbol2token_lookup_table.constants[name];
		// Unknown symbols (or falsy table entries) are reported as `false`;
		// this needs the plain logical OR operator `||`.
		return rv || false;
	};


	[...]


	/*
	* Initialize the symbol lookup tables.
	*
	* Replaces `this._symbol2token_lookup_table` by a fresh structure, registers the
	* predefined constants and functions and then the caller's `custom_symbols`.
	*
	* - custom_symbols: passed straight on to `addSymbols()` (the parser's init phase
	*                   passes an empty array)
	*
	* Returns the result of the final `addSymbols()` call.
	*/
	parser.initSymbolTable = function(custom_symbols) {
	// start from scratch: any previously registered symbols are dropped
	this._symbol2token_lookup_table = {
	functions: {}, // hash table ~ dictionary
	constants: {}, // hash table ~ dictionary
	[...]
	};

	// first set up the default symbols: constants, etc.; once that is done, register the custom symbols:
	this.addSymbols(predefined_formula_constants, FSC_PREDEFINED_CONSTANT);
	this.addSymbols(predefined_formula_functions, FSC_FUNCTION);

	return this.addSymbols(custom_symbols);
	};


	/*
	* Register one or more symbols in the symbol lookup tables:
	*
	* - symbols:          the symbol definitions to register
	* - default_category: optional; `initSymbolTable()` passes `FSC_PREDEFINED_CONSTANT` /
	*                     `FSC_FUNCTION` here and omits it for custom symbols
	*
	* Returns `this`, so calls can be chained.
	*/
	parser.addSymbols = function(symbols, default_category) {
	[...]
	return this;
	};



	[...]



	/*
	* Reset the comments' store to an empty list.
	*
	* Returns `this`, so calls can be chained.
	*/
	parser.clearComments = function() {
		var self = this;

		// always start out with a brand new array
		self.comments = [];

		return self;
	};


	/*
	* Produce the index of the next free slot in the comment store.
	*
	* That is the current number of stored comments, or 0 when the store
	* is empty or has not been set up yet.
	*/
	parser.getNextCommentIndex = function() {
		var store = this.comments;

		// no store yet: behave as if it is empty
		if (typeof store === "undefined") {
			return 0;
		}

		return store.length > 0 ? store.length : 0;
	};