markjenkins/A_README.md Secret

## A_README.md

      
    Raw
  

              A_README.md
            
          
    This is an early work in progress in pre-release of a LALR(1) parser generator.
(as if the world doesn't have enough of them)
All it does so far is compute the nullable and FIRST sets.
More about what needs to be done is in TODO.

Development language is a bootstrappable sub-variant of Lox, as described in the book Crafting Interpreters by Bob Nystrom.
The sub-variant is equivilent to chapter 24 (Calls and Functions) from part III of the book with the addition of DOUBLE_QUOTE ('"'), pair(a, b), pair_first(p), and pair_second(p) in the runtime. This leaves out the language functionality in subsequent chapters providing closures and classes.
My effort to bootstrap that degree of functionality.
(Up to chapter 23 in functionality)
For development I have been using a reference implementation at this degree of functionality using the original Nystrom code, which is useful for development purposes for its superior error handling and basic repl.
You could also use the completed (chapter 13) Nystrom reference implementation in Java if you added DOUBLE_QUOTE to the runtime and prepended the files object.lox and pair.lox as part of the runtime.

The goal here is to have a bootstrapapable means to
generate a y.tab.c and y.tab.h from bash-2.0.5b/parse.y without relying on heirloom-tools yacc.
heirloom-tools Yacc has a free software license, the Common Development and Distribution License (CDDL) Version 1.0 .
This is not compatible with the free software license of M2libc, the GNU General Public License v3.0 (GPLv3).
CDDL conflict references
1
2
That's not a problem if you're bootstrapping for your own purposes, as you don't need to distribute or retain the tools you used to bootstrap. (This is how the free world was built in the first place :] ). The final work product of a bootstrap like fosslinux/live-bootstrap no longer needs to retain a heirloom Yacc built in this manner.
But, it is a problem for GUIX where they like to keep copies of all built software in /gnu/store and where they want to ensure the end-users are in a position to be able to redistribute everything found there.
The original invocation of yacc or bison in the bash build is
$ yacc -d parse.y

where -d requests a y.tab.h file in addition to the y.tab.c
Because we have a simple I/O system for Lox (INPUT global var and print to standard out), the plan for getting seperate y.tab.h and y.tab.c output is to just run our deterministic parser generator twice on the same input with each file as a different output.
A primitive alternative to a module system is included, the project has seperate .lox files that are concatenated together with dependencies documented at the top as
// REQUIRE dep.lox

So you have to be careful what you do the the global namespace.
The not-easilly bootstrappable deps.py can identify the dependencies and generate a .d file for our Makefile.
The Makefile uses advanced features as well to ease development. Writing a bootstrappable kaem shell script as an alternative will come later and will be the easy part.

  
## abs.lox
fun abs(x){
    if (x<0)
	return -x;
    return x;
}

## action.lox
var ACTION_SHIFT = 0;
var ACTION_REDUCE = 1;
var ACTION_ACCEPT = 2;

fun action_shift(next_state){
    return pair(ACTION_SHIFT, next_state);
}

fun action_reduce(next_state){
    return pair(ACTION_REDUCE, next_state);
}

fun action_accept(){
    return pair(ACTION_ACCEPT, nil);
}

## backslash.lox
// quoting just a backslash screws up emacs javascript mode, which I shouldn't
// really be using
var BACKSLASH = "\";

## char_triplet.lox
// REQUIRE triplet.lox
// REQUIRE newline.lox
// REQUIRE reverse.lox

fun char_triplet(c, c_decimal, c_line_num){
    return triplet(c, c_decimal, c_line_num);
}

var char_triplet_str = triplet_first;

fun char_triplet_decimal(c_triplet){
    return triplet_second(c_triplet);
}

fun char_triplet_line_num(c_triplet){
    return triplet_third(c_triplet);
}

fun char_triplet_is_whitespace(c_triplet){
    c_decimal = char_triplet_decimal(c_triplet);
    // ascii table (decimal)
    // 9  horizontal tab
    // 10 line feed
    // 11 vertical tab
    // 12 form feed
    // 13 carriage return
    // 32 space
    return ( (c_decimal>= 9 and c_decimal<= 13) or
    	     c_decimal == 32 );
}

// returns a triplet containing
// * the c_triplets accumulated
// * the string accumulated
// * the remaining chars or nil if they were all exhausted
fun concatenate_chars_until_newline(chars){
    var str_accum = "";
    var triplet_accum = nil;
    var c_triplet;
    var c_str;
    while ( nil != chars ){
	c_triplet = pair_first(chars);
	c_str = char_triplet_str(c_triplet);
	chars = pair_second(chars);
	if (NEWLINE == c_str)
	    return triplet(reverse(triplet_accum),
			   str_accum,
			   chars);
	str_accum = str_accum + c_str;
	triplet_accum = pair(c_triplet, triplet_accum);
    }
    // if we reached the end of input without finding a newline
    return triplet(reverse(triplet_accum), str_accum, nil);
}

## comppair.lox
var comp_pair_eq = pair_first;
var comp_pair_lt = pair_second;

var form_comp_pair = pair;

## config.lox
SUPPORT_BACKSLASH_AS_ALT_TO_PERCENT = true;
SUPPORT_DOUBLE_QUOTE_LITERALS = true;

## deps.py
#!/usr/bin/env python3

# Does not support circular dependencies or even detection yet, will crash
# trying to resolve them
#
# The lox language supports them, you can have one lox file B that
# has declarations of globals based on other globals (dependency) from A
# and have the other lox file (A) just have functions that rely on globals
# that the other dep (B). Because those functions don't resolve the global
# until called at runtime, it's fine.
#
# So if you have a safe circular dep like this, have a // REQUIRE A.lox
# at the top of B.lox and just put a note in A.lox explaining the
# circular dep

from sys import argv
from os.path import exists

def deps_in_file(filename):
    if not exists(filename):
        raise Exception("%s not found" % filename)

    direct_deps = []
    with open(filename) as f:
        for line in f:
            line_parts = line.strip().split(" ")

            if (len(line_parts) >= 3 and
                line_parts[0]=="//" and
                line_parts[1]=="REQUIRE" and
                line_parts[2].endswith(".lox") ):
                direct_deps.append(line_parts[2])
    return direct_deps

def recursively_find_inner(filename, filename_expansion):
    if filename in filename_expansion:
        new_deps = filename_expansion[filename]
    else:
        new_deps = deps_in_file(filename)
        filename_expansion[filename] = new_deps

    output = [filename]
    for expanded_filename in reversed(new_deps):
        output.extend( recursively_find_inner(
            expanded_filename,
            filename_expansion) )
    output_filtered = []
    output_set = set()
    for output_item in reversed(output):
        if output_item not in output_set:
            output_filtered.append(output_item)
            output_set.add(output_item)
    output_filtered.reverse()
    return output_filtered


def recursively_find(filename):
    result = recursively_find_inner(filename, {})
    result.reverse()
    return result

def main(start_filename):
    if not start_filename.endswith(".lox"):
        raise Exception("dependency search not on a .lox file")
    filename_w_out_prefix = start_filename[:-len(".lox")]
    print("%(filename)s_concat.lox %(filename)s_concat.lox.d: " %
          {'filename': filename_w_out_prefix},
          end="")
    print(" ".join(recursively_find(start_filename)))

if __name__ == "__main__":
    main(argv[1])

## example_1.y
%token varDecl constDecl statement

%%

program: /* empty */
| program declaration;

declaration: varDecl
| constDecl
| statement;

%%

## example_2.y
%token a c d

%start Z

%%

X: Y
 | a;

Y: /* empty */
 | c;

Z: d
 | X Y Z;

%%

## example_3.y
%token a c d

%start Z

%%

X: Y
 | a;

Y: /* empty */
 | c;

Z: d
 | X Y Z;

K: X Y X Y;

%%

## example_4.y
%token '=' x '*'

%%

N: V '=' E
 | E;

E: V;

V: x
 | '*' E;

%%

## example_5.y
%token '=' '*' ID

%%

S: L '=' R
 | R;

L: '*' R
 | ID;

R: L;

%%

## example_6.y
%token c d
%%

S: C C;

C: c C
 | d;

%%

## example_7.y
%token a b c d e

%%

S: a A d
 | b B d
 | a B e
 | b A e;

A: c;

B: c;

%%

## examples.lox
// REQUIRE list.lox
// REQUIRE grammer.lox

// currently this isn't needed because the examples are structured
// identically to the internals of grammer.lox, but we perform this
// conversion just in case
fun convert_list_of_rule_pairs_to_grammer(rules){
    var g = create_grammer();
    var rule;
    while (nil!=rules){
	rule = pair_first(rules);
	g = extend_grammer(g, pair_first(rule), pair_second(rule) );
	rules = pair_second(rules);
    }
    return g;
}

// this grammer comes from
// http://boxbase.org/entries/2019/oct/14/lr1-parsing-tables/
fun example_grammer_1(){
    var rules = list_2(
	pair("program",
	     list_2( list_1(""),
		     list_2("program", "declaration")) ),

	pair("declaration",
	     list_3( list_1("varDecl"),
		     list_1("constDecl"),
		     list_1("statement")) ) );

    // this conversion currently does nothing because what we have above
    // is the internal structure of our grammer, but
    // we abstract that away in case the grammer internals change/
    return convert_list_of_rule_pairs_to_grammer(rules);
}

// from https://www.csd.uwo.ca/~watt/home/courses/2008-09/cs4447a/notes/CS4447%2004%20--%20Parsing%20Algorithms%201.pdf
// directly nullable set is Y
// recursively nullable set is X and Y
// also includes FIRST and FOLLOW sets
// FIRST(X) is a c, FIRST(Y) is c, FIRST(Z) is a c d
// FOLLOW(X) is a c d, FOLLOW(Y) is a c d, FOLLOW(Z) is empty
fun example_grammer_2(){
    var rules = list_3(
	pair("X",
	     list_2( list_1("Y"),
		     list_1("a") ) ),
	pair("Y",
	     list_2( list_1("c"),
		     list_1("") ) ),
	pair("Z",
	     list_2( list_1("d"),
		     list_3("X", "Y", "Z") ) ) );
    // this conversion currently does nothing because what we have above
    // is the internal structure of our grammer, but
    // we abstract that away in case the grammer internals change/
    return convert_list_of_rule_pairs_to_grammer(rules);
}

// a variation of example_grammer_2 that better tests nullable
fun example_grammer_3(){
    var rules = list_4(
	pair("X",
	     list_2( list_1("Y"),
		     list_1("a") ) ),
	pair("Y",
	     list_2( list_1("c"),
		     list_1("") ) ),
	pair("Z",
	     list_2( list_1("d"),
		     list_3("X", "Y", "Z") ) ),
	pair("K",
	     list_1( list_4("X", "Y", "X", "Y") ) ) );
    // this conversion currently does nothing because what we have above
    // is the internal structure of our grammer, but
    // we abstract that away in case the grammer internals change/
    return convert_list_of_rule_pairs_to_grammer(rules);
}

// from http://web.cs.dal.ca/~sjackson/lalr1.html
// as cited by sample 1 at
// https://github.com/alexpizarroj/lalr1-table-generator/
fun example_grammer_4(){
    var rules = list_3(
	pair("N",
	     list_2( list_3("V", "=", "E"),
		     list_1("E") ) ),
	pair("E",
	     list_1( list_1("V") ) ),
	pair("V",
	     list_2( list_1("x"),
		     list_2("*", "E") ) ) );
    // this conversion currently does nothing because what we have above
    // is the internal structure of our grammer, but
    // we abstract that away in case the grammer internals change
    return convert_list_of_rule_pairs_to_grammer(rules);
}

// from the Dragonbook, page 271, example 4.61
// as cited by sample 2 at
// https://github.com/alexpizarroj/lalr1-table-generator/
fun example_grammer_5(){
    var rules = list_3(
	pair("S",
	     list_2( list_3("L", "=", "R"),
		     list_1("R") ) ),
	pair("L",
	     list_2( list_2("*", "R"),
		     list_1("ID") ) ),
	pair("R",
	     list_1( list_1("L") ) ) );
    // this conversion currently does nothing because what we have above
    // is the internal structure of our grammer, but
    // we abstract that away in case the grammer internals change
    return convert_list_of_rule_pairs_to_grammer(rules);
}

// default example on
// http://jsmachines.sourceforge.net/machines/lalr1.html
// This is also Dragonbook, page 263, grammar 4.55 below example 4.54
// as cited by sample 3 at
// https://github.com/alexpizarroj/lalr1-table-generator/
fun example_grammer_6(){
    var rules = list_2(
	pair("S", list_1( list_2("C", "C") )),
	pair("C", list_2( list_2("c", "C"),
			  list_1("d") ) ) );
    // this conversion currently does nothing because what we have above
    // is the internal structure of our grammer, but
    // we abstract that away in case the grammer internals change
    return convert_list_of_rule_pairs_to_grammer(rules);
}

// from the Dragonbook, page 267, example 4.58
// as cited by sample 4 at
// https://github.com/alexpizarroj/lalr1-table-generator/
fun example_grammer_7(){
    var rules = list_3(
	pair("S",
	     list_4( list_3("a", "A", "d"),
		     list_3("b", "B", "d"),
		     list_3("a", "B", "e"),
		     list_3("b", "A", "e") ) ),
	pair("A",
	     list_1( list_1("c") ) ),
	pair("B",
	     list_1( list_1("c") ) ) );

    // this conversion currently does nothing because what we have above
    // is the internal structure of our grammer, but
    // we abstract that away in case the grammer internals change
    return convert_list_of_rule_pairs_to_grammer(rules);
}

// sample 5 from
// https://github.com/alexpizarroj/lalr1-table-generator/
fun example_grammer_8(){
    var rules = list_4(
	pair("p",
	     list_1( list_2("title", "ss") ) ),
	pair("title",
	     list_1( list_3("TITLE", "TEXT", "NEWLINE") ) ),
	pair("ss",
	     list_2( list_2("s", "ss"),
		     list_1("s") ) ),
	pair("s",
	     list_6(
		 list_7("NOTE", "LEFT", "OF", "TEXT",
			"':'", "TEXT", "NEWLINE"),
		 list_6("TEXT", "->", "TEXT", "':'", "TEXT", "NEWLINE"),
		 list_6("LOOP", "TEXT", "NEWLINE", "ss", "END", "NEWLINE"),
		 list_5("LOOP", "TEXT", "NEWLINE", "END", "NEWLINE"),
		 list_9("ALT", "TEXT", "NEWLINE", "ss", "ELSE",
			"NEWLINE", "ss", "END", "NEWLINE"),
		 list_6("ALT", "TEXT", "NEWLINE", "ss", "END", "NEWLINE") ) )
    );
    // this conversion currently does nothing because what we have above
    // is the internal structure of our grammer, but
    // we abstract that away in case the grammer internals change
    return convert_list_of_rule_pairs_to_grammer(rules);
}

## grammer-test.lox
// REQUIRE list.lox
// REQUIRE grammer.lox
// REQUIRE examples.lox

fun test(){
    var g = example_grammer_1();
    print_grammer(g);
}

test();

## grammer.lox
// REQUIRE identity.lox
// REQUIRE list.lox
// REQUIRE table.lox
// REQUIRE symbol.lox

// we use our system_table definitions for our grammer, the
// keys in our table are the left hand sides of our rules aka non-terminals
// the values in our tables are the list of rhs rules, each of which is a
// list of symbols
var create_grammer = create_symbol_table;
var extend_grammer = add_sym_table_entry;
var start_grammer_iter = start_table_iter;
var grammer_iter_has_next = table_iter_has_next;
var grammer_iter_current = pair_first;
var grammer_iter_next = pair_second;
var grammer_find_lhs_rhs_pair = find_sym_table_pair;
var grammer_rule_find_rhs_rules_for_lhs = find_sym_table_value;
var grammer_has_lhs = sym_in_table;

var create_grammer_rule = pair;

// e.g. the left hand side of a grammer rule that would be followed by
var grammer_rule_lhs = pair_first;
// e.g. all of the potential right hand sides of a rule, a list of all
// of them
var grammer_rule_right_hand_rules = pair_second;


fun print_grammer(g){
    print "printing grammer with";
    print list_len(g);
    print "rules";
    print "";
    var g_iter = start_grammer_iter(g);
    var lhs;
    var rhs;
    var accum;
    var rule;
    var rule_l;
    while (grammer_iter_has_next(g_iter)){
	rule = grammer_iter_current(g_iter);
	lhs = grammer_rule_lhs(rule);
	print lhs;
	rhs = grammer_rule_right_hand_rules(rule);
	print list_len(rhs);
	while (nil!=rhs){
	    accum = "   "; // 3 spaces, 1 will be added below
	    rule_l = pair_first(rhs);
	    while(nil!=rule_l){
		//print pair_first(rule_l);
		if (pair_first(rule_l) =="")
		    accum = accum + " " + "{EMPTY}";
		else
		    accum = accum + " " + pair_first(rule_l);
		rule_l = pair_second(rule_l);
	    }
	    print accum;
	    rhs = pair_second(rhs);
	}
	g_iter = grammer_iter_next(g_iter);
    }
}

fun rhs_iter_has_next(rhs_iter){
    return nil != rhs_iter;
}

var start_rhs_iter = identity;
var create_rhs_iter = start_rhs_iter; // redundant, clean-up called for
var rhs_iter_current = pair_first;
var rhs_iter_next = pair_second;

var leftmost_symbol_in_rhs_element = pair_first;
var next_sublist_in_rhs_element = pair_second;

// a detailed_grammer has additional information beyond the production
// rules
//
// so far that's just tracking a particular rule_lhs as the start rule
fun create_detailed_grammer(g, start_rule_lhs){
    if ( sym_in_table(g, start_rule_lhs) )
	return pair( start_rule_lhs, g);
    // return nil if the start_rule lts doesn't exist
    return nil;
}

var detailed_grammer_start_lhs = pair_first;
var detailed_grammer_rules = pair_second;

## grammer_first_terminals-test.lox
// REQUIRE table.lox
// REQUIRE list.lox
// REQUIRE reverse.lox
// REQUIRE symbol.lox
// REQUIRE grammer.lox
// REQUIRE grammer_nullable.lox
// REQUIRE grammer_first_terminals.lox
// REQUIRE examples.lox

fun test(){
    var tests_1_5 = list_5(
	pair(example_grammer_1(),
	     list_2(
		 pair("program",
		      list_4("", "statement", "varDecl", "constDecl") ),
		 pair("declaration",
		      list_3("statement", "varDecl", "constDecl") ) ) ),
	pair(example_grammer_2(),
	     list_3( pair("X", list_3("", "a", "c")),
		     pair("Y", list_2("", "c")),
		     pair("Z", list_4("", "a", "c", "d")) ) ),

	pair(example_grammer_3(),
	     list_4( pair("X", list_3("", "a", "c")),
		     pair("Y", list_2("", "c")),
		     pair("Z", list_4("", "a", "c", "d")),
		     pair("K", list_3("", "a", "c")) ) ),

	pair(example_grammer_4(),
	     list_3( pair("N", list_2("*", "x")),
		     pair("E", list_2("*", "x")),
		     pair("V", list_2("*", "x")) ) ),

	pair(example_grammer_5(),
	     list_3( pair("S", list_2("*", "ID")),
		     pair("L", list_2("*", "ID")),
		     pair("R", list_2("*", "ID")) ) )
    );
    var tests = list_3(
	pair(example_grammer_6(),
	     list_2( pair("S", list_2("c", "d")),
		     pair("C", list_2("c", "d")) ) ),
	pair(example_grammer_7(),
	     list_3( pair("S", list_2("a", "b")),
		     pair("A", list_1("c")),
		     pair("B", list_1("c")) ) ),
	pair(example_grammer_8(),
	     list_4(
		 pair("p", list_1("TITLE") ),
		 pair("title", list_1("TITLE") ),
		 pair("ss", list_4("NOTE", "TEXT", "LOOP", "ALT") ),
		 pair("s", list_4("NOTE", "TEXT", "LOOP", "ALT" ) ) ) )
    );
    tests = prepend_list_to_list( reverse(tests_1_5), tests );
    var current_test;
    var i = 1;
    while(nil!=tests){
	current_test = pair_first(tests);
	if ( !run_test(pair_first(current_test), pair_second(current_test)) ){
	    print "test failed";
	    print i;
	}
	tests = pair_second(tests);
	i = i+1;
    }
}

fun run_test(g, expected_members){
    // upper case just to reflect tradition from many papers in the field
    var FIRST = first_terminals_set_table_of_non_terminals(
	g,
	nullable_symbol_set(g),
	true);
    var non_terms;
    var current_pair;
    var test_l;

    while(nil!=expected_members){
	current_pair = pair_first(expected_members);
	non_terms = find_sym_table_value(FIRST, pair_first(current_pair));
	test_l = pair_second(current_pair);
	if (symbol_set_len(non_terms) != list_len(test_l)){
	    print pair_first(current_pair);
	    print "computed FIRST set does not match count of expected";
	    print symbol_set_len(non_terms);
	    print list_len(test_l);
	    return false;
	}
	while(nil!=test_l){
	    if( !is_symbol_set_member(non_terms, pair_first(test_l)) ){
		print pair_first(test_l) + " is not found in " +
		    pair_first(current_pair);
		return false;
	    }
	    test_l = pair_second(test_l);
	}
	expected_members = pair_second(expected_members);
    }
    return true;
}

test();

## grammer_first_terminals.lox
// REQUIRE symbolset.lox
// REQUIRE list.lox
// REQUIRE symbolsettable.lox

// include_empty allows the empty string "" to be a terminal returned
fun recursive_gather_first_terminals_table_of_non_terminals(
    g, nullable, non_terminals_visited,
    current_lhs, include_empty){

    // catch non terminals we've already gathered and return the cached
    // value
    if (sym_in_table(non_terminals_visited, current_lhs)){
	// replace with return non_terminals_visited
	// once we refactor the return value
	return non_terminals_visited;
    }
    non_terminals_visited = add_empty_symbol_set_to_sym_table(
	non_terminals_visited, current_lhs);

    var current_rhs = grammer_rule_find_rhs_rules_for_lhs(g, current_lhs);
    if(nil==current_rhs){
	print "error, recursed to non existent non-terminal " + current_lhs;
	return nil;
    }

    var first_terminals_set_for_current = create_symbol_set();

    var rhs_iter = create_rhs_iter(current_rhs);
    var right_hand_list;
    var right_hand_list_iter;
    var right_hand_side;
    var right_hand_first;
    var right_hand_second;
    var non_terminal_first;
    while( rhs_iter_has_next(rhs_iter) ){
	right_hand_list = rhs_iter_current(rhs_iter);
	right_hand_first = leftmost_symbol_in_rhs_element(right_hand_list);

	// form a union of all first terminals found by going into
	// every leading non-terminals that's nullable
	while( nil!=right_hand_list and
	       is_symbol_set_member(nullable, right_hand_first) ){

	    // expand non_terminals_visited and first_terminals_set_for_current
	    // recursively
	    // if the non-terminal driving the recursion is already in
	    // non_terminals_visited, it's no problem because this
	    // is caught at the top of this recursive function
	    non_terminals_visited =
		recursive_gather_first_terminals_table_of_non_terminals(
		    g, nullable, non_terminals_visited,
		    right_hand_first,
		    include_empty);

	    first_terminals_set_for_current = symbol_set_union(
		first_terminals_set_for_current,
		find_sym_table_value(non_terminals_visited,
				     right_hand_first) );

	    right_hand_list = next_sublist_in_rhs_element(right_hand_list);
	    if (nil!=right_hand_list){
		right_hand_first = leftmost_symbol_in_rhs_element(
		    right_hand_list);
	    }
	}

	// if we didn't exhaust the right hand side, find out if we
	// have a non-terminal in what's left, so that the following
	// two if clauses only need to look that up once
	// a continue statement would come in handy here!
	if (nil!=right_hand_list)
	    non_terminal_first = grammer_has_lhs(g, right_hand_first);

	// confirm we didn't exhast the right hand side
	// if we're dealing with a rule with a non-terminal as a first
	// element, some recursion is called for
	if (nil!=right_hand_list and non_terminal_first){
	    // expand non_terminals_visited and first_terminals_set_for_current
	    // recursively
	    // if the non-terminal driving the recursion is already in
	    // non_terminals_visited, it's no problem because this
	    // is caught at the top of this recursive function
	    non_terminals_visited =
		recursive_gather_first_terminals_table_of_non_terminals(
		    g, nullable, non_terminals_visited,
		    right_hand_first,
		    include_empty);

	    first_terminals_set_for_current = symbol_set_union(
		first_terminals_set_for_current,
		find_sym_table_value(non_terminals_visited,
				     right_hand_first));
	}
	// assuming we didn't exhaust right_hand_list
	// then if else we're dealing with a terminal (!non_terminal_first)
	// add it to the set for current
	if (nil!=right_hand_list and !non_terminal_first){
	    // store the terminal found
	    //
	    // if include_empty is true this includes the case where the
	    // terminal is the empty string
	    //
	    // we don't need to worry about there being something else
	    // after the empty string
	    // a production should only have
	    // empty string on the right hand side and nothing following it
	    // it really only exists for left recursive cases
	    // A -> ""
	    // A -> A b
	    // a grammer that includes "", b, bb, bbb..
	    if ("" != right_hand_first or include_empty)
		first_terminals_set_for_current = symbol_set_add(
		    first_terminals_set_for_current,
		    right_hand_first);
	}
	rhs_iter = rhs_iter_next(rhs_iter);
    }

    // eventually we'll start returning this instead of
    // first_terminals_set_for_current
    non_terminals_visited = update_sym_table_entry(
	non_terminals_visited, current_lhs, first_terminals_set_for_current);

    return non_terminals_visited;
}

fun first_terminals_set_table_of_non_terminals(
    g, nullable, include_empty){
    var current_first_table_combined_len = -1;
    var first_table = create_symbol_set_table();
    var g_iter;
    // while condition guaranteed to be true the first time (0>-1)
    // for want of a do while construct
    //
    // purpose of the loop being to keep going until
    // the FIRST table stabilizes in the size of the sets inside of it.
    while( symbol_set_table_combined_len(first_table) >
	   current_first_table_combined_len ){

	// yeah, could avoid doing this twice by putting an assignment
	// in the loop condition
	current_first_table_combined_len = symbol_set_table_combined_len(
	    first_table);

	// any grammer rule could be the starting point, so we try starting
	// from each of them
	g_iter = start_grammer_iter(g);
	while(grammer_iter_has_next(g_iter)){
	    first_table =
		recursive_gather_first_terminals_table_of_non_terminals(
		    g,
		    nullable,
		    first_table, // non_terminals_visited
		    grammer_rule_lhs(
			grammer_iter_current(g_iter)), // current_lhs
		    include_empty); // include_empty
	    g_iter = grammer_iter_next(g_iter);
	}
    }
    return first_table;
}

## grammer_nullable-test.lox
// REQUIRE grammer_nullable.lox
// REQUIRE list.lox
// REQUIRE examples.lox

fun test(){
    var test_grammers = list_3(
	example_grammer_1(),
	example_grammer_2(),
	example_grammer_3() );
    var n =1;
    while(nil!=test_grammers){
	var g = pair_first(test_grammers);
	var dir_nullable = directly_nullable_symbol_set(g);
	print "example";
	print n;
	print "directly nullable";
	print list_len(dir_nullable);
	list_print(dir_nullable);
	print "recursivley nullable";
	var recur_nullable = nullable_symbol_set(g);
	print list_len(recur_nullable);
	list_print(recur_nullable);
	print "";
	n = n+1;
	test_grammers = pair_second(test_grammers);
    }
}

test();

## grammer_nullable.lox
// REQUIRE symbolset.lox
// REQUIRE grammer.lox

// very helpful
// https://piazza.com/class_profile/get_resource/i7dughpewipz/i8gu6ig5f6w6o7
// file name LALR1howto.pdf

// e.g. a first pass that just identifies left hand sides that are nullable
// in their own right and not recursively by way of following first productions
fun directly_nullable_symbol_set(g){
    var resultset = create_symbol_set();
    var g_iter = start_grammer_iter(g);
    var rule_pair;
    var lhs;
    var rhs;
    var rhs_iter;
    var rhs_current;
    var found_nullable;
    while(grammer_iter_has_next(g_iter)){
	rule_pair = grammer_iter_current(g_iter);
	lhs = grammer_rule_lhs(rule_pair);
	rhs = grammer_rule_right_hand_rules(rule_pair);

	rhs_iter = start_rhs_iter(rhs);
	found_nullable = false;
	while(rhs_iter_has_next(rhs_iter) and !found_nullable){
	    rhs_current = rhs_iter_current(rhs_iter);
	    if (list_len(rhs_current)==1 and
		pair_first(rhs_current)==""){
		resultset = symbol_set_add(resultset, lhs);
		found_nullable = true; // for lack of break statement
	    }
	    rhs_iter = rhs_iter_next(rhs_iter);
	}

	g_iter = grammer_iter_next(g_iter);
    }
    return resultset;
}

fun nullable_symbol_set(g){
    var nullable = directly_nullable_symbol_set(g);
    var current_nullable_len = set_len(nullable);
    var g_iter;
    var rule_pair;
    var lhs;
    var rhs;
    var rhs_iter;
    var found_nullable;

    // -1 to ensure first run of outer
    // while loop happens, could really use do-while
    // loop until nullable set stabalizes in size
    var last_nullable_len = -1;
    while (current_nullable_len!=last_nullable_len){
	g_iter = start_grammer_iter(g);
	while(grammer_iter_has_next(g_iter)){
	    rule_pair = grammer_iter_current(g_iter);
	    lhs = grammer_rule_lhs(rule_pair);
	    rhs = grammer_rule_right_hand_rules(rule_pair);

	    if(!is_symbol_set_member(nullable, lhs)){
		rhs_iter = start_rhs_iter(rhs);
		found_nullable = false;
		while(rhs_iter_has_next(rhs_iter) and !found_nullable){
		    if(all_l_members_in_symbol_set(rhs_iter_current(rhs_iter),
						   nullable)){
			nullable = symbol_set_add(nullable, lhs);
			found_nullable = true; // for lack of break statement
		    }
		    rhs_iter = rhs_iter_next(rhs_iter);
		}
	    }
	    g_iter = grammer_iter_next(g_iter);
	}

	last_nullable_len = current_nullable_len;
	current_nullable_len = set_len(nullable);
    }
    return nullable;
}

## identity.lox
fun identity(x){
    return x;
}

## inout.lox
// this is a basic IO test that reads in char_triplet structures
// from a global variable INPUT and prints lines of accumulated characters
// back out
//
// generate a .lox file with an INPUT global variable using charinput.py from
// https://gist.github.com/markjenkins/1a39e62537de9f08648aaf0c82e7d689
// python3 char_input.py infile.txt outfile_INPUT.lox

// REQUIRE triplet.lox
// REQUIRE char_triplet.lox

{
  var chars = INPUT;
  var concat_triplet;
  var concat_str;
  while( nil != chars ){
    concat_triplet = concatenate_chars_until_newline(chars);
    chars = triplet_last(concat_triplet);
    concat_str = triplet_second(concat_triplet);
    if( "" != concat_str )
      print concat_str;
  }
}

## int2string-test.lox
// REQUIRE int2string.lox

fun test(){
    print int2string(42) + " is the answer";
}

test();

## int2string.lox
// REQUIRE modarith.lox

fun zerotonine2string(i){
    // binary search for performance
    if(i<5) {
	if(i<2){
	    if(i==0)
		return "0";
	    if(i==1)
		return "1";
	}
	else { // <5 and >=2
	    if(i==2)
		return "2";
	    else { // <5 and >2
		if(i==3)
		    return "3";
		if(i==4)
		    return "4";
	    }
	}
    }
    else { // >= 5
	if(i<7){
	    if(i==5)
		return "5";
	    if(i==6)
		return "6";
	}
	else { // >= 7
	    if(i==7)
		return "7";
	    else { // > 7
		if(i==8)
		    return "8";
		if(i==9)
		    return "9";
	    }
	}
    }

    // if all tests for exactly 0-9 fail
    print "zerotonine2string(i) recieved i that was not 0<=i<10";
    return nil;
}

// this only works on lox implementations that use int instead of float
fun int2string(i){
    var accum = "";
    while(0!=i){
	accum = zerotonine2string(mod(i, 10)) + accum;
	i = i / 10;
    }
    return accum;
}

## LICENSE
Copyright Mark Jenkins <mark@markjenkins.ca>, 2022

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.

## list.lox
// REQUIRE print.lox

fun list_len(l){
    var count = 0;
    while(nil!=l){
	count = count+1;
	l = pair_second(l);
    }
    return count;
}

fun list_1(el){
    return pair(el, nil);
}

fun list_2(one, two){
    return pair(one, list_1(two));
}

fun list_3(one, two, three){
    return pair(one, list_2(two, three));
}

fun list_4(one, two, three, four){
    return pair(one, list_3(two, three, four));
}

fun list_5(one, two, three, four, five){
    return pair(one, list_4(two, three, four, five));
}

fun list_6(one, two, three, four, five, six){
    return pair(one, list_5(two, three, four, five, six));
}

fun list_7(one, two, three, four, five, six, seven){
    return pair(one, list_6(two, three, four, five, six, seven));
}

fun list_8(one, two, three, four, five, six, seven, eight){
    return pair(one, list_7(two, three, four, five, six, seven, eight));
}

fun list_9(one, two, three, four, five, six, seven, eight, nine){
    return pair(one, list_8(two, three, four, five,
                            six, seven, eight, nine));
}

fun list_foreach(l, f){
    while(nil!=l){
	f(pair_first(l));
	l = pair_second(l);
    }
}

fun list_print(l){
    if (list_len(l)==0)
	print "{empty list}";
    else
	list_foreach(l, print1);
}

fun str_list_print(l){
    var accum = "";
    var item;
    if (nil==l)
	print "{empty list}";
    var sep = "";
    while(nil!=l){
	item = pair_first(l);
	if(""==item)
	    accum = accum + sep + "{empty string}";
	else
	    accum = accum + sep + item;
	sep = " ";
	l = pair_second(l);
    }
    print accum;
}

fun list_access_n(l, n){
    var i = 0;
    while(nil!=l){
	if(i==n)
	    return pair_first(l);
	l = pair_second(l);
	i = i + 1;
    }
    print "list_access_n failed due to list exhaution";
    return nil;
}

fun prepend_list_to_list(l1, l2){
    var new_list = l2;
    while(nil!=l1){
	new_list = pair(pair_first(l1), new_list);
	l1 = pair_second(l1);
    }
    return new_list;
}

## Makefile
TESTS=symbol-set-test.lox grammer-test.lox grammer_nullable-test.lox grammer_first_terminals-test.lox symbol_table-test.lox int2string-test.lox
TESTSCONCAT=$(TESTS:.lox=_concat.lox)


default: ${TESTSCONCAT}

%_concat.lox:
	cat $^ > $@

# fixme, should really derive $< from $@ so that no right hand dep needs to be
# listed
%_concat.lox.d: %.lox
	python3 deps.py $< > $@

int2string-test_concat.lox.d: int2string-test.lox
symbol-set-test_concat.lox.d: symbol-set-test.lox
symbol_table-test_concat.lox.d: symbol_table-test.lox
grammer-test_concat.lox.d: grammer-test.lox
grammer_nullable-test_concat.lox.d: grammer_nullable-test.lox
grammer_first_terminals-test_concat.lox.d: grammer_first_terminals-test.lox


# to use this, you also need to prepend a lox file with a global var INPUT
# see comment at the top of inout.lox
inout_concat.lox.d: inout.lox

include $(TESTSCONCAT:_concat.lox=_concat.lox.d)

.PHONY: default

## modarith.lox
// REQUIRE abs.lox

// this only works on lox implementations that use integers instead of
// floating point
fun mod(a, b){
    var quotiant = a/b;
    return abs(a-(quotiant*b));
}

## newline.lox
var NEWLINE = "
";

## object.lox
class OBJECT {}

## pair.lox
// class OBJECT {} needs to be in the runtime or object.lox appear before this
fun pair(a, b) {
  var p = OBJECT();
  p.first = a;
  p.second = b;
  return p;
}

// normally you would just do p.first and p.second yourself
// but if these are defined in the runtime they allow you to access the
// two halves of the pair with functions
// and without . operator needed to be implemented in the compiler

fun pair_first(p){
    return p.first;
}

fun pair_second(p){
    return p.second;
}

## partition.lox
// Henri Tuhola https://boxbase.org/entries/2019/oct/14/lr1-parsing-tables/
// helped me think of all the notation on this subject using the dot operator
// as really being about partitioning


## print.lox
fun print1(p){
    print p;
    print ";";
}

## REFERENCES
Yacc: Yet Another Compiler-Compiler , 1975
Stephen C. Johnson
AT&T Bell Laboratories
Murray Hill, New Jersey 07974
Comically, some websites have "Murry Hill" in their metadata as a co-author!

How to write your own LR(1) parsing tables (and generate them)
Henri Tuhola, blog boxbase.org
https://twitter.com/HenriTuhola
https://boxbase.org/entries/2019/oct/14/lr1-parsing-tables/

Dragon Book
Proprietary and not officially made available on the public internet by Addison-Wesley


Practical Translators for LR(k) Languages
Franklin Lewis DeRemer
1969
Project MAC
Massachusetts Institute Of Technology


Efficient LR(1) Parsers
Anderson T, Eve J, Horning JJ
Report
Computing Laboratory Technical Report Series
1971 (wikipedia says 1973)
https://eprints.ncl.ac.uk/159990
https://eprints.ncl.ac.uk/file_store/production/159990/2017F628-ECE1-4A3A-9113-FFF0B2D9658B.pdf

LALR 1 Parsing | Compiler Design
tutorialandexample.com
https://www.tutorialandexample.com/lalr-1-parsing


https://stackoverflow.com/questions/8242509/how-does-the-yacc-bison-lalr1-algorithm-treat-empty-rules


https://piazza.com/class_profile/get_resource/i7dughpewipz/i8gu6ig5f6w6o7
LALR1howto.pdf
"""Abstract: Many textbooks describe construction of the LALR(1) context-free state machine (CFSM) with detailed pseudocode that, while it may be a useful guide to producing code, is less helpful than it ought to be for understanding how and why the construction works. is is an attempt to explain the construction well enough that you can actually work examples by hand, by explaining both exactly how it works and why each step is the way it is."""

3.3.5 Practical Considerations for LALR(1) Grammars
https://lambda.uta.edu/cse5317/notes/node21.html

Parsing Algorithms
Lecture notes
CS 4447/CS 9545 -- Stephen Watt
University of Western Ontario
https://www.csd.uwo.ca/~watt/home/courses/2008-09/cs4447a/notes/CS4447%2004%20--%20Parsing%20Algorithms%201.pdf


Interactive LALR(1) Parser Generator in javascript
http://jsmachines.sourceforge.net/machines/lalr1.html


https://github.com/alexpizarroj/lalr1-table-generator/

## reverse.lox
fun reverse(l){
    var accum = nil;
    var l_pair = pair; // faster local reference
    var l_pair_first = pair_first; // faster local reference
    var l_pair_second = pair_second; // faster local reference
    while (nil!=l){
	accum = l_pair(l_pair_first(l), accum);
	l = l_pair_second(l);
    }
    return accum;
}

## set.lox
// this is not an efficient implementation, checking for set membership
// does not need to be a O(n) operation (going through all existing members)
//
// using just pair(), something like a red black tree would make sense,
// this is why set_add and set_union take a pair of comparison functions
// (eq and lt), because at some point making use of lt as well may make sense
//
// other better performing options would to use a built-in hash table type
// or a built-in vector type, built-in hash function (output int) for
// strings and a built-in function for modular arithmatic to implement
// a hash table in lox from those primitives

// depends on
// REQUIRE comppair.lox
// REQUIRE list.lox

fun create_set(){
    return nil;
}

fun is_set_member(s, member, comppair){
    var comp_eq = comp_pair_eq(comppair);
    var current = s;
    while ( nil != current ){
	if( comp_eq( pair_first(current), member) )
	    return true;
	current = pair_second(current);
    }
    return false;
}

fun set_add(s, new_set_member, comppair){
    if( is_set_member(s, new_set_member, comppair) )
	return s;
    return pair(new_set_member, s);
}

fun set_union(s1, s2, comppair){
    // no work to do if either set is nil (empty), just use the other one
    if (nil==s1)
	return s2;
    // for clarity, technically not needed as the loop below would be fine
    if (nil==s2)
	return s1;

    // start with empty set okay, first call to set_member will be false
    var combined_set = s1;
    var current = s2;
    while (nil!=current){
	combined_set = set_add(combined_set, pair_first(current), comppair);
	current = pair_second(current);
    }

    return combined_set;
}

fun set_len(s){
    return list_len(s);
}

## symbol-set-test.lox
// REQUIRE symbolset.lox

var s = create_symbol_set();
s = symbol_set_add(s, "abba");
s = symbol_set_add(s, "cdda");
s = symbol_set_add(s, "effe");
print is_symbol_set_member(s, "cdda");

## symbol.lox
// REQUIRE table.lox

fun symbol_eq(sym1, sym2){
    return sym1==sym2;
}

// lt operator nil because we don't yet have lt support on strings
var symbol_comp_pair = pair(symbol_eq, nil);

var create_symbol_table = create_table;

fun add_sym_table_entry(t, sym, value){
    return add_table_entry(t, sym, value);
}

fun find_sym_table_pair(t, k){
    return find_table_entry_pair(t, k, symbol_comp_pair);
}

fun sym_in_table(t, k){
    return nil!=find_sym_table_pair(t, k);
}

fun find_sym_table_value(t, k){
    return find_table_entry_value(t, k, symbol_comp_pair);
}

fun remove_sym_table_entry(t, sym){
    return remove_entry_from_table(t, sym, symbol_comp_pair);
}

fun update_sym_table_entry(t, sym, value){
    return update_table_entry(t, sym, value, symbol_comp_pair);
}

## symbol_table-test.lox
// REQUIRE symbol.lox
// REQUIRE list.lox

fun test(){
    var t = create_symbol_table();
    t = add_sym_table_entry(t, "a", nil);
    t = add_sym_table_entry(t, "b", nil);
    t = add_sym_table_entry(t, "c", nil);
    t = update_sym_table_entry(t, "b", list_3(1,2,3));
    list_print(find_sym_table_value(t, "a"));
    list_print(find_sym_table_value(t, "b"));
    list_print(find_sym_table_value(t, "c"));
}

test();

## symbolset.lox
// depends on
// REQUIRE symbol.lox
// REQUIRE set.lox

fun create_symbol_set(){
    return nil;
}

fun symbol_set_add(s, sym){
    return set_add(s, sym, symbol_comp_pair);
}

fun symbol_set_union(s1, s2){
    return set_union(s1, s2, symbol_comp_pair);
}

fun is_symbol_set_member(s, member){
    return is_set_member(s, member, symbol_comp_pair);
}

fun all_l_members_in_symbol_set(l, s){
    while(nil!=l){
	if(!is_symbol_set_member(s, pair_first(l)))
	    return false;
	l = pair_second(l);
    }
    return true;
}

fun symbol_set_len(s){
    return set_len(s);
}

## symbolsettable.lox
// REQUIRE symbol.lox
// REQUIRE symbolset.lox

fun create_symbol_set_table(){
    return create_symbol_table();
}

fun add_empty_symbol_set_to_sym_table(t, sym){
    return add_sym_table_entry(t, sym, create_symbol_set());
}

fun extend_symbol_set_entry_in_sym_table(t, key, newvalue){
    if (sym_in_table(t, key)){
	return update_sym_table_entry(
	    t,
	    key,
	    symbol_set_add(find_sym_table_value(t, key),
			   newvalue) );
    }
    else {
	print "error, symbol table to symbol set, unknown key";
	return nil;
    }
}

fun symbol_set_table_combined_len(t){
    var combined_len = 0;
    var t_iter = start_table_iter(t);
    var current_pair;
    while(table_iter_has_next(t_iter)){
	current_pair = table_iter_current(t_iter);
	combined_len = combined_len +
	    symbol_set_len( pair_second(current_pair) );
	t_iter = table_iter_next(t_iter);
    }
    return combined_len;
}

## table.lox
// implementation of a key/value table with pair lists
// not an efficient method for larger tables
// further comments are in set.lox where its the same situation

// REQUIRE comppair.lox

fun create_table(){
    return nil;
}

fun add_table_entry(t, k, v){
    return pair( pair(k, v), t);
}

fun start_table_iter(t){
    return t;
}

fun table_iter_has_next(t_iter){
    return nil != t_iter;
}

var table_iter_current = pair_first;
var table_iter_next = pair_second;

// I don't bother using start_table_iter, table_iter_has_next,
// table_iter_current, and table_iter_next because this is internal to
// table.lox where it's known that these tables are formed with lists of
// pair()
fun find_table_entry_pair(t, k, comppair){
    var current_pair;
    var equality = comp_pair_eq(comppair);
    while( nil != t ){
	current_pair = pair_first(t);
	if ( equality( pair_first(current_pair), k))
	    return current_pair;
	t = pair_second(t);
    }
    return nil; // if matching table entry not found
}

fun find_table_entry_value(t, k, comppair){
    return pair_second( find_table_entry_pair(t, k, comppair) );
}

// I don't bother using start_table_iter, table_iter_has_next,
// table_iter_current, and table_iter_next because this is internal to
// table.lox where it's known that these tables are formed with lists of
// pair()
// as a side effect this reverses the order of the table entries
fun remove_entry_from_table(t, k, comppair){
    var return_t = nil;
    var current_pair;
    while( t!=nil ){
	current_pair = pair_first(t);
	// include the current pair in the output table as long as it
	// doesn't have a matching key
	if ( pair_first(current_pair) != k )
	    return_t = pair(current_pair, return_t);
	t = pair_second(t);
    }
    return return_t;
}

// has the side-effect of changing the order of table entries
fun update_table_entry(t, k, value, comppair){
    t = remove_entry_from_table(t, k, comppair);
    return add_table_entry(t, k, value);
}

## TODO.md

      
    Raw
  

              TODO.md
            
          
functions for handling partitions
computation of itemsets from grammer rules
possible computation of FOLLOW sets described in some of the literature
generation of LALA(1) automaton (by direct method)
yacc compatible front end to read yacc/bison .y files into the internals of this project
yacc compatible back-end to use a yacc skeleton and generate the parser state machine in C
compiling and interpreting of functions in bootstrappable implementation
bootstrappable simple scheme, possibly reviving and simplifying the slow_lisp mes-m2 project that ended at https://github.com/oriansj/mes-m2/commit/78ae3c569ad4a16738927b8833506f82d67ca11b, possibly just using the mes-m2(oriansj) translation that's now incorporated into mes(janneke)
rewrite i/o pre-processor into simple scheme and/or having a version of the code where INPUT list of chars is built using a native readchar function


## token.lox
// REQUIRE backslash.lox
// REQUIRE config.lox
// REQUIRE triplet.lox

fun token(tok_type, tok_str, linenum){
    return triplet(tok_type, tok_str, line_num);
}

token_type = triplet_first;
token_str = triplet_second;
token_line = triplet_last;

// token types for delcaration section
//
// where % is used, can also be a \, so \\ is %%, \left is %left
TOK_DECL_TEXT = 1; // c code for the declaration section bounded by %{ and %}
TOK_MARK   = 2; // %% mark between declaration, rules, and programs sections
TOK_START  =   3; // %start
TOK_LEFT   =   4; // %left or the alias %<
TOK_RIGHT  =   5; // %right or the alias %>
TOK_TOKEN  =   6; // %token or alias %0 and
TOK_UNION  =   7; // %union
TOK_PREC   =   8; // %prec or the alias %=
TOK_NONASSOC = 9; // %nonassoc
TOK_TYPE   =  10; // %type
TOK_DECL_IDENTIFIER = 11; // identifiers used in the declaration section
TOK_UNION_MEMBER_NAME = 12; // names bounded by angle brackets <name>


// token types for rules section
TOK_RULES_IDENTIFIER = 13; // identifiers used in rules section
TOK_COLON  =           14; // colon :
TOK_ACTION =           15; // c sections bounded by { and }
TOK_SEMI_COLON =        16; // semi colon used at close of rules
TOK_PIPE   =  17; // | character used in rules
// characters bounded by two single quotes ' ', ussually one character
// though " " is a possible substitute
TOK_CHAR   =  18;
// for token numbers chosen by the user
// as stated in the Johnson paper
// """To assign a token number to a token (including literals), the first
// appearance of the token name or literal in the declarations section can be
// immediately followed by a nonnegative integer. This integer is taken to be
// the token number of the name or literal. Names and literals not defined by
// this mechanism retain their default definition. It is important that all
// token numbers be distinct."""
TOK_NUMBER =  19;
TOK_RULES_EOF = 20; // in case end of file is reached in the rules section

// sub-token types within an action
TOK_ACTION_TEXT = 21; // normal stuff
TOK_ACTION_LHS  = 22; // $$ variable for left hand side
TOK_ACTION_RHS  = 23; // $1, $2, etc for right hand side
TOK_ACTION_NEG  = 24; // $0, $-1, $-2 etc preceding leftmost symbol

fun tokenize_declaration_percent(c_triplet, remaining_chars){

}

fun tokenize_possible_declartion_comment(c_triplet, remaining_chars){

}

fun tokenize_declaration_union_member(c_triplet, remaining_chars){

}

fun tokenize_declaration_literal(c_triplet, remaining_chars){

}

fun tokenize_declaration_identifier(chars){
    var c_triplet;
    var accum = nil;
    while( chars != nil){
	c_triplet = pair_first(chars);
	c = triplet_first(c_triplet);

	if (char_triplet_is_whitespace(c_triplet))
	    return pair(chars, reverse(accum));

	accum = pair(c_triplet, accum);
	chars = pair_second(chars);
    }
    return nil;
}


// returns a pair of remaining character_triplets and
// tokens accumulated (in proper order)
//
// it's an error if characters are exausted without finding the
// marker for the end of declaration section %%
//
// in which case,
fun tokenize_declaration_section(input_chars){
    var chars = input_chars;
    var c; // current character
    var accum = nil;

    while( chars != nil ){
	c_triplet = pair_first(chars);
	c = triplet_first(c_triplet);

        // inspect to see if c_triplet decimal is a whitespace char, if so skip
        if (char_triplet_is_whitespace(c_triplet)){
	    chars = pair_second(chars); // skip and go to next char
	}
	else {
	    var result;

	    // below is a situation where if-elseif-else would be handy,
	    // but we only have if-else.
	    // We count on the fact that c can only be one value
	    // so only one of these if clauses will execute
            if (c=="%" or
		(c==BACKSLASH and SUPPORT_BACKSLASH_AS_ALT_TO_PERCENT) ){
		result = tokenize_declaration_percent(
		    c_triplet, pair_second(chars);
	    }

	    // declaration section comment
	    // for now, we're not concerning outselves with the question of, is
	    // there whitespace before the comment
	    if (c=="/")
		result = tokenize_possible_declartion_comment(
		    c_triplet, pair_second(chars) );


	    if (c=="<")
		result = tokenize_declaration_union_member(
		    c_triplet, pair_second(chars) );

	    if (c=="'" or (c==DOUBLE_QUOTE and SUPPORT_DOUBLE_QUOTE_LITERALS))
		result = tokenize_declaration_literal(
		    c_triplet, pair_second(chars) );
	    else
		result = tokenize_declaration_identifier(chars);

	    chars = pair_first(result);
	    // prepend token found, assumption is that some output
	    // token is generated here, even if % isn't followed by what
	    // we'd expect (or we'll throw an error otherwise)
	    accum = pair(pair_second(result), accum);

	    // we expect to find the mark for the end of declarations section
	    // before this while loop goes through all characters
	    if ( token_type(pair_first(accum)) == TOKEN_MARK )
		return pair(chars, reverse(accum));

	}
    } // character while loop

    // it's an error to reach end of file in the declarations
    // section without finding TOKEN_MARK
    return nil;
}

## triplet.lox
fun triplet(one, two, three){
    return pair(one, pair(two, three));
}

var triplet_first = pair_first;

fun triplet_second(t){
    // if there is compiler support for accessing attributes by, this is better
    // return t.second.first;
    return pair_first(pair_second(t));
}

fun triplet_last(t){
    // if there is compiler support for accessing attributes by, this is better
    // return t.second.second;
    return pair_second(pair_second(t));
}
	var ACTION_SHIFT = 0;
	var ACTION_REDUCE = 1;
	var ACTION_ACCEPT = 2;

	fun action_shift(next_state){
	return pair(ACTION_SHIFT, next_state);
	}

	fun action_reduce(next_state){
	return pair(ACTION_REDUCE, next_state);
	}

	fun action_accept(){
	return pair(ACTION_ACCEPT, nil);
	}
	// quoting just a backslash screws up emacs javascript mode, which I shouldn't
	// really be using
	var BACKSLASH = "\";
	// REQUIRE triplet.lox
	// REQUIRE newline.lox
	// REQUIRE reverse.lox

	fun char_triplet(c, c_decimal, c_line_num){
	return triplet(c, c_decimal, c_line_num);
	}

	var char_triplet_str = triplet_first;

	fun char_triplet_decimal(c_triplet){
	return triplet_second(c_triplet);
	}

	fun char_triplet_line_num(c_triplet){
	return triplet_third(c_triplet);
	}

	fun char_triplet_is_whitespace(c_triplet){
	c_decimal = char_triplet_decimal(c_triplet);
	// ascii table (decimal)
	// 9 horizontal tab
	// 10 line feed
	// 11 vertical tab
	// 12 form feed
	// 13 carriage return
	// 32 space
	return ( (c_decimal>= 9 and c_decimal<= 13) or
	c_decimal == 32 );
	}

	// returns a triplet containing
	// * the c_triplets accumulated
	// * the string accumulated
	// * the remaining chars or nil if they were all exhausted
	fun concatenate_chars_until_newline(chars){
	var str_accum = "";
	var triplet_accum = nil;
	var c_triplet;
	var c_str;
	while ( nil != chars ){
	c_triplet = pair_first(chars);
	c_str = char_triplet_str(c_triplet);
	chars = pair_second(chars);
	if (NEWLINE == c_str)
	return triplet(reverse(triplet_accum),
	str_accum,
	chars);
	str_accum = str_accum + c_str;
	triplet_accum = pair(c_triplet, triplet_accum);
	}
	// if we reached the end of input without finding a newline
	return triplet(reverse(triplet_accum), str_accum, nil);
	}
	var comp_pair_eq = pair_first;
	var comp_pair_lt = pair_second;

	var form_comp_pair = pair;
	SUPPORT_BACKSLASH_AS_ALT_TO_PERCENT = true;
	SUPPORT_DOUBLE_QUOTE_LITERALS = true;
	#!/usr/bin/env python3

	# Does not support circular dependencies or even detection yet, will crash
	# trying to resolve them
	#
	# The lox language supports them, you can have one lox file B that
	# has declarations of globals based on other globals (dependency) from A
	# and have the other lox file (A) just have functions that rely on globals
	# that the other dep (B). Because those functions don't resolve the global
	# until called at runtime, it's fine.
	#
	# So if you have a safe circular dep like this, have a // REQUIRE A.lox
	# at the top of B.lox and just put a note in A.lox explaining the
	# circular dep

	from sys import argv
	from os.path import exists

	def deps_in_file(filename):
	if not exists(filename):
	raise Exception("%s not found" % filename)

	direct_deps = []
	with open(filename) as f:
	for line in f:
	line_parts = line.strip().split(" ")

	if (len(line_parts) >= 3 and
	line_parts[0]=="//" and
	line_parts[1]=="REQUIRE" and
	line_parts[2].endswith(".lox") ):
	direct_deps.append(line_parts[2])
	return direct_deps

	def recursively_find_inner(filename, filename_expansion):
	if filename in filename_expansion:
	new_deps = filename_expansion[filename]
	else:
	new_deps = deps_in_file(filename)
	filename_expansion[filename] = new_deps

	output = [filename]
	for expanded_filename in reversed(new_deps):
	output.extend( recursively_find_inner(
	expanded_filename,
	filename_expansion) )
	output_filtered = []
	output_set = set()
	for output_item in reversed(output):
	if output_item not in output_set:
	output_filtered.append(output_item)
	output_set.add(output_item)
	output_filtered.reverse()
	return output_filtered


	def recursively_find(filename):
	result = recursively_find_inner(filename, {})
	result.reverse()
	return result

	def main(start_filename):
	if not start_filename.endswith(".lox"):
	raise Exception("dependency search not on a .lox file")
	filename_w_out_prefix = start_filename[:-len(".lox")]
	print("%(filename)s_concat.lox %(filename)s_concat.lox.d: " %
	{'filename': filename_w_out_prefix},
	end="")
	print(" ".join(recursively_find(start_filename)))

	if __name__ == "__main__":
	main(argv[1])
	%token varDecl constDecl statement

	%%

	program: /* empty */
	\| program declaration;

	declaration: varDecl
	\| constDecl
	\| statement;

	%%
	%token a c d

	%start Z

	%%

	X: Y
	\| a;

	Y: /* empty */
	\| c;

	Z: d
	\| X Y Z;

	%%
	%token a b c d e

	%%

	S: a A d
	\| b B d
	\| a B e
	\| b A e;

	A: c;

	B: c;

	%%
	// REQUIRE list.lox
	// REQUIRE grammer.lox

	// currently this isn't needed because the examples are structured
	// identically to the internals of grammer.lox, but we perform this
	// conversion just in case
	fun convert_list_of_rule_pairs_to_grammer(rules){
	var g = create_grammer();
	var rule;
	while (nil!=rules){
	rule = pair_first(rules);
	g = extend_grammer(g, pair_first(rule), pair_second(rule) );
	rules = pair_second(rules);
	}
	return g;
	}

	// this grammer comes from
	// http://boxbase.org/entries/2019/oct/14/lr1-parsing-tables/
	fun example_grammer_1(){
	var rules = list_2(
	pair("program",
	list_2( list_1(""),
	list_2("program", "declaration")) ),

	pair("declaration",
	list_3( list_1("varDecl"),
	list_1("constDecl"),
	list_1("statement")) ) );

	// this conversion currently does nothing because what we have above
	// is the internal structure of our grammer, but
	// we abstract that away in case the grammer internals change/
	return convert_list_of_rule_pairs_to_grammer(rules);
	}

	// from https://www.csd.uwo.ca/~watt/home/courses/2008-09/cs4447a/notes/CS4447%2004%20--%20Parsing%20Algorithms%201.pdf
	// directly nullable set is Y
	// recursively nullable set is X and Y
	// also includes FIRST and FOLLOW sets
	// FIRST(X) is a c, FIRST(Y) is c, FIRST(Z) is a c d
	// FOLLOW(X) is a c d, FOLLOW(Y) is a c d, FOLLOW(Z) is empty
	fun example_grammer_2(){
	var rules = list_3(
	pair("X",
	list_2( list_1("Y"),
	list_1("a") ) ),
	pair("Y",
	list_2( list_1("c"),
	list_1("") ) ),
	pair("Z",
	list_2( list_1("d"),
	list_3("X", "Y", "Z") ) ) );
	// this conversion currently does nothing because what we have above
	// is the internal structure of our grammer, but
	// we abstract that away in case the grammer internals change/
	return convert_list_of_rule_pairs_to_grammer(rules);
	}

	// a variation of example_grammer_2 that better tests nullable
	fun example_grammer_3(){
	var rules = list_4(
	pair("X",
	list_2( list_1("Y"),
	list_1("a") ) ),
	pair("Y",
	list_2( list_1("c"),
	list_1("") ) ),
	pair("Z",
	list_2( list_1("d"),
	list_3("X", "Y", "Z") ) ),
	pair("K",
	list_1( list_4("X", "Y", "X", "Y") ) ) );
	// this conversion currently does nothing because what we have above
	// is the internal structure of our grammer, but
	// we abstract that away in case the grammer internals change/
	return convert_list_of_rule_pairs_to_grammer(rules);
	}

	// from http://web.cs.dal.ca/~sjackson/lalr1.html
	// as cited by sample 1 at
	// https://github.com/alexpizarroj/lalr1-table-generator/
	fun example_grammer_4(){
	var rules = list_3(
	pair("N",
	list_2( list_3("V", "=", "E"),
	list_1("E") ) ),
	pair("E",
	list_1( list_1("V") ) ),
	pair("V",
	list_2( list_1("x"),
	list_2("*", "E") ) ) );
	// this conversion currently does nothing because what we have above
	// is the internal structure of our grammer, but
	// we abstract that away in case the grammer internals change
	return convert_list_of_rule_pairs_to_grammer(rules);
	}

	// from the Dragonbook, page 271, example 4.61
	// as cited by sample 2 at
	// https://github.com/alexpizarroj/lalr1-table-generator/
	fun example_grammer_5(){
	var rules = list_3(
	pair("S",
	list_2( list_3("L", "=", "R"),
	list_1("R") ) ),
	pair("L",
	list_2( list_2("*", "R"),
	list_1("ID") ) ),
	pair("R",
	list_1( list_1("L") ) ) );
	// this conversion currently does nothing because what we have above
	// is the internal structure of our grammer, but
	// we abstract that away in case the grammer internals change
	return convert_list_of_rule_pairs_to_grammer(rules);
	}

	// default example on
	// http://jsmachines.sourceforge.net/machines/lalr1.html
	// This is also Dragonbook, page 263, grammar 4.55 below example 4.54
	// as cited by sample 3 at
	// https://github.com/alexpizarroj/lalr1-table-generator/
	fun example_grammer_6(){
	var rules = list_2(
	pair("S", list_1( list_2("C", "C") )),
	pair("C", list_2( list_2("c", "C"),
	list_1("d") ) ) );
	// this conversion currently does nothing because what we have above
	// is the internal structure of our grammer, but
	// we abstract that away in case the grammer internals change
	return convert_list_of_rule_pairs_to_grammer(rules);
	}

	// from the Dragonbook, page 267, example 4.58
	// as cited by sample 4 at
	// https://github.com/alexpizarroj/lalr1-table-generator/
	fun example_grammer_7(){
	var rules = list_3(
	pair("S",
	list_4( list_3("a", "A", "d"),
	list_3("b", "B", "d"),
	list_3("a", "B", "e"),
	list_3("b", "A", "e") ) ),
	pair("A",
	list_1( list_1("c") ) ),
	pair("B",
	list_1( list_1("c") ) ) );

	// this conversion currently does nothing because what we have above
	// is the internal structure of our grammer, but
	// we abstract that away in case the grammer internals change
	return convert_list_of_rule_pairs_to_grammer(rules);
	}

	// sample 5 from
	// https://github.com/alexpizarroj/lalr1-table-generator/
	fun example_grammer_8(){
	var rules = list_4(
	pair("p",
	list_1( list_2("title", "ss") ) ),
	pair("title",
	list_1( list_3("TITLE", "TEXT", "NEWLINE") ) ),
	pair("ss",
	list_2( list_2("s", "ss"),
	list_1("s") ) ),
	pair("s",
	list_6(
	list_7("NOTE", "LEFT", "OF", "TEXT",
	"':'", "TEXT", "NEWLINE"),
	list_6("TEXT", "->", "TEXT", "':'", "TEXT", "NEWLINE"),
	list_6("LOOP", "TEXT", "NEWLINE", "ss", "END", "NEWLINE"),
	list_5("LOOP", "TEXT", "NEWLINE", "END", "NEWLINE"),
	list_9("ALT", "TEXT", "NEWLINE", "ss", "ELSE",
	"NEWLINE", "ss", "END", "NEWLINE"),
	list_6("ALT", "TEXT", "NEWLINE", "ss", "END", "NEWLINE") ) )
	);
	// this conversion currently does nothing because what we have above
	// is the internal structure of our grammer, but
	// we abstract that away in case the grammer internals change
	return convert_list_of_rule_pairs_to_grammer(rules);
	}