# skids/Cursor_comments.pm6

## Cursor_comments.pm6
# At http://irclog.perlgeek.de/perl6/2015-06-27#i_10814220 I mentioned
# the concept of a rule that matches strings contained in a baggy thing
# only as many times as they appear in the bag as a litmus test for
# the current limitations of using Cursor and/or dynamic state inside
# grammars.

# On more examination the limitations are not as bad as I initially thought,
# but they are still prohibitive to making something like that work.

# First, here's why I thought Cursor was a bit broken:

# Demonstration: what does the cursor's .pos report inside a closure?
# TOP consumes two single-character <f> matches, then prints self.pos.
grammar bar {
    # Any one character.
    regex f { . }

    regex TOP {
        <f> <f>
        { say self.pos }
    }
}

bar.parse: "aa";
#-> 0

# S02 says: (modulo the $¢ identifier itself being NYI)
# "Within a closure, the instantaneous position within the search is denoted
# by the $¢.pos method"
# ...which is not what we are getting here.
#
# This is actually not too bad, though.  What Cursor.pos has is the start
# of the current rule, and you can indeed get your current position,
# through $/.to

# Demonstration: self.pos versus $/.from and $/.to inside a nested rule.
grammar bar2 {
    # Any one character.
    regex f { . }

    # After two <f> matches, print self.pos and then the "from..to" span
    # of the current match object.
    regex g {
        <f> <f>
        {
            say self.pos;
            say join "..", $/.from, $/.to;
        }
    }

    regex TOP { <f> <g> }
}

bar2.parse: "aaa";
#-> 1
#-> 1..3

# Per spec, $/ has the hypothetical captures only from your innermost rule.
grammar bar3 {
    # Any one character.
    regex f { . }

    # After two <f> matches, print how many <f> captures $/ holds.
    regex g {
        <f> <f>
        { say $<f>.elems }
    }

    regex TOP { <f> <g> }
}

bar3.parse: "aaa";
#-> 2

# But you may want them from rules that called you, which is what
# Cursor is supposed to be for.  There seems to be self.CAPHASH
# for that though it does not appear to quite be ready for user-facing
# use.

grammar bar4 {
    # Any one character.
    regex f { . }

    # After two <f> matches, look up 'f' in self.CAPHASH and print it
    # as a string.
    regex g {
        <f> <f>
        { say self.CAPHASH.hash<f>.Str }
    }

    regex TOP { <f> <g> }
}

bar4.parse: "aaa";
#-> a

# So... self.CAPHASH does not have anything that happened in the
# current rule, JUST the stuff from previous rules.  Well, it is
# LTA that you have to glue $/ and self.CAPHASH together but on
# the other hand you may want exactly what CAPHASH has.  (Though
# gluing versus using CALLERS are different strata of convenience IMO.)

# Also there is this:

grammar foo {
    # The literal 42.
    regex ft { 42 }

    # Match a literal 'a', then print the keys currently in self.CAPHASH.
    regex foo {
        a
        { say self.CAPHASH.hash.keys }
    }

    regex bar { <foo> <foo> <ft> }

    # One or more repetitions of either "<foo> b" or, failing that,
    # "<bar> b".
    regex TOP {
        [ <foo> b
        || <bar> b ]+
    }
}

foo.parse: 'aa42bab';
#-> bar foo
#-> foo
#-> foo
#-> bar foo

# That actually still mystifies me a bit.  Sometimes keys show up,
# with no contents, where you would not expect them.

# So if we do glue $/ and self.CAPHASH together does that give us
# enough information to implement the <inbag> rule without carrying
# around our own dynamic state, just by brute force deduction?  Not
# quite:

# Experiment: a grammar whose <inbag> rule should match each string from a
# fixed bag at most as many times as that string occurs in the bag, working
# out what has already been consumed from $/ and self.CAPHASH instead of
# keeping separate dynamic state.
grammar bagonce {
# (We'll forget about parameterizing the rule and about positional
# captures to keep things simple.)

    # The allowance: a x1, b x2, c x3, 1 x1, 2 x2, 3 x3.
    my %b := BagHash.new(<a b b c c c 1 2 2 3 3 3>);

    # Two-argument candidate: returns %a bag-added with the stringified
    # values found under $m.hash<inbag>, then recursively folds in every
    # other entry of $m.hash whose value is itself a Match.
    my multi already-in-bag (%a, Match $m) {
        my %q := %a (+) BagHash.new($m.hash<inbag>.values.map: *.Str);
        for $m.hash.pairs.grep: {
            $_.value ~~ Match and not $_.key eq "inbag"
        } {
            %q := already-in-bag(%q, .value);
        }
        return %q;
    }

    # Three-argument candidate: tallies $m via the two-argument candidate,
    # then applies the same two steps (direct 'inbag' values, recursion into
    # other Match-valued entries) to $caphash.
    # NOTE(review): 'inbag' is read as $caphash<inbag> but the walk below
    # uses $caphash.hash -- confirm both forms see the same data.
    my multi already-in-bag (%a, Match $m, $caphash) {
        my %q := already-in-bag(%a, $m);
        %q := %q (+) BagHash.new($caphash<inbag>.values.map: *.Str);
        for $caphash.hash.pairs.grep: {
            $_.value ~~ Match and not $_.key eq "inbag"
        } {
            %q := already-in-bag(%q, .value);
        }
        return %q;
    }

    # The closure computes %b minus the tally built from $/ and
    # self.CAPHASH, stores the remaining keys in @okhere, and the regex
    # then matches against the interpolated @okhere array.
    regex inbag {
        :my @okhere;
        { my %c;
          %c := %b (-) already-in-bag(BagHash.new(), $/, self.CAPHASH );
          @okhere = %c.keys;
        }
        @okhere
    }

    # Entry point that uses only capturing <inbag> calls.
    regex unhiddentop {
        <inbag>+
    }

    # Default entry point: one non-capturing <.inbag> call followed by one
    # or more capturing <inbag> calls.
    regex TOP {
        <.inbag> <inbag>+
    }
}

# Print the result of each parse attempt; the trailing remarks record the
# wanted outcome versus the observed one.
say bagonce.parse("abbccc122333");  # Want this to succeed, does
say bagonce.parse("abbccc122333a"); # Want this to fail, succeeds
say bagonce.parse("abbccc122333a", rule => 'unhiddentop'); # Want to fail, does

# ... not when you have non-capturing rules.  And this is why I have
# started to develop a major distaste for <.rule>.  On the surface
# it seems like a convenient way to pre-prune parse trees rather than
# build a fully connected .made.  But it does not just hide things from
# the user of your grammar API, it hides them internally, and if you
# are subclassing someone else's grammar, they may be littered around
# in inconvenient places in the middle of long rules, so you have to
# mirror their code:

# Stand-in for an upstream grammar whose rule calls itself without
# capturing (<.foo>).
grammar someone-elses-grammar {
#    ...
    # Either "<bar> <.foo>" or, failing that, one or more
    # "<compicated> <grr>" pairs followed by <.foo>.
    regex foo {
        <bar> <.foo>
        || [ <compicated> <grr> ]+ <.foo>
    }
#    ...
}

# Subclass that repeats the parent's foo rule verbatim except that the
# recursive calls are capturing (<foo>) rather than hidden (<.foo>).
grammar i-just-want-to-see-foo is someone-elses-grammar {
    regex foo {
        <bar> <foo>
        || [ <compicated> <grr> ]+ <foo>
    }
}

#... and then when the upstream author changes that rule to fix a bug...
	# At http://irclog.perlgeek.de/perl6/2015-06-27#i_10814220 I mentioned
	# the concept of a rule that matches strings contained in a baggy thing
	# only as many times as they appear in the bag as a litmus test for
	# the current limitations of using Cursor and/or dynamic state inside
	# grammars.

	# On more examination the limitations are not as bad as I initially thought,
	# but they are still prohibitive to making something like that work.

	# First, here's why I thought Cursor was a bit broken:

	# Demonstration: what does the cursor's .pos report inside a closure?
	# TOP consumes two single-character <f> matches, then prints self.pos.
	grammar bar {
	    # Any one character.
	    regex f { . }

	    regex TOP {
	        <f> <f>
	        { say self.pos }
	    }
	}

	bar.parse: "aa";
	#-> 0

	# S02 says: (modulo the $¢ identifier itself being NYI)
	# "Within a closure, the instantaneous position within the search is denoted
	# by the $¢.pos method"
	# ...which is not what we are getting here.
	#
	# This is actually not too bad, though. What Cursor.pos has is the start
	# of the current rule, and you can indeed get your current position,
	# through $/.to

	# Demonstration: self.pos versus $/.from and $/.to inside a nested rule.
	grammar bar2 {
	    # Any one character.
	    regex f { . }

	    # After two <f> matches, print self.pos and then the "from..to"
	    # span of the current match object.
	    regex g {
	        <f> <f>
	        {
	            say self.pos;
	            say join "..", $/.from, $/.to;
	        }
	    }

	    regex TOP { <f> <g> }
	}

	bar2.parse: "aaa";
	#-> 1
	#-> 1..3

	# Per spec, $/ has the hypothetical captures only from your innermost rule.
	grammar bar3 {
	    # Any one character.
	    regex f { . }

	    # After two <f> matches, print how many <f> captures $/ holds.
	    regex g {
	        <f> <f>
	        { say $<f>.elems }
	    }

	    regex TOP { <f> <g> }
	}

	bar3.parse: "aaa";
	#-> 2

	# But you may want them from rules that called you, which is what
	# Cursor is supposed to be for. There seems to be self.CAPHASH
	# for that though it does not appear to quite be ready for user-facing
	# use.

	grammar bar4 {
	    # Any one character.
	    regex f { . }

	    # After two <f> matches, look up 'f' in self.CAPHASH and print it
	    # as a string.
	    regex g {
	        <f> <f>
	        { say self.CAPHASH.hash<f>.Str }
	    }

	    regex TOP { <f> <g> }
	}

	bar4.parse: "aaa";
	#-> a

	# So... self.CAPHASH does not have anything that happened in the
	# current rule, JUST the stuff from previous rules. Well, it is
	# LTA that you have to glue $/ and self.CAPHASH together but on
	# the other hand you may want exactly what CAPHASH has. (Though
	# gluing versus using CALLERS are different strata of convenience IMO.)

	# Also there is this:

	# Demonstration of which keys self.CAPHASH exposes while alternatives
	# are being tried.
	grammar foo {
	    # One or more repetitions of either "<foo> b" or, failing that,
	    # "<bar> b".  The separator must be the bare sequential-alternation
	    # operator ||; a backslash-escaped \|\| would instead match two
	    # literal pipe characters and the parse below could never succeed.
	    regex TOP { [ <foo> b || <bar> b ]+ };
	    regex bar { <foo> <foo> <ft> };
	    # Match a literal 'a', then print the keys currently in self.CAPHASH.
	    regex foo { a { self.CAPHASH.hash.keys.say; } };
	    # The literal 42.
	    regex ft { 42 }
	};

	foo.parse('aa42bab');
	#-> bar foo
	#-> foo
	#-> foo
	#-> bar foo

	# That actually still mystifies me a bit. Sometimes keys show up,
	# with no contents, where you would not expect them.

	# So if we do glue $/ and self.CAPHASH together does that give us
	# enough information to implement the <inbag> rule without carrying
	# around our own dynamic state, just by brute force deduction? Not
	# quite:

	# Experiment: a grammar whose <inbag> rule should match each string from a
	# fixed bag at most as many times as that string occurs in the bag, working
	# out what has already been consumed from $/ and self.CAPHASH instead of
	# keeping separate dynamic state.
	grammar bagonce {
	# (We'll forget about parameterizing the rule and about positional
	# captures to keep things simple.)

	# The allowance: a x1, b x2, c x3, 1 x1, 2 x2, 3 x3.
	my %b := BagHash.new(<a b b c c c 1 2 2 3 3 3>);

	# Two-argument candidate: returns %a bag-added with the stringified
	# values found under $m.hash<inbag>, then recursively folds in every
	# other entry of $m.hash whose value is itself a Match.
	my multi already-in-bag (%a, Match $m) {
	my %q := %a (+) BagHash.new($m.hash<inbag>.values.map: *.Str);
	for $m.hash.pairs.grep: {
	$_.value ~~ Match and not $_.key eq "inbag"
	} {
	%q := already-in-bag(%q, .value);
	}
	return %q;
	}

	# Three-argument candidate: tallies $m via the two-argument candidate,
	# then applies the same two steps (direct 'inbag' values, recursion into
	# other Match-valued entries) to $caphash.
	# NOTE(review): 'inbag' is read as $caphash<inbag> but the walk below
	# uses $caphash.hash -- confirm both forms see the same data.
	my multi already-in-bag (%a, Match $m, $caphash) {
	my %q := already-in-bag(%a, $m);
	%q := %q (+) BagHash.new($caphash<inbag>.values.map: *.Str);
	for $caphash.hash.pairs.grep: {
	$_.value ~~ Match and not $_.key eq "inbag"
	} {
	%q := already-in-bag(%q, .value);
	}
	return %q;
	}

	# The closure computes %b minus the tally built from $/ and
	# self.CAPHASH, stores the remaining keys in @okhere, and the regex
	# then matches against the interpolated @okhere array.
	regex inbag {
	:my @okhere;
	{ my %c;
	%c := %b (-) already-in-bag(BagHash.new(), $/, self.CAPHASH );
	@okhere = %c.keys;
	}
	@okhere
	}

	# Entry point that uses only capturing <inbag> calls.
	regex unhiddentop {
	<inbag>+
	}

	# Default entry point: one non-capturing <.inbag> call followed by one
	# or more capturing <inbag> calls.
	regex TOP {
	<.inbag> <inbag>+
	}
	}

	# Print the result of each parse attempt; the trailing remarks record the
	# wanted outcome versus the observed one.
	say bagonce.parse("abbccc122333"); # Want this to succeed, does
	say bagonce.parse("abbccc122333a"); # Want this to fail, succeeds
	say bagonce.parse("abbccc122333a", rule => 'unhiddentop'); # Want to fail, does

	# ... not when you have non-capturing rules. And this is why I have
	# started to develop a major distaste for <.rule>. On the surface
	# it seems like a convenient way to pre-prune parse trees rather than
	# build a fully connected .made. But it does not just hide things from
	# the user of your grammar API, it hides them internally, and if you
	# are subclassing someone else's grammar, they may be littered around
	# in inconvenient places in the middle of long rules, so you have to
	# mirror their code:

	# Stand-in for an upstream grammar whose rule calls itself without
	# capturing (<.foo>).
	grammar someone-elses-grammar {
	# ...
	    # Either "<bar> <.foo>" or, failing that, one or more
	    # "<compicated> <grr>" pairs followed by <.foo>.  The separator is
	    # the bare alternation operator ||; the escaped form \|\| would
	    # demand two literal pipe characters in the input instead.
	    regex foo { <bar> <.foo> || [ <compicated> <grr> ]+ <.foo> }
	# ...
	}

	# Subclass that repeats the parent's foo rule except that the recursive
	# calls are capturing (<foo>) rather than hidden (<.foo>).
	grammar i-just-want-to-see-foo is someone-elses-grammar {
	    # The separator is the bare alternation operator ||; the escaped
	    # form \|\| would demand two literal pipe characters in the input.
	    regex foo { <bar> <foo> || [ <compicated> <grr> ]+ <foo> }
	}

	#... and then when the upstream author changes that rule to fix a bug...