FROGGS/ignoremark-nqp.diff Secret

## ignoremark-nqp.diff
diff --git a/src/QRegex/NFA.nqp b/src/QRegex/NFA.nqp
index 2b51468..09e3919 100644
--- a/src/QRegex/NFA.nqp
+++ b/src/QRegex/NFA.nqp
@@ -21,6 +21,8 @@ class QRegex::NFA {
     my $EDGE_CHARRANGE_NEG   := 13;
     my $EDGE_CODEPOINT_LL    := 14;
     my $EDGE_CODEPOINT_I_LL  := 15;
+    my $EDGE_CODEPOINT_M     := 16;
+    my $EDGE_CODEPOINT_M_NEG := 17;

     my $ACTIONS;
     my $nfadeb;
@@ -171,7 +173,7 @@ class QRegex::NFA {
         %cclass_code<n>  := nqp::const::CCLASS_NEWLINE;
         %cclass_code<nl> := nqp::const::CCLASS_NEWLINE;
         $nfadeb := nqp::existskey(nqp::getenvhash(),'NQP_NFA_DEB');
-        $ACTIONS := ['FATE','EPSILON','CODEPOINT','CODEPOINT_NEG','CHARCLASS','CHARCLASS_NEG','CHARLIST','CHARLIST_NEG','SUBRULE','CODEPOINT_I','CODEPOINT_I_NEG','GENERIC_VAR','CHARRANGE','CHARRANGE_NEG','CODEPOINT_LL','CODEPOINT_I_LL'];
+        $ACTIONS := ['FATE','EPSILON','CODEPOINT','CODEPOINT_NEG','CHARCLASS','CHARCLASS_NEG','CHARLIST','CHARLIST_NEG','SUBRULE','CODEPOINT_I','CODEPOINT_I_NEG','GENERIC_VAR','CHARRANGE','CHARRANGE_NEG','CODEPOINT_LL','CODEPOINT_I_LL','CODEPOINT_M','CODEPOINT_M_NEG'];
         # $ind := 0;
         # $indent := '';
         $nfatime := 0;
@@ -244,6 +246,14 @@ class QRegex::NFA {
                 dentout(self.addedge($from, $to, $!LITEND ?? $EDGE_CODEPOINT_I !!  $EDGE_CODEPOINT_I_LL,
                     [nqp::ord($litconst_lc, $i), nqp::ord($litconst_uc, $i)]));
             }
+            elsif $node.subtype eq 'ignoremark' {
+                my str $litconst := $node[0];
+                while $i < $litlen {
+                    $from := self.addedge($from, -1, $EDGE_CODEPOINT_M, nqp::ord($litconst, $i));
+                    $i := $i + 1;
+                }
+                dentout(self.addedge($from, $to, $EDGE_CODEPOINT_M, nqp::ord($litconst, $i)));
+            }
             else {
                 my str $litconst := $node[0];
                 while $i < $litlen {
diff --git a/src/QRegex/P6Regex/Actions.nqp b/src/QRegex/P6Regex/Actions.nqp
index f55e543..4390dd6 100644
--- a/src/QRegex/P6Regex/Actions.nqp
+++ b/src/QRegex/P6Regex/Actions.nqp
@@ -120,6 +120,7 @@ class QRegex::P6Regex::Actions is HLL::Actions {
         else {
             my $qast := QAST::Regex.new( ~$/, :rxtype<literal>, :node($/));
             $qast.subtype('ignorecase') if %*RX<i>;
+            $qast.subtype('ignoremark') if %*RX<m>;
             make $qast;
         }
     }
@@ -692,10 +693,11 @@ class QRegex::P6Regex::Actions is HLL::Actions {
                 }
                 else {
                     my $c := ~$_[0];
-                    $str := $str ~ (%*RX<i> ?? nqp::lc($c) ~ nqp::uc($c) !! $c);
+                    #~ $str := $str ~ (%*RX<i> ?? nqp::lc($c) ~ nqp::uc($c) !! $c);
+                    $str := $str ~ (%*RX<i> ?? nqp::lc($c) ~ nqp::uc($c) !! %*RX<m> ?? nqp::chr(nqp::ordbaseat($c, 0)) !! $c);
                 }
             }
-            @alts.push(QAST::Regex.new( $str, :rxtype<enumcharlist>, :node($/), :negate( $<sign> eq '-' ) ))
+            @alts.push(QAST::Regex.new( $str, :rxtype<enumcharlist>, :node($/), :negate( $<sign> eq '-' ), :subtype(%*RX<m> ?? 'ignoremark' !! '') ))
                 if nqp::chars($str);
             $qast := +@alts == 1 ?? @alts[0] !!
                 $<sign> eq '-' ??
diff --git a/src/QRegex/P6Regex/Grammar.nqp b/src/QRegex/P6Regex/Grammar.nqp
index 2e9de71..784c390 100644
--- a/src/QRegex/P6Regex/Grammar.nqp
+++ b/src/QRegex/P6Regex/Grammar.nqp
@@ -453,6 +453,12 @@ grammar QRegex::P6Regex::Grammar is HLL::Grammar {

     proto token mod_ident { <...> }
     token mod_ident:sym<ignorecase> { $<sym>=[i] 'gnorecase'? » }
+    token mod_ident:sym<ignoremark> {
+        [
+        | $<sym>=[m]
+        | 'ignore' $<sym>=[m] 'ark'
+        ] »
+    }
     token mod_ident:sym<ratchet>    { $<sym>=[r] 'atchet'? » }
     token mod_ident:sym<sigspace>   { $<sym>=[s] 'igspace'? » }
     token mod_ident:sym<dba>        { <sym> » }
diff --git a/src/vm/moar/QAST/QASTOperationsMAST.nqp b/src/vm/moar/QAST/QASTOperationsMAST.nqp
index f0bd11e..583be8f 100644
--- a/src/vm/moar/QAST/QASTOperationsMAST.nqp
+++ b/src/vm/moar/QAST/QASTOperationsMAST.nqp
@@ -2147,6 +2147,7 @@ QAST::MASTOperations.add_core_moarop_mapping('split', 'split');
 QAST::MASTOperations.add_core_moarop_mapping('chr', 'chr');
 QAST::MASTOperations.add_core_moarop_mapping('ordfirst', 'ordfirst');
 QAST::MASTOperations.add_core_moarop_mapping('ordat', 'ordat');
+QAST::MASTOperations.add_core_moarop_mapping('ordbaseat', 'ordbaseat');
 QAST::MASTOperations.add_core_moarop_mapping('indexfrom', 'index_s');
 QAST::MASTOperations.add_core_moarop_mapping('rindexfrom', 'rindexfrom');
 QAST::MASTOperations.add_core_moarop_mapping('substr_s', 'substr_s');
diff --git a/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp b/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp
index 558b3f6..efc1f00 100644
--- a/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp
+++ b/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp
@@ -507,7 +507,20 @@ class QAST::MASTRegexCompiler {
     method enumcharlist($node) {
         my @ins;
         my $op := $node.negate ?? 'indexnat' !! 'indexat';
-        nqp::push(@ins, op($op, %!reg<tgt>, %!reg<pos>, sval($node[0]), %!reg<fail>));
+        if $node.subtype eq 'ignoremark' { nqp::say('enumcharlist ignoremark: ' ~ $node[0]);
+            my $i0 := $!regalloc.fresh_i();
+            my $s0 := $!regalloc.fresh_s();
+            merge_ins(@ins, [
+                op('ge_i', $i0, %!reg<pos>, %!reg<eos>),
+                op('if_i', $i0, %!reg<fail>),
+                op('ordbaseat', $i0, %!reg<tgt>, %!reg<pos>),
+                op('chr', $s0, $i0),
+                op($op, $s0, %!reg<zero>, sval($node[0]), %!reg<fail>),
+            ]);
+        }
+        else {
+            nqp::push(@ins, op($op, %!reg<tgt>, %!reg<pos>, sval($node[0]), %!reg<fail>));
+        }
         nqp::push(@ins, op('inc_i', %!reg<pos>))
             unless $node.subtype eq 'zerowidth';
         @ins
@@ -577,7 +590,8 @@ class QAST::MASTRegexCompiler {

     method literal($node) {
         my $litconst := $node[0];
-        my $eq_op := $node.subtype eq 'ignorecase' ?? 'eqatic_s' !! 'eqat_s';
+        my $eq_op := $node.subtype eq 'ignorecase' ?? 'eqatic_s' !!
+                     $node.subtype eq 'ignoremark' ?? 'eqatim_s' !! 'eqat_s';
         my $s0 := $!regalloc.fresh_s();
         my $i0 := $!regalloc.fresh_i();
         my $cmpop := $node.negate ?? 'if_i' !! 'unless_i';
@@ -922,7 +936,7 @@ class QAST::MASTRegexCompiler {
             $looplabel,
             op('inc_i', %!reg<pos>),
         ];
-        if $node.list && $node.subtype ne 'ignorecase' {
+        if $node.list && $node.subtype ne 'ignorecase' && $node.subtype ne 'ignoremark' {
             my $lit := $!regalloc.fresh_s();
             nqp::push(@ins, op('const_s', $lit, sval($node[0])));
             nqp::push(@ins, op('index_s', %!reg<pos>, %!reg<tgt>, $lit, %!reg<pos>));

## ignoremark-rakudo.diff
diff --git a/src/Perl6/Actions.nqp b/src/Perl6/Actions.nqp
index 0f368ca..4ebcb2b 100644
--- a/src/Perl6/Actions.nqp
+++ b/src/Perl6/Actions.nqp
@@ -6073,6 +6073,7 @@ Compilation unit '$file' contained the following violations:
     );
     my %REGEX_ADVERBS_CANONICAL := hash(
         ignorecase  => 'i',
+        ignoremark  => 'm',
         ratchet     => 'r',
         sigspace    => 's',
         continue    => 'c',
@@ -6096,7 +6097,7 @@ Compilation unit '$file' contained the following violations:
         ss        => 's',
     );
     INIT {
-        my str $mods := 'i ignorecase s sigspace r ratchet Perl5 P5';
+        my str $mods := 'i ignorecase m ignoremark s sigspace r ratchet Perl5 P5';
         for nqp::split(' ', $mods) {
             %SHARED_ALLOWED_ADVERBS{$_} := 1;
         }
	diff --git a/src/QRegex/NFA.nqp b/src/QRegex/NFA.nqp
	index 2b51468..09e3919 100644
	--- a/src/QRegex/NFA.nqp
	+++ b/src/QRegex/NFA.nqp
	@@ -21,6 +21,8 @@ class QRegex::NFA {
	my $EDGE_CHARRANGE_NEG := 13;
	my $EDGE_CODEPOINT_LL := 14;
	my $EDGE_CODEPOINT_I_LL := 15;
	+ my $EDGE_CODEPOINT_M := 16;
	+ my $EDGE_CODEPOINT_M_NEG := 17;

	my $ACTIONS;
	my $nfadeb;
	@@ -171,7 +173,7 @@ class QRegex::NFA {
	%cclass_code<n> := nqp::const::CCLASS_NEWLINE;
	%cclass_code<nl> := nqp::const::CCLASS_NEWLINE;
	$nfadeb := nqp::existskey(nqp::getenvhash(),'NQP_NFA_DEB');
	- $ACTIONS := ['FATE','EPSILON','CODEPOINT','CODEPOINT_NEG','CHARCLASS','CHARCLASS_NEG','CHARLIST','CHARLIST_NEG','SUBRULE','CODEPOINT_I','CODEPOINT_I_NEG','GENERIC_VAR','CHARRANGE','CHARRANGE_NEG','CODEPOINT_LL','CODEPOINT_I_LL'];
	+ $ACTIONS := ['FATE','EPSILON','CODEPOINT','CODEPOINT_NEG','CHARCLASS','CHARCLASS_NEG','CHARLIST','CHARLIST_NEG','SUBRULE','CODEPOINT_I','CODEPOINT_I_NEG','GENERIC_VAR','CHARRANGE','CHARRANGE_NEG','CODEPOINT_LL','CODEPOINT_I_LL','CODEPOINT_M','CODEPOINT_M_NEG'];
	# $ind := 0;
	# $indent := '';
	$nfatime := 0;
	@@ -244,6 +246,14 @@ class QRegex::NFA {
	dentout(self.addedge($from, $to, $!LITEND ?? $EDGE_CODEPOINT_I !! $EDGE_CODEPOINT_I_LL,
	[nqp::ord($litconst_lc, $i), nqp::ord($litconst_uc, $i)]));
	}
	+ elsif $node.subtype eq 'ignoremark' {
	+ my str $litconst := $node[0];
	+ while $i < $litlen {
	+ $from := self.addedge($from, -1, $EDGE_CODEPOINT_M, nqp::ord($litconst, $i));
	+ $i := $i + 1;
	+ }
	+ dentout(self.addedge($from, $to, $EDGE_CODEPOINT_M, nqp::ord($litconst, $i)));
	+ }
	else {
	my str $litconst := $node[0];
	while $i < $litlen {
	diff --git a/src/QRegex/P6Regex/Actions.nqp b/src/QRegex/P6Regex/Actions.nqp
	index f55e543..4390dd6 100644
	--- a/src/QRegex/P6Regex/Actions.nqp
	+++ b/src/QRegex/P6Regex/Actions.nqp
	@@ -120,6 +120,7 @@ class QRegex::P6Regex::Actions is HLL::Actions {
	else {
	my $qast := QAST::Regex.new( ~$/, :rxtype<literal>, :node($/));
	$qast.subtype('ignorecase') if %*RX<i>;
	+ $qast.subtype('ignoremark') if %*RX<m>;
	make $qast;
	}
	}
	@@ -692,10 +693,11 @@ class QRegex::P6Regex::Actions is HLL::Actions {
	}
	else {
	my $c := ~$_[0];
	- $str := $str ~ (%*RX<i> ?? nqp::lc($c) ~ nqp::uc($c) !! $c);
	+ #~ $str := $str ~ (%*RX<i> ?? nqp::lc($c) ~ nqp::uc($c) !! $c);
	+ $str := $str ~ (%RX<i> ?? nqp::lc($c) ~ nqp::uc($c) !! %RX<m> ?? nqp::chr(nqp::ordbaseat($c, 0)) !! $c);
	}
	}
	- @alts.push(QAST::Regex.new( $str, :rxtype<enumcharlist>, :node($/), :negate( $<sign> eq '-' ) ))
	+ @alts.push(QAST::Regex.new( $str, :rxtype<enumcharlist>, :node($/), :negate( $<sign> eq '-' ), :subtype(%*RX<m> ?? 'ignoremark' !! '') ))
	if nqp::chars($str);
	$qast := +@alts == 1 ?? @alts[0] !!
	$<sign> eq '-' ??
	diff --git a/src/QRegex/P6Regex/Grammar.nqp b/src/QRegex/P6Regex/Grammar.nqp
	index 2e9de71..784c390 100644
	--- a/src/QRegex/P6Regex/Grammar.nqp
	+++ b/src/QRegex/P6Regex/Grammar.nqp
	@@ -453,6 +453,12 @@ grammar QRegex::P6Regex::Grammar is HLL::Grammar {

	proto token mod_ident { <...> }
	token mod_ident:sym<ignorecase> { $<sym>=[i] 'gnorecase'? » }
	+ token mod_ident:sym<ignoremark> {
	+ [
	+ \| $<sym>=[m]
	+ \| 'ignore' $<sym>=[m] 'ark'
	+ ] »
	+ }
	token mod_ident:sym<ratchet> { $<sym>=[r] 'atchet'? » }
	token mod_ident:sym<sigspace> { $<sym>=[s] 'igspace'? » }
	token mod_ident:sym<dba> { <sym> » }
	diff --git a/src/vm/moar/QAST/QASTOperationsMAST.nqp b/src/vm/moar/QAST/QASTOperationsMAST.nqp
	index f0bd11e..583be8f 100644
	--- a/src/vm/moar/QAST/QASTOperationsMAST.nqp
	+++ b/src/vm/moar/QAST/QASTOperationsMAST.nqp
	@@ -2147,6 +2147,7 @@ QAST::MASTOperations.add_core_moarop_mapping('split', 'split');
	QAST::MASTOperations.add_core_moarop_mapping('chr', 'chr');
	QAST::MASTOperations.add_core_moarop_mapping('ordfirst', 'ordfirst');
	QAST::MASTOperations.add_core_moarop_mapping('ordat', 'ordat');
	+QAST::MASTOperations.add_core_moarop_mapping('ordbaseat', 'ordbaseat');
	QAST::MASTOperations.add_core_moarop_mapping('indexfrom', 'index_s');
	QAST::MASTOperations.add_core_moarop_mapping('rindexfrom', 'rindexfrom');
	QAST::MASTOperations.add_core_moarop_mapping('substr_s', 'substr_s');
	diff --git a/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp b/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp
	index 558b3f6..efc1f00 100644
	--- a/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp
	+++ b/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp
	@@ -507,7 +507,20 @@ class QAST::MASTRegexCompiler {
	method enumcharlist($node) {
	my @ins;
	my $op := $node.negate ?? 'indexnat' !! 'indexat';
	- nqp::push(@ins, op($op, %!reg<tgt>, %!reg<pos>, sval($node[0]), %!reg<fail>));
	+ if $node.subtype eq 'ignoremark' { nqp::say('enumcharlist ignoremark: ' ~ $node[0]);
	+ my $i0 := $!regalloc.fresh_i();
	+ my $s0 := $!regalloc.fresh_s();
	+ merge_ins(@ins, [
	+ op('ge_i', $i0, %!reg<pos>, %!reg<eos>),
	+ op('if_i', $i0, %!reg<fail>),
	+ op('ordbaseat', $i0, %!reg<tgt>, %!reg<pos>),
	+ op('chr', $s0, $i0),
	+ op($op, $s0, %!reg<zero>, sval($node[0]), %!reg<fail>),
	+ ]);
	+ }
	+ else {
	+ nqp::push(@ins, op($op, %!reg<tgt>, %!reg<pos>, sval($node[0]), %!reg<fail>));
	+ }
	nqp::push(@ins, op('inc_i', %!reg<pos>))
	unless $node.subtype eq 'zerowidth';
	@ins
	@@ -577,7 +590,8 @@ class QAST::MASTRegexCompiler {

	method literal($node) {
	my $litconst := $node[0];
	- my $eq_op := $node.subtype eq 'ignorecase' ?? 'eqatic_s' !! 'eqat_s';
	+ my $eq_op := $node.subtype eq 'ignorecase' ?? 'eqatic_s' !!
	+ $node.subtype eq 'ignoremark' ?? 'eqatim_s' !! 'eqat_s';
	my $s0 := $!regalloc.fresh_s();
	my $i0 := $!regalloc.fresh_i();
	my $cmpop := $node.negate ?? 'if_i' !! 'unless_i';
	@@ -922,7 +936,7 @@ class QAST::MASTRegexCompiler {
	$looplabel,
	op('inc_i', %!reg<pos>),
	];
	- if $node.list && $node.subtype ne 'ignorecase' {
	+ if $node.list && $node.subtype ne 'ignorecase' && $node.subtype ne 'ignoremark' {
	my $lit := $!regalloc.fresh_s();
	nqp::push(@ins, op('const_s', $lit, sval($node[0])));
	nqp::push(@ins, op('index_s', %!reg<pos>, %!reg<tgt>, $lit, %!reg<pos>));
	diff --git a/src/Perl6/Actions.nqp b/src/Perl6/Actions.nqp
	index 0f368ca..4ebcb2b 100644
	--- a/src/Perl6/Actions.nqp
	+++ b/src/Perl6/Actions.nqp
	@@ -6073,6 +6073,7 @@ Compilation unit '$file' contained the following violations:
	);
	my %REGEX_ADVERBS_CANONICAL := hash(
	ignorecase => 'i',
	+ ignoremark => 'm',
	ratchet => 'r',
	sigspace => 's',
	continue => 'c',
	@@ -6096,7 +6097,7 @@ Compilation unit '$file' contained the following violations:
	ss => 's',
	);
	INIT {
	- my str $mods := 'i ignorecase s sigspace r ratchet Perl5 P5';
	+ my str $mods := 'i ignorecase m ignoremark s sigspace r ratchet Perl5 P5';
	for nqp::split(' ', $mods) {
	%SHARED_ALLOWED_ADVERBS{$_} := 1;
	}