-
-
Save FROGGS/1a0cb3448b7f014511d8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/src/QRegex/NFA.nqp b/src/QRegex/NFA.nqp | |
index 2b51468..09e3919 100644 | |
--- a/src/QRegex/NFA.nqp | |
+++ b/src/QRegex/NFA.nqp | |
@@ -21,6 +21,8 @@ class QRegex::NFA { | |
my $EDGE_CHARRANGE_NEG := 13; | |
my $EDGE_CODEPOINT_LL := 14; | |
my $EDGE_CODEPOINT_I_LL := 15; | |
+ my $EDGE_CODEPOINT_M := 16; | |
+ my $EDGE_CODEPOINT_M_NEG := 17; | |
my $ACTIONS; | |
my $nfadeb; | |
@@ -171,7 +173,7 @@ class QRegex::NFA { | |
%cclass_code<n> := nqp::const::CCLASS_NEWLINE; | |
%cclass_code<nl> := nqp::const::CCLASS_NEWLINE; | |
$nfadeb := nqp::existskey(nqp::getenvhash(),'NQP_NFA_DEB'); | |
- $ACTIONS := ['FATE','EPSILON','CODEPOINT','CODEPOINT_NEG','CHARCLASS','CHARCLASS_NEG','CHARLIST','CHARLIST_NEG','SUBRULE','CODEPOINT_I','CODEPOINT_I_NEG','GENERIC_VAR','CHARRANGE','CHARRANGE_NEG','CODEPOINT_LL','CODEPOINT_I_LL']; | |
+ $ACTIONS := ['FATE','EPSILON','CODEPOINT','CODEPOINT_NEG','CHARCLASS','CHARCLASS_NEG','CHARLIST','CHARLIST_NEG','SUBRULE','CODEPOINT_I','CODEPOINT_I_NEG','GENERIC_VAR','CHARRANGE','CHARRANGE_NEG','CODEPOINT_LL','CODEPOINT_I_LL','CODEPOINT_M','CODEPOINT_M_NEG']; | |
# $ind := 0; | |
# $indent := ''; | |
$nfatime := 0; | |
@@ -244,6 +246,14 @@ class QRegex::NFA { | |
dentout(self.addedge($from, $to, $!LITEND ?? $EDGE_CODEPOINT_I !! $EDGE_CODEPOINT_I_LL, | |
[nqp::ord($litconst_lc, $i), nqp::ord($litconst_uc, $i)])); | |
} | |
+ elsif $node.subtype eq 'ignoremark' { | |
+ my str $litconst := $node[0]; | |
+ while $i < $litlen { | |
+ $from := self.addedge($from, -1, $EDGE_CODEPOINT_M, nqp::ord($litconst, $i)); | |
+ $i := $i + 1; | |
+ } | |
+ dentout(self.addedge($from, $to, $EDGE_CODEPOINT_M, nqp::ord($litconst, $i))); | |
+ } | |
else { | |
my str $litconst := $node[0]; | |
while $i < $litlen { | |
diff --git a/src/QRegex/P6Regex/Actions.nqp b/src/QRegex/P6Regex/Actions.nqp | |
index f55e543..4390dd6 100644 | |
--- a/src/QRegex/P6Regex/Actions.nqp | |
+++ b/src/QRegex/P6Regex/Actions.nqp | |
@@ -120,6 +120,7 @@ class QRegex::P6Regex::Actions is HLL::Actions { | |
else { | |
my $qast := QAST::Regex.new( ~$/, :rxtype<literal>, :node($/)); | |
$qast.subtype('ignorecase') if %*RX<i>; | |
+ $qast.subtype('ignoremark') if %*RX<m>; | |
make $qast; | |
} | |
} | |
@@ -692,10 +693,11 @@ class QRegex::P6Regex::Actions is HLL::Actions { | |
} | |
else { | |
my $c := ~$_[0]; | |
- $str := $str ~ (%*RX<i> ?? nqp::lc($c) ~ nqp::uc($c) !! $c); | |
+ #~ $str := $str ~ (%*RX<i> ?? nqp::lc($c) ~ nqp::uc($c) !! $c); | |
+ $str := $str ~ (%*RX<i> ?? nqp::lc($c) ~ nqp::uc($c) !! %*RX<m> ?? nqp::chr(nqp::ordbaseat($c, 0)) !! $c); | |
} | |
} | |
- @alts.push(QAST::Regex.new( $str, :rxtype<enumcharlist>, :node($/), :negate( $<sign> eq '-' ) )) | |
+ @alts.push(QAST::Regex.new( $str, :rxtype<enumcharlist>, :node($/), :negate( $<sign> eq '-' ), :subtype(%*RX<m> ?? 'ignoremark' !! '') )) | |
if nqp::chars($str); | |
$qast := +@alts == 1 ?? @alts[0] !! | |
$<sign> eq '-' ?? | |
diff --git a/src/QRegex/P6Regex/Grammar.nqp b/src/QRegex/P6Regex/Grammar.nqp | |
index 2e9de71..784c390 100644 | |
--- a/src/QRegex/P6Regex/Grammar.nqp | |
+++ b/src/QRegex/P6Regex/Grammar.nqp | |
@@ -453,6 +453,12 @@ grammar QRegex::P6Regex::Grammar is HLL::Grammar { | |
proto token mod_ident { <...> } | |
token mod_ident:sym<ignorecase> { $<sym>=[i] 'gnorecase'? » } | |
+ token mod_ident:sym<ignoremark> { | |
+ [ | |
+ | $<sym>=[m] | |
+ | 'ignore' $<sym>=[m] 'ark' | |
+ ] » | |
+ } | |
token mod_ident:sym<ratchet> { $<sym>=[r] 'atchet'? » } | |
token mod_ident:sym<sigspace> { $<sym>=[s] 'igspace'? » } | |
token mod_ident:sym<dba> { <sym> » } | |
diff --git a/src/vm/moar/QAST/QASTOperationsMAST.nqp b/src/vm/moar/QAST/QASTOperationsMAST.nqp | |
index f0bd11e..583be8f 100644 | |
--- a/src/vm/moar/QAST/QASTOperationsMAST.nqp | |
+++ b/src/vm/moar/QAST/QASTOperationsMAST.nqp | |
@@ -2147,6 +2147,7 @@ QAST::MASTOperations.add_core_moarop_mapping('split', 'split'); | |
QAST::MASTOperations.add_core_moarop_mapping('chr', 'chr'); | |
QAST::MASTOperations.add_core_moarop_mapping('ordfirst', 'ordfirst'); | |
QAST::MASTOperations.add_core_moarop_mapping('ordat', 'ordat'); | |
+QAST::MASTOperations.add_core_moarop_mapping('ordbaseat', 'ordbaseat'); | |
QAST::MASTOperations.add_core_moarop_mapping('indexfrom', 'index_s'); | |
QAST::MASTOperations.add_core_moarop_mapping('rindexfrom', 'rindexfrom'); | |
QAST::MASTOperations.add_core_moarop_mapping('substr_s', 'substr_s'); | |
diff --git a/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp b/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp | |
index 558b3f6..efc1f00 100644 | |
--- a/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp | |
+++ b/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp | |
@@ -507,7 +507,20 @@ class QAST::MASTRegexCompiler { | |
method enumcharlist($node) { | |
my @ins; | |
my $op := $node.negate ?? 'indexnat' !! 'indexat'; | |
- nqp::push(@ins, op($op, %!reg<tgt>, %!reg<pos>, sval($node[0]), %!reg<fail>)); | |
+ if $node.subtype eq 'ignoremark' { nqp::say('enumcharlist ignoremark: ' ~ $node[0]); | |
+ my $i0 := $!regalloc.fresh_i(); | |
+ my $s0 := $!regalloc.fresh_s(); | |
+ merge_ins(@ins, [ | |
+ op('ge_i', $i0, %!reg<pos>, %!reg<eos>), | |
+ op('if_i', $i0, %!reg<fail>), | |
+ op('ordbaseat', $i0, %!reg<tgt>, %!reg<pos>), | |
+ op('chr', $s0, $i0), | |
+ op($op, $s0, %!reg<zero>, sval($node[0]), %!reg<fail>), | |
+ ]); | |
+ } | |
+ else { | |
+ nqp::push(@ins, op($op, %!reg<tgt>, %!reg<pos>, sval($node[0]), %!reg<fail>)); | |
+ } | |
nqp::push(@ins, op('inc_i', %!reg<pos>)) | |
unless $node.subtype eq 'zerowidth'; | |
@ins | |
@@ -577,7 +590,8 @@ class QAST::MASTRegexCompiler { | |
method literal($node) { | |
my $litconst := $node[0]; | |
- my $eq_op := $node.subtype eq 'ignorecase' ?? 'eqatic_s' !! 'eqat_s'; | |
+ my $eq_op := $node.subtype eq 'ignorecase' ?? 'eqatic_s' !! | |
+ $node.subtype eq 'ignoremark' ?? 'eqatim_s' !! 'eqat_s'; | |
my $s0 := $!regalloc.fresh_s(); | |
my $i0 := $!regalloc.fresh_i(); | |
my $cmpop := $node.negate ?? 'if_i' !! 'unless_i'; | |
@@ -922,7 +936,7 @@ class QAST::MASTRegexCompiler { | |
$looplabel, | |
op('inc_i', %!reg<pos>), | |
]; | |
- if $node.list && $node.subtype ne 'ignorecase' { | |
+ if $node.list && $node.subtype ne 'ignorecase' && $node.subtype ne 'ignoremark' { | |
my $lit := $!regalloc.fresh_s(); | |
nqp::push(@ins, op('const_s', $lit, sval($node[0]))); | |
nqp::push(@ins, op('index_s', %!reg<pos>, %!reg<tgt>, $lit, %!reg<pos>)); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/src/Perl6/Actions.nqp b/src/Perl6/Actions.nqp | |
index 0f368ca..4ebcb2b 100644 | |
--- a/src/Perl6/Actions.nqp | |
+++ b/src/Perl6/Actions.nqp | |
@@ -6073,6 +6073,7 @@ Compilation unit '$file' contained the following violations: | |
); | |
my %REGEX_ADVERBS_CANONICAL := hash( | |
ignorecase => 'i', | |
+ ignoremark => 'm', | |
ratchet => 'r', | |
sigspace => 's', | |
continue => 'c', | |
@@ -6096,7 +6097,7 @@ Compilation unit '$file' contained the following violations: | |
ss => 's', | |
); | |
INIT { | |
- my str $mods := 'i ignorecase s sigspace r ratchet Perl5 P5'; | |
+ my str $mods := 'i ignorecase m ignoremark s sigspace r ratchet Perl5 P5'; | |
for nqp::split(' ', $mods) { | |
%SHARED_ALLOWED_ADVERBS{$_} := 1; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment