Skip to content

Instantly share code, notes, and snippets.

@FROGGS
Created May 9, 2015 08:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save FROGGS/1a0cb3448b7f014511d8 to your computer and use it in GitHub Desktop.
Save FROGGS/1a0cb3448b7f014511d8 to your computer and use it in GitHub Desktop.
diff --git a/src/QRegex/NFA.nqp b/src/QRegex/NFA.nqp
index 2b51468..09e3919 100644
--- a/src/QRegex/NFA.nqp
+++ b/src/QRegex/NFA.nqp
@@ -21,6 +21,8 @@ class QRegex::NFA {
my $EDGE_CHARRANGE_NEG := 13;
my $EDGE_CODEPOINT_LL := 14;
my $EDGE_CODEPOINT_I_LL := 15;
+ my $EDGE_CODEPOINT_M := 16;
+ my $EDGE_CODEPOINT_M_NEG := 17;
my $ACTIONS;
my $nfadeb;
@@ -171,7 +173,7 @@ class QRegex::NFA {
%cclass_code<n> := nqp::const::CCLASS_NEWLINE;
%cclass_code<nl> := nqp::const::CCLASS_NEWLINE;
$nfadeb := nqp::existskey(nqp::getenvhash(),'NQP_NFA_DEB');
- $ACTIONS := ['FATE','EPSILON','CODEPOINT','CODEPOINT_NEG','CHARCLASS','CHARCLASS_NEG','CHARLIST','CHARLIST_NEG','SUBRULE','CODEPOINT_I','CODEPOINT_I_NEG','GENERIC_VAR','CHARRANGE','CHARRANGE_NEG','CODEPOINT_LL','CODEPOINT_I_LL'];
+ $ACTIONS := ['FATE','EPSILON','CODEPOINT','CODEPOINT_NEG','CHARCLASS','CHARCLASS_NEG','CHARLIST','CHARLIST_NEG','SUBRULE','CODEPOINT_I','CODEPOINT_I_NEG','GENERIC_VAR','CHARRANGE','CHARRANGE_NEG','CODEPOINT_LL','CODEPOINT_I_LL','CODEPOINT_M','CODEPOINT_M_NEG'];
# $ind := 0;
# $indent := '';
$nfatime := 0;
@@ -244,6 +246,14 @@ class QRegex::NFA {
dentout(self.addedge($from, $to, $!LITEND ?? $EDGE_CODEPOINT_I !! $EDGE_CODEPOINT_I_LL,
[nqp::ord($litconst_lc, $i), nqp::ord($litconst_uc, $i)]));
}
+ elsif $node.subtype eq 'ignoremark' {
+ my str $litconst := $node[0];
+ while $i < $litlen {
+ $from := self.addedge($from, -1, $EDGE_CODEPOINT_M, nqp::ord($litconst, $i));
+ $i := $i + 1;
+ }
+ dentout(self.addedge($from, $to, $EDGE_CODEPOINT_M, nqp::ord($litconst, $i)));
+ }
else {
my str $litconst := $node[0];
while $i < $litlen {
diff --git a/src/QRegex/P6Regex/Actions.nqp b/src/QRegex/P6Regex/Actions.nqp
index f55e543..4390dd6 100644
--- a/src/QRegex/P6Regex/Actions.nqp
+++ b/src/QRegex/P6Regex/Actions.nqp
@@ -120,6 +120,7 @@ class QRegex::P6Regex::Actions is HLL::Actions {
else {
my $qast := QAST::Regex.new( ~$/, :rxtype<literal>, :node($/));
$qast.subtype('ignorecase') if %*RX<i>;
+ $qast.subtype('ignoremark') if %*RX<m>;
make $qast;
}
}
@@ -692,10 +693,11 @@ class QRegex::P6Regex::Actions is HLL::Actions {
}
else {
my $c := ~$_[0];
- $str := $str ~ (%*RX<i> ?? nqp::lc($c) ~ nqp::uc($c) !! $c);
+ #~ $str := $str ~ (%*RX<i> ?? nqp::lc($c) ~ nqp::uc($c) !! $c);
+ $str := $str ~ (%*RX<i> ?? nqp::lc($c) ~ nqp::uc($c) !! %*RX<m> ?? nqp::chr(nqp::ordbaseat($c, 0)) !! $c);
}
}
- @alts.push(QAST::Regex.new( $str, :rxtype<enumcharlist>, :node($/), :negate( $<sign> eq '-' ) ))
+ @alts.push(QAST::Regex.new( $str, :rxtype<enumcharlist>, :node($/), :negate( $<sign> eq '-' ), :subtype(%*RX<m> ?? 'ignoremark' !! '') ))
if nqp::chars($str);
$qast := +@alts == 1 ?? @alts[0] !!
$<sign> eq '-' ??
diff --git a/src/QRegex/P6Regex/Grammar.nqp b/src/QRegex/P6Regex/Grammar.nqp
index 2e9de71..784c390 100644
--- a/src/QRegex/P6Regex/Grammar.nqp
+++ b/src/QRegex/P6Regex/Grammar.nqp
@@ -453,6 +453,12 @@ grammar QRegex::P6Regex::Grammar is HLL::Grammar {
proto token mod_ident { <...> }
token mod_ident:sym<ignorecase> { $<sym>=[i] 'gnorecase'? » }
+ token mod_ident:sym<ignoremark> {
+ [
+ | $<sym>=[m]
+ | 'ignore' $<sym>=[m] 'ark'
+ ] »
+ }
token mod_ident:sym<ratchet> { $<sym>=[r] 'atchet'? » }
token mod_ident:sym<sigspace> { $<sym>=[s] 'igspace'? » }
token mod_ident:sym<dba> { <sym> » }
diff --git a/src/vm/moar/QAST/QASTOperationsMAST.nqp b/src/vm/moar/QAST/QASTOperationsMAST.nqp
index f0bd11e..583be8f 100644
--- a/src/vm/moar/QAST/QASTOperationsMAST.nqp
+++ b/src/vm/moar/QAST/QASTOperationsMAST.nqp
@@ -2147,6 +2147,7 @@ QAST::MASTOperations.add_core_moarop_mapping('split', 'split');
QAST::MASTOperations.add_core_moarop_mapping('chr', 'chr');
QAST::MASTOperations.add_core_moarop_mapping('ordfirst', 'ordfirst');
QAST::MASTOperations.add_core_moarop_mapping('ordat', 'ordat');
+QAST::MASTOperations.add_core_moarop_mapping('ordbaseat', 'ordbaseat');
QAST::MASTOperations.add_core_moarop_mapping('indexfrom', 'index_s');
QAST::MASTOperations.add_core_moarop_mapping('rindexfrom', 'rindexfrom');
QAST::MASTOperations.add_core_moarop_mapping('substr_s', 'substr_s');
diff --git a/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp b/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp
index 558b3f6..efc1f00 100644
--- a/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp
+++ b/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp
@@ -507,7 +507,20 @@ class QAST::MASTRegexCompiler {
method enumcharlist($node) {
my @ins;
my $op := $node.negate ?? 'indexnat' !! 'indexat';
- nqp::push(@ins, op($op, %!reg<tgt>, %!reg<pos>, sval($node[0]), %!reg<fail>));
+ if $node.subtype eq 'ignoremark' { nqp::say('enumcharlist ignoremark: ' ~ $node[0]);
+ my $i0 := $!regalloc.fresh_i();
+ my $s0 := $!regalloc.fresh_s();
+ merge_ins(@ins, [
+ op('ge_i', $i0, %!reg<pos>, %!reg<eos>),
+ op('if_i', $i0, %!reg<fail>),
+ op('ordbaseat', $i0, %!reg<tgt>, %!reg<pos>),
+ op('chr', $s0, $i0),
+ op($op, $s0, %!reg<zero>, sval($node[0]), %!reg<fail>),
+ ]);
+ }
+ else {
+ nqp::push(@ins, op($op, %!reg<tgt>, %!reg<pos>, sval($node[0]), %!reg<fail>));
+ }
nqp::push(@ins, op('inc_i', %!reg<pos>))
unless $node.subtype eq 'zerowidth';
@ins
@@ -577,7 +590,8 @@ class QAST::MASTRegexCompiler {
method literal($node) {
my $litconst := $node[0];
- my $eq_op := $node.subtype eq 'ignorecase' ?? 'eqatic_s' !! 'eqat_s';
+ my $eq_op := $node.subtype eq 'ignorecase' ?? 'eqatic_s' !!
+ $node.subtype eq 'ignoremark' ?? 'eqatim_s' !! 'eqat_s';
my $s0 := $!regalloc.fresh_s();
my $i0 := $!regalloc.fresh_i();
my $cmpop := $node.negate ?? 'if_i' !! 'unless_i';
@@ -922,7 +936,7 @@ class QAST::MASTRegexCompiler {
$looplabel,
op('inc_i', %!reg<pos>),
];
- if $node.list && $node.subtype ne 'ignorecase' {
+ if $node.list && $node.subtype ne 'ignorecase' && $node.subtype ne 'ignoremark' {
my $lit := $!regalloc.fresh_s();
nqp::push(@ins, op('const_s', $lit, sval($node[0])));
nqp::push(@ins, op('index_s', %!reg<pos>, %!reg<tgt>, $lit, %!reg<pos>));
diff --git a/src/Perl6/Actions.nqp b/src/Perl6/Actions.nqp
index 0f368ca..4ebcb2b 100644
--- a/src/Perl6/Actions.nqp
+++ b/src/Perl6/Actions.nqp
@@ -6073,6 +6073,7 @@ Compilation unit '$file' contained the following violations:
);
my %REGEX_ADVERBS_CANONICAL := hash(
ignorecase => 'i',
+ ignoremark => 'm',
ratchet => 'r',
sigspace => 's',
continue => 'c',
@@ -6096,7 +6097,7 @@ Compilation unit '$file' contained the following violations:
ss => 's',
);
INIT {
- my str $mods := 'i ignorecase s sigspace r ratchet Perl5 P5';
+ my str $mods := 'i ignorecase m ignoremark s sigspace r ratchet Perl5 P5';
for nqp::split(' ', $mods) {
%SHARED_ALLOWED_ADVERBS{$_} := 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment