Skip to content

Instantly share code, notes, and snippets.

@ammar
Created July 10, 2017 12:32
Show Gist options
  • Save ammar/af54718c71aead909f8a5af4ec5cc40f to your computer and use it in GitHub Desktop.
Save ammar/af54718c71aead909f8a5af4ec5cc40f to your computer and use it in GitHub Desktop.
WIP: Add support for new absence operator
diff --git a/lib/regexp_parser/scanner/scanner.rl b/lib/regexp_parser/scanner/scanner.rl
index e200cfc..96b2e70 100644
--- a/lib/regexp_parser/scanner/scanner.rl
+++ b/lib/regexp_parser/scanner/scanner.rl
@@ -78,6 +78,8 @@
conditional = '(?(';
+ absent_operator = '?~';
+
group_comment = '?#' . [^)]* . group_close;
group_atomic = '?>';
@@ -540,6 +542,12 @@
emit(:group, :comment, *text(data, ts, te))
};
+ # (?~exp) absent operator:
+ # Matches the group opener followed by '?~'. The rest of the group, up to
+ # the closing parenthesis, is consumed by scan_absent_operator, and the
+ # value it returns becomes the new data pointer p.
+ # ------------------------------------------------------------------------
+ group_open . absent_operator >group_opened {
+ p = scan_absent_operator(p, data, ts, te)
+ };
+
# Expression options:
# (?imxdau-imx) option on/off
# i: ignore case
@@ -930,6 +938,41 @@ module Regexp::Scanner
p # return the new value of the data pointer
end
+ def self.scan_absent_operator(p, data, ts, te)
+ text = text(data, ts, te).first
+
+ exp_char, exp_length = true, 0
+
+ emit(:absent_op, :open, text(data, ts, te).first, ts, te)
+
+ # Copy until a closing parenthesis, skipping escaped ones.
+ while exp_char
+ if data[te + exp_length]
+ c = data[te + exp_length].chr
+
+ if c == '\\'
+ text << c ; p += 1 ; exp_length += 1
+
+ c = data[te + exp_length].chr
+
+ text << c ; p += 1 ; exp_length += 1
+ elsif c == ')'
+ exp_char = false
+
+ emit(:absent_op, :expression, text, ts, te + exp_length)
+ #emit(:absent_op, :close, c, ts, te + exp_length)
+
+ else
+ text << c ; p += 1 ; exp_length += 1
+ end
+ else
+ raise PrematureEndError.new("absent operator `#{text}'")
+ end
+ end
+
+ p
+ end
+
# Copy from ts to te from data as text
def self.copy(data, range)
data[range].pack('c*')
diff --git a/test/scanner/test_groups.rb b/test/scanner/test_groups.rb
index 5dabb56..aa7062e 100644
--- a/test/scanner/test_groups.rb
+++ b/test/scanner/test_groups.rb
@@ -55,6 +55,15 @@ class ScannerGroups < Test::Unit::TestCase
})
end
+ if RUBY_VERSION >= '2.4.1'
+ tests.merge!({
+ # Absent operator
+ # Each entry maps a pattern to [index, type, token, text, ts, te], the
+ # same tuple that the tests.each_with_index loop below destructures.
+ # NOTE(review): the :close expectation presumably relies on a close token
+ # that scan_absent_operator does not emit yet (that emit call is commented
+ # out in this patch) -- confirm before enabling.
+ '(?~abc)' => [0, :absent_op, :open, '(?~', 0, 7],
+ '(?~def)' => [1, :absent_op, :expression, 'def', 0, 7],
+ '(?~xyz)' => [2, :absent_op, :close, ')', 0, 7],
+ })
+ end
+
tests.each_with_index do |(pattern, (index, type, token, text, ts, te)), count|
define_method "test_scanner_#{type}_#{token}_#{count}" do
tokens = RS.scan(pattern)
diff --git a/test/warnings.yml b/test/warnings.yml
index e957c37..f1616c8 100644
--- a/test/warnings.yml
+++ b/test/warnings.yml
@@ -1,6 +1,6 @@
---
# Unused variable emitted by ragel
-- "lib/regexp_parser/scanner.rb:1674: warning:
+- "lib/regexp_parser/scanner.rb:1677: warning:
assigned but unused variable - testEof"
# Unavoidable duplicated character range tests
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment