Created
December 27, 2023 12:05
-
-
Save tompng/aa0706a181e9187bd79e8cec5a5f3c97 to your computer and use it in GitHub Desktop.
regexp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds random regular expressions paired with a producer of sample strings.
#
# Every generator method returns a two-element array
# `[regexp_source, producer]`:
#
# * `regexp_source` is a String containing regexp source text;
# * `producer` is a lambda that returns a String built from the same random
#   choices that shaped the regexp. Anchors are emitted by producers as the
#   literal characters '^' and '$', so callers must interpret or strip them
#   before matching.
#
# The `level` argument is a depth budget: it shrinks on every recursive step
# and `multi` falls back to a single atom once it drops below zero.
class Generator
  # Characters used for literal atoms and for producer output.
  ALPHABET = ('a'..'z').to_a.freeze

  # Wraps a nested expression in a capturing, non-capturing or named group.
  #
  # @param level [Numeric] remaining depth budget
  # @return [Array(String, Proc)] regexp source and its producer
  def generate_paren(level)
    regexp, producer = multi(level - 1)
    opener = ['(', '(?:', '(?<a>'].sample
    ["#{opener}#{regexp})", producer]
  end

  # A single literal letter; the producer always returns that same letter.
  #
  # @param _level [Numeric] ignored
  # @return [Array(String, Proc)]
  def generate_alphabet(_level)
    letter = ALPHABET.sample
    [letter, -> { letter }]
  end

  # The `.` wildcard; the producer returns a random letter of ALPHABET.
  #
  # @param _level [Numeric] ignored
  # @return [Array(String, Proc)]
  def generate_dot(_level)
    ['.', -> { ALPHABET.sample }]
  end

  # A character class `[a-b]` over two distinct letters in sorted order; the
  # producer returns a random letter inside that inclusive range.
  #
  # @param _level [Numeric] ignored
  # @return [Array(String, Proc)]
  def generate_range(_level)
    low, high = ALPHABET.sample(2).sort
    ["[#{low}-#{high}]", -> { (low..high).to_a.sample }]
  end

  # Picks one of the `generate_*` atoms at random and invokes it with a
  # randomly reduced depth budget.
  #
  # @param level [Numeric] remaining depth budget
  # @return [Array(String, Proc)]
  def single(level)
    generator = GENERATE_METHODS.sample
    # Kernel#rand returns a Float for a max of 0 and uses the absolute value
    # of a negative max, which would make the budget fractional or let it grow
    # again. Clamping the bound to 1 keeps the budget a non-negative Integer
    # that never exceeds what was passed in.
    bound = [level - 1, 1].max
    public_send(generator, rand(bound))
  end

  # An alternation of one to three nested expressions inside a capturing or
  # non-capturing group; the producer delegates to one random branch.
  #
  # @param level [Numeric] remaining depth budget
  # @return [Array(String, Proc)]
  def multi_or(level)
    branches = Array.new(rand(1..3)) { multi(level - 1) }
    regexps, producers = branches.transpose
    opener = ['(', '(?:'].sample
    ["#{opener}#{regexps.join('|')})", -> { producers.sample.call }]
  end

  # A quantified atom (`+`, `*` or `{2,5}`), optionally made lazy with `?`.
  # The producer repeats the atom's producer a random number of times drawn
  # from the count range paired with the chosen quantifier.
  #
  # @param level [Numeric] remaining depth budget
  # @return [Array(String, Proc)]
  def repeat(level)
    quantifier, counts = [['+', 1..4], ['*', 0..3], ['{2,5}', 2..5]].sample
    regexp, producer = single(level - 1)
    lazy = ['?', ''].sample
    ["#{regexp}#{quantifier}#{lazy}", -> { Array.new(rand(counts)) { producer.call }.join }]
  end

  # Entry point: a random alternation or concatenation, or a single atom once
  # the depth budget is exhausted.
  #
  # @param level [Numeric] remaining depth budget
  # @return [Array(String, Proc)]
  def multi(level)
    return single(0) if level < 0

    rand < 0.5 ? multi_or(level - 1) : multi_concat(level - 1)
  end

  # A concatenation of one to five atoms or quantified atoms. With 20%
  # probability each, a `^` is prepended and a `$` is appended; their
  # producers emit the literal anchor character.
  #
  # @param level [Numeric] remaining depth budget
  # @return [Array(String, Proc)]
  def multi_concat(level)
    items = Array.new(rand(1..5)) { rand < 0.5 ? single(level - 1) : repeat(level - 1) }
    items.unshift(['^', -> { '^' }]) if rand < 0.2
    items.push(['$', -> { '$' }]) if rand < 0.2
    regexps, producers = items.transpose
    [regexps.join, -> { producers.map(&:call).join }]
  end

  # Names of the atom generators `single` chooses from. Must stay below the
  # method definitions, because it is computed from the methods that exist at
  # this point in the class body.
  GENERATE_METHODS = instance_methods.grep(/\Agenerate_/).freeze
end
# Returns a random string of lowercase letters drawn from Generator::ALPHABET.
#
# The length is `rand(n)`, i.e. between 0 and n - 1 characters for a positive
# Integer n.
#
# @param n [Integer] exclusive upper bound on the length
# @return [String]
def rand_s(n)
  # Kernel#rand(0) returns a Float in [0, 1) rather than an Integer, and Float
  # has no #times; #to_i collapses that case to an empty string instead of
  # raising NoMethodError.
  length = rand(n).to_i
  Array.new(length) { Generator::ALPHABET.sample }.join
end
# Fuzz driver: generate random regexps and print every case where a string
# built by the matching producer is NOT matched by the compiled regexp.

# Turn off interpreter warnings for the rest of the run (presumably to keep
# warnings about the generated patterns out of the output).
$VERBOSE = nil

1_000_000.times do
  source, producer = Generator.new.multi(6)
  regexp = Regexp.new(source)
  # Only patterns the engine reports as linear-time matchable are tested.
  next unless Regexp.linear_time?(regexp)

  10.times do
    produced = producer.call
    # Producers emit anchors as literal '^' / '$'. Skip samples where an
    # anchor marker sits anywhere other than the very start or very end.
    next if produced.match?(/.\^|\$./)

    # A leading '^' marker is stripped; otherwise random text is prepended.
    subject = produced.start_with?('^') ? produced[1..] : rand_s(5) + produced
    # A trailing '$' marker is stripped; otherwise random text is appended.
    subject = subject.end_with?('$') ? subject[...-1] : subject + rand_s(5)

    # Report the regexp, the raw produced string and the actual subject.
    p [regexp, produced, subject] unless regexp.match?(subject)
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment