Skip to content

Instantly share code, notes, and snippets.

@judofyr
Last active January 13, 2019 18:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save judofyr/80bd41893985f49b7550294b295d3afe to your computer and use it in GitHub Desktop.
Save judofyr/80bd41893985f49b7550294b295d3afe to your computer and use it in GitHub Desktop.
Parser for CFG based on Glushkov's construction algorithm
from pylab import *
data = loadtxt('build/complexity.txt')
n = len(data)
# Fit a polynom on the first half
fst = data[:n//2]
p2 = polyfit(fst[:, 0], fst[:, 1], 2)
p3 = polyfit(fst[:, 0], fst[:, 1], 3)
plot(data[:, 0], data[:, 1], label="Data")
plot(data[:, 0], polyval(p2, data[:, 0]), label="n^2, fitted")
plot(data[:, 0], polyval(p3, data[:, 0]), label="n^3, fitted")
legend(loc="best")
show()
require_relative 'glush'
require 'benchmark'
# This has an exponential amount of parse trees,
# but should be recognized in O(n^k) where k<=3.
S = Grammar.build {
lazy \
def s
c("n") | (s >> c("+") >> s)
end
s
}
(50..130).each do |i|
input = "n" + ("+n")*i
print i, " "
time = Benchmark.measure {
m = Matcher.new(S)
m.parse(input) or raise "failed to parse"
}
puts time.real
$stdout.flush
end
require 'set'
require 'pp'
class Parser
attr_accessor :is_empty
def empty?
if defined?(@is_empty)
@is_empty
else
raise "empty is not computed, #{self.class}"
end
end
def >>(other)
Seq.new(self, other)
end
def |(other)
Alt.new(self, other)
end
def many
many1.maybe
end
def many1
Many1.new(self)
end
def maybe
Maybe.new(self)
end
end
class Char < Parser
def initialize(char)
@char = char
end
def matches?(char)
@char == char
end
def calculate_empty(b)
@is_empty = false
end
def first_set
Set[self]
end
def last_set
Set[self]
end
def each_pair
end
end
class Eps < Parser
def empty?
true
end
def calculate_empty(b)
@is_empty = true
end
def first_set
Set[]
end
def last_set
Set[]
end
def each_pair
end
end
class Alt < Parser
def initialize(left, right)
@left = left
@right = right
end
def calculate_empty(b)
@is_empty = @left.calculate_empty(b) | @right.calculate_empty(b)
end
def first_set
@first_set ||= @left.first_set | @right.first_set
end
def last_set
@last_set ||= @left.last_set | @right.last_set
end
def each_pair(&blk)
@left.each_pair(&blk)
@right.each_pair(&blk)
end
end
class Seq < Parser
def initialize(left, right)
@left = left
@right = right
end
def calculate_empty(b)
@is_empty = @left.calculate_empty(b) & @right.calculate_empty(b)
end
def first_set
@first_set ||= @left.empty? ? (@left.first_set | @right.first_set) : @left.first_set
end
def last_set
@last_set ||= @right.empty? ? (@left.last_set | @right.last_set) : @right.last_set
end
def each_pair(&blk)
@left.each_pair(&blk)
@right.each_pair(&blk)
@left.last_set.each do |a|
@right.first_set.each do |b|
yield a, b
end
end
end
end
class Maybe < Parser
def initialize(child)
@child = child
end
def calculate_empty(b)
@child.calculate_empty(b)
@is_empty = true
end
def first_set
@child.first_set
end
def last_set
@child.last_set
end
def each_pair(&blk)
@child.each_pair(&blk)
end
end
class Many1 < Parser
def initialize(child)
@child = child
end
def calculate_empty(b)
@is_empty = @child.calculate_empty(b)
end
def first_set
@child.first_set
end
def last_set
@child.last_set
end
def each_pair(&blk)
@child.each_pair(&blk)
@child.last_set.each do |a|
@child.first_set.each do |b|
yield a, b
end
end
end
end
class Rule
def initialize(&blk)
@code = blk
end
def call
RuleCall.new(self)
end
def force
@force ||= @code.call
end
def each_pair(&blk)
force.each_pair(&blk)
force.last_set.each do |a|
# This is maybe a bit hacky since a Rule strictly speaking isn't
# a parser. This is here to represent the fact that the last_set
# will complete this rule.
yield a, self
end
end
end
class RuleCall < Parser
attr_reader :rule
def initialize(rule)
@rule = rule
end
def calculate_empty(b)
@is_empty = b.include?(@rule)
end
def first_set
Set[self]
end
def last_set
Set[self]
end
def each_pair(&blk)
end
end
class Grammar
def self.build(&blk)
new(&blk)
end
attr_reader :transitions, :start_parser
def initialize(&blk)
@rules = []
@start_parser = instance_eval(&blk)
if !@start_parser.is_a?(Parser)
raise "block must return a parser"
end
_compute_empty
_compute_transitions
end
def c(c)
Char.new(c)
end
def eps
Eps.new
end
def call(n)
case n
when Symbol
send(n)
else
n.call
end
end
def sep_by(p, sep)
sep_by1(p, sep).maybe
end
def sep_by1(p, sep)
call(p) >> (call(sep) >> call(p)).many
end
def end_by(p, sep)
end_by1(p, sep).maybe
end
def end_by1(p, sep)
(call(p) >> call(sep)).many1
end
def lazy(name)
old = method(name)
rule = Rule.new { old.call }
@rules << rule
define_singleton_method(name) { rule.call }
nil
end
private
def _compute_empty
empty_rules = Set.new
begin
before = empty_rules.size
@rules.each do |rule|
is_empty = rule.force.calculate_empty(empty_rules)
empty_rules << rule if is_empty
end
after = empty_rules.size
end while before != after
@start_parser.calculate_empty(empty_rules)
end
def _compute_transitions
@transitions = Hash.new { |h, k| h[k] = Set.new }
@rules.each do |rule|
rule.each_pair do |a, b|
@transitions[a] << b
end
end
@start_parser.each_pair do |a, b|
@transitions[a] << b
end
@start_parser.last_set.each do |lst|
@transitions[lst] << :eof
end
end
end
# The left_pos refers to the position where the current rule started.
State = Struct.new(:parser, :left_pos)
Call = Struct.new(:rule, :left_pos)
class CallState
attr_reader :returns
def initialize
@returns = []
@finished = Set.new
end
def add_return(state)
@returns << state
end
def finish(pos)
@returns.freeze
if @finished.include?(pos)
false
else
@finished << pos
true
end
end
end
class Matcher
def initialize(grammar)
@parser = grammar.start_parser
@transitions = grammar.transitions
@call_states = {}
end
def parse(input)
if input.empty?
return @parser.empty?
end
active_states = nil
input.each_char.each_with_index do |char, idx|
if active_states.nil?
active_states = parse_first(char, idx)
else
active_states = parse_next(active_states, char, idx)
end
# quick return when there's no more active states
return false if active_states.empty?
end
# parse the end-of-string marker
active_states = parse_next(active_states, nil, input.size)
active_states.any? { |s| s.parser == :eof }
end
def parse_first(char, pos)
new_states = []
@parser.first_set.each do |target|
transition(target, char, pos, nil) do |new_state|
new_states << new_state
end
end
new_states
end
def parse_next(active_states, char, pos)
new_states = []
transition_from_states(active_states, char, pos) do |new_state|
new_states << new_state
end
new_states
end
def transition_from_states(states, char, pos, &blk)
states.each do |state|
@transitions[state.parser].each do |target|
transition(target, char, pos, state.left_pos, &blk)
end
end
end
# Attempt to move into the parser-node, yielding states
def transition(parser, char, pos, left_pos, &blk)
case parser
when :eof
if char.nil?
yield State.new(:eof, left_pos)
end
when Char
if parser.matches?(char)
yield State.new(parser, left_pos)
end
when RuleCall
call = Call.new(parser.rule, pos)
call_state = @call_states[call]
if !call_state
# This is the first time this rule has been called at this posiotn
# Setup the returns array
@call_states[call] = call_state = CallState.new
# ... and kick off the first actions
parser.rule.force.first_set.each do |fst|
transition(fst, char, pos, pos, &blk)
end
end
# and push the state which we want to complete after the rule succeeds
call_state.add_return(State.new(parser, left_pos))
when Rule
# This implies that a rule is complete
# Find the call which it corresponds to
call = Call.new(parser, left_pos)
call_state = @call_states.fetch(call)
if call_state.finish(pos)
# Now we can transition out
transition_from_states(call_state.returns, char, pos, &blk)
end
else
raise "ehm: #{parser.class}"
end
end
end
task :default => :test
desc "Run tests"
task :test do
sh 'ruby', 'test.rb'
end
file "build" do
mkdir "build"
end
file "build/complexity.txt" => "build" do
sh 'ruby complexity.rb | tee build/complexity.txt'
end
task :complexity => "build/complexity.txt" do
sh 'python3', 'complexity.py'
end
require_relative 'glush'
require 'minitest/autorun'
class TestParser < Minitest::Test
def recognize?(parser, str)
m = Matcher.new(parser)
m.parse(str)
end
## Matched parens
PAREN = Grammar.build {
lazy \
def paren
eps | (c("(") >> paren >> c(")"))
end
paren
}
def test_paren_empty
assert recognize?(PAREN, "")
end
def test_paren_one
assert recognize?(PAREN, "()")
end
def test_paren_two
assert recognize?(PAREN, "(())")
end
def test_paren_close_missing
refute recognize?(PAREN, "(()")
end
def test_paren_close_extra
refute recognize?(PAREN, "())")
end
## n+n
S = Grammar.build {
lazy \
def s
c("n") | (s >> c("+") >> s)
end
s
}
def test_s_one
assert recognize?(S, "n")
end
def test_s_two
assert recognize?(S, "n+n")
refute recognize?(S, "n+")
end
def test_s_three
assert recognize?(S, "n+n+n")
refute recognize?(S, "n+n+")
end
## Combinators
A = Grammar.build { c("a").many }
AP = Grammar.build { c("a").many1 }
def test_rep
assert recognize?(A, "")
assert recognize?(A, "a")
assert recognize?(A, "aaaaa")
refute recognize?(AP, "")
assert recognize?(AP, "a")
assert recognize?(AP, "aaaaa")
end
ONE_LST = Grammar.build {
def one
c("1")
end
def sep
c(",") >> c(" ").many
end
sep_by(:one, :sep)
}
def test_sep_by
assert recognize?(ONE_LST, "")
assert recognize?(ONE_LST, "1")
assert recognize?(ONE_LST, "1,1")
assert recognize?(ONE_LST, "1, 1")
refute recognize?(ONE_LST, "1, ")
end
ONE_LST_END = Grammar.build {
def one
c("1")
end
def sep
c(",") >> c(" ").many
end
end_by(:one, :sep)
}
def test_end_by
assert recognize?(ONE_LST_END, "")
refute recognize?(ONE_LST_END, "1")
assert recognize?(ONE_LST_END, "1,")
assert recognize?(ONE_LST_END, "1, 1,")
assert recognize?(ONE_LST_END, "1, ")
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment