judofyr/.gitignore Secret

## .gitignore
/build

## complexity.py
from pylab import *

data = loadtxt('build/complexity.txt')
n = len(data)

# Fit a polynom on the first half
fst = data[:n//2]
p2 = polyfit(fst[:, 0], fst[:, 1], 2)
p3 = polyfit(fst[:, 0], fst[:, 1], 3)

plot(data[:, 0], data[:, 1], label="Data")
plot(data[:, 0], polyval(p2, data[:, 0]), label="n^2, fitted")
plot(data[:, 0], polyval(p3, data[:, 0]), label="n^3, fitted")
legend(loc="best")
show()


## complexity.rb
require_relative 'glush'
require 'benchmark'

# This has an exponential amount of parse trees,
# but should be recognized in O(n^k) where k<=3.

S = Grammar.build {
  lazy \
  def s
    c("n") | (s >> c("+") >> s)
  end

  s
}

(50..130).each do |i|
  input = "n" + ("+n")*i
  print i, " "
  time = Benchmark.measure {
    m = Matcher.new(S)
    m.parse(input) or raise "failed to parse"
  }
  puts time.real
  $stdout.flush
end


## glush.rb
require 'set'
require 'pp'

class Parser
  attr_accessor :is_empty

  def empty?
    if defined?(@is_empty)
      @is_empty
    else
      raise "empty is not computed, #{self.class}"
    end
  end

  def >>(other)
    Seq.new(self, other)
  end

  def |(other)
    Alt.new(self, other)
  end

  def many
    many1.maybe
  end

  def many1
    Many1.new(self)
  end

  def maybe
    Maybe.new(self)
  end
end

class Char < Parser
  def initialize(char)
    @char = char
  end

  def matches?(char)
    @char == char
  end

  def calculate_empty(b)
    @is_empty = false
  end

  def first_set
    Set[self]
  end

  def last_set
    Set[self]
  end

  def each_pair
  end
end

class Eps < Parser
  def empty?
    true
  end

  def calculate_empty(b)
    @is_empty = true
  end

  def first_set
    Set[]
  end

  def last_set
    Set[]
  end

  def each_pair
  end
end

class Alt < Parser
  def initialize(left, right)
    @left = left
    @right = right
  end

  def calculate_empty(b)
    @is_empty = @left.calculate_empty(b) | @right.calculate_empty(b)
  end

  def first_set
    @first_set ||= @left.first_set | @right.first_set
  end

  def last_set
    @last_set ||= @left.last_set | @right.last_set
  end

  def each_pair(&blk)
    @left.each_pair(&blk)
    @right.each_pair(&blk)
  end
end

class Seq < Parser
  def initialize(left, right)
    @left = left
    @right = right
  end

  def calculate_empty(b)
    @is_empty = @left.calculate_empty(b) & @right.calculate_empty(b)
  end

  def first_set
    @first_set ||= @left.empty? ? (@left.first_set | @right.first_set) : @left.first_set
  end

  def last_set
    @last_set ||= @right.empty? ? (@left.last_set | @right.last_set) : @right.last_set
  end

  def each_pair(&blk)
    @left.each_pair(&blk)
    @right.each_pair(&blk)

    @left.last_set.each do |a|
      @right.first_set.each do |b|
        yield a, b
      end
    end
  end
end

class Maybe < Parser
  def initialize(child)
    @child = child
  end

  def calculate_empty(b)
    @child.calculate_empty(b)
    @is_empty = true
  end

  def first_set
    @child.first_set
  end

  def last_set
    @child.last_set
  end

  def each_pair(&blk)
    @child.each_pair(&blk)
  end
end

class Many1 < Parser
  def initialize(child)
    @child = child
  end

  def calculate_empty(b)
    @is_empty = @child.calculate_empty(b)
  end

  def first_set
    @child.first_set
  end

  def last_set
    @child.last_set
  end

  def each_pair(&blk)
    @child.each_pair(&blk)
    @child.last_set.each do |a|
      @child.first_set.each do |b|
        yield a, b
      end
    end
  end
end

class Rule
  def initialize(&blk)
    @code = blk
  end

  def call
    RuleCall.new(self)
  end

  def force
    @force ||= @code.call
  end

  def each_pair(&blk)
    force.each_pair(&blk)
    force.last_set.each do |a|
      # This is maybe a bit hacky since a Rule strictly speaking isn't
      # a parser. This is here to represent the fact that the last_set
      # will complete this rule.
      yield a, self
    end
  end
end

class RuleCall < Parser
  attr_reader :rule

  def initialize(rule)
    @rule = rule
  end

  def calculate_empty(b)
    @is_empty = b.include?(@rule)
  end

  def first_set
    Set[self]
  end

  def last_set
    Set[self]
  end

  def each_pair(&blk)
  end
end

class Grammar
  def self.build(&blk)
    new(&blk)
  end

  attr_reader :transitions, :start_parser

  def initialize(&blk)
    @rules = []

    @start_parser = instance_eval(&blk)
    if !@start_parser.is_a?(Parser)
      raise "block must return a parser"
    end

    _compute_empty
    _compute_transitions
  end

  def c(c)
    Char.new(c)
  end

  def eps
    Eps.new
  end

  def call(n)
    case n
    when Symbol
      send(n)
    else
      n.call
    end
  end

  def sep_by(p, sep)
    sep_by1(p, sep).maybe
  end

  def sep_by1(p, sep)
    call(p) >> (call(sep) >> call(p)).many
  end

  def end_by(p, sep)
    end_by1(p, sep).maybe
  end

  def end_by1(p, sep)
    (call(p) >> call(sep)).many1
  end

  def lazy(name)
    old = method(name)
    rule = Rule.new { old.call }
    @rules << rule
    define_singleton_method(name) { rule.call }
    nil
  end

  private

  def _compute_empty
    empty_rules = Set.new

    begin
      before = empty_rules.size
      @rules.each do |rule|
        is_empty = rule.force.calculate_empty(empty_rules)
        empty_rules << rule if is_empty
      end
      after = empty_rules.size
    end while before != after

    @start_parser.calculate_empty(empty_rules)
  end

  def _compute_transitions
    @transitions = Hash.new { |h, k| h[k] = Set.new }

    @rules.each do |rule|
      rule.each_pair do |a, b|
        @transitions[a] << b
      end
    end

    @start_parser.each_pair do |a, b|
      @transitions[a] << b
    end

    @start_parser.last_set.each do |lst|
      @transitions[lst] << :eof
    end
  end
end


# The left_pos refers to the position where the current rule started.

State = Struct.new(:parser, :left_pos)
Call = Struct.new(:rule, :left_pos)

class CallState
  attr_reader :returns

  def initialize
    @returns = []
    @finished = Set.new
  end

  def add_return(state)
    @returns << state
  end

  def finish(pos)
    @returns.freeze
    if @finished.include?(pos)
      false
    else
      @finished << pos
      true
    end
  end
end

class Matcher
  def initialize(grammar)
    @parser = grammar.start_parser
    @transitions = grammar.transitions
    @call_states = {}
  end

  def parse(input)
    if input.empty?
      return @parser.empty?
    end

    active_states = nil

    input.each_char.each_with_index do |char, idx|
      if active_states.nil?
        active_states = parse_first(char, idx)
      else
        active_states = parse_next(active_states, char, idx)
      end

      # quick return when there's no more active states
      return false if active_states.empty?
    end

    # parse the end-of-string marker
    active_states = parse_next(active_states, nil, input.size)

    active_states.any? { |s| s.parser == :eof }
  end

  def parse_first(char, pos)
    new_states = []
    @parser.first_set.each do |target|
      transition(target, char, pos, nil) do |new_state|
        new_states << new_state
      end
    end
    new_states
  end

  def parse_next(active_states, char, pos)
    new_states = []
    transition_from_states(active_states, char, pos) do |new_state|
      new_states << new_state
    end
    new_states
  end

  def transition_from_states(states, char, pos, &blk)
    states.each do |state|
      @transitions[state.parser].each do |target|
        transition(target, char, pos, state.left_pos, &blk)
      end
    end
  end

  # Attempt to move into the parser-node, yielding states
  def transition(parser, char, pos, left_pos, &blk)
    case parser
    when :eof
      if char.nil?
        yield State.new(:eof, left_pos)
      end
    when Char
      if parser.matches?(char)
        yield State.new(parser, left_pos)
      end
    when RuleCall
      call = Call.new(parser.rule, pos)
      call_state = @call_states[call]
      if !call_state
        # This is the first time this rule has been called at this posiotn

        # Setup the returns array
        @call_states[call] = call_state = CallState.new

        # ... and kick off the first actions
        parser.rule.force.first_set.each do |fst|
          transition(fst, char, pos, pos, &blk)
        end
      end

      # and push the state which we want to complete after the rule succeeds
      call_state.add_return(State.new(parser, left_pos))
    when Rule
      # This implies that a rule is complete

      # Find the call which it corresponds to
      call = Call.new(parser, left_pos)
      call_state = @call_states.fetch(call)

      if call_state.finish(pos)
        # Now we can transition out
        transition_from_states(call_state.returns, char, pos, &blk)
      end
    else
      raise "ehm: #{parser.class}"
    end
  end
end


## Rakefile
task :default => :test

desc "Run tests"
task :test do
  sh 'ruby', 'test.rb'
end

file "build" do
  mkdir "build"
end

file "build/complexity.txt" => "build" do
  sh 'ruby complexity.rb | tee build/complexity.txt'
end

task :complexity => "build/complexity.txt" do
  sh 'python3', 'complexity.py'
end


## test.rb
require_relative 'glush'
require 'minitest/autorun'

class TestParser < Minitest::Test
  def recognize?(parser, str)
    m = Matcher.new(parser)
    m.parse(str)
  end

  ## Matched parens

  PAREN = Grammar.build {
    lazy \
    def paren
      eps | (c("(") >> paren >> c(")"))
    end

    paren
  }

  def test_paren_empty
    assert recognize?(PAREN, "")
  end

  def test_paren_one
    assert recognize?(PAREN, "()")
  end

  def test_paren_two
    assert recognize?(PAREN, "(())")
  end

  def test_paren_close_missing
    refute recognize?(PAREN, "(()")
  end

  def test_paren_close_extra
    refute recognize?(PAREN, "())")
  end

  ## n+n
  S = Grammar.build {
    lazy \
    def s
      c("n") | (s >> c("+") >> s)
    end

    s
  }

  def test_s_one
    assert recognize?(S, "n")
  end

  def test_s_two
    assert recognize?(S, "n+n")
    refute recognize?(S, "n+")
  end

  def test_s_three
    assert recognize?(S, "n+n+n")
    refute recognize?(S, "n+n+")
  end

  ## Combinators
  A = Grammar.build { c("a").many }
  AP = Grammar.build { c("a").many1 }

  def test_rep
    assert recognize?(A, "")
    assert recognize?(A, "a")
    assert recognize?(A, "aaaaa")

    refute recognize?(AP, "")
    assert recognize?(AP, "a")
    assert recognize?(AP, "aaaaa")
  end

  ONE_LST = Grammar.build {
    def one
      c("1")
    end

    def sep
      c(",") >> c(" ").many
    end

    sep_by(:one, :sep)
  }

  def test_sep_by
    assert recognize?(ONE_LST, "")
    assert recognize?(ONE_LST, "1")
    assert recognize?(ONE_LST, "1,1")
    assert recognize?(ONE_LST, "1,   1")
    refute recognize?(ONE_LST, "1,   ")
  end

  ONE_LST_END = Grammar.build {
    def one
      c("1")
    end

    def sep
      c(",") >> c(" ").many
    end

    end_by(:one, :sep)
  }

  def test_end_by
    assert recognize?(ONE_LST_END, "")
    refute recognize?(ONE_LST_END, "1")
    assert recognize?(ONE_LST_END, "1,")
    assert recognize?(ONE_LST_END, "1,   1,")
    assert recognize?(ONE_LST_END, "1,   ")
  end
end
	from pylab import *

	data = loadtxt('build/complexity.txt')
	n = len(data)

	# Fit a polynom on the first half
	fst = data[:n//2]
	p2 = polyfit(fst[:, 0], fst[:, 1], 2)
	p3 = polyfit(fst[:, 0], fst[:, 1], 3)

	plot(data[:, 0], data[:, 1], label="Data")
	plot(data[:, 0], polyval(p2, data[:, 0]), label="n^2, fitted")
	plot(data[:, 0], polyval(p3, data[:, 0]), label="n^3, fitted")
	legend(loc="best")
	show()
	require_relative 'glush'
	require 'benchmark'

	# This has an exponential amount of parse trees,
	# but should be recognized in O(n^k) where k<=3.

	S = Grammar.build {
	lazy \
	def s
	c("n") \| (s >> c("+") >> s)
	end

	s
	}

	(50..130).each do \|i\|
	input = "n" + ("+n")*i
	print i, " "
	time = Benchmark.measure {
	m = Matcher.new(S)
	m.parse(input) or raise "failed to parse"
	}
	puts time.real
	$stdout.flush
	end
	require 'set'
	require 'pp'

	class Parser
	attr_accessor :is_empty

	def empty?
	if defined?(@is_empty)
	@is_empty
	else
	raise "empty is not computed, #{self.class}"
	end
	end

	def >>(other)
	Seq.new(self, other)
	end

	def \|(other)
	Alt.new(self, other)
	end

	def many
	many1.maybe
	end

	def many1
	Many1.new(self)
	end

	def maybe
	Maybe.new(self)
	end
	end

	class Char < Parser
	def initialize(char)
	@char = char
	end

	def matches?(char)
	@char == char
	end

	def calculate_empty(b)
	@is_empty = false
	end

	def first_set
	Set[self]
	end

	def last_set
	Set[self]
	end

	def each_pair
	end
	end

	class Eps < Parser
	def empty?
	true
	end

	def calculate_empty(b)
	@is_empty = true
	end

	def first_set
	Set[]
	end

	def last_set
	Set[]
	end

	def each_pair
	end
	end

	class Alt < Parser
	def initialize(left, right)
	@left = left
	@right = right
	end

	def calculate_empty(b)
	@is_empty = @left.calculate_empty(b) \| @right.calculate_empty(b)
	end

	def first_set
	@first_set \|\|= @left.first_set \| @right.first_set
	end

	def last_set
	@last_set \|\|= @left.last_set \| @right.last_set
	end

	def each_pair(&blk)
	@left.each_pair(&blk)
	@right.each_pair(&blk)
	end
	end

	class Seq < Parser
	def initialize(left, right)
	@left = left
	@right = right
	end

	def calculate_empty(b)
	@is_empty = @left.calculate_empty(b) & @right.calculate_empty(b)
	end

	def first_set
	@first_set \|\|= @left.empty? ? (@left.first_set \| @right.first_set) : @left.first_set
	end

	def last_set
	@last_set \|\|= @right.empty? ? (@left.last_set \| @right.last_set) : @right.last_set
	end

	def each_pair(&blk)
	@left.each_pair(&blk)
	@right.each_pair(&blk)

	@left.last_set.each do \|a\|
	@right.first_set.each do \|b\|
	yield a, b
	end
	end
	end
	end

	class Maybe < Parser
	def initialize(child)
	@child = child
	end

	def calculate_empty(b)
	@child.calculate_empty(b)
	@is_empty = true
	end

	def first_set
	@child.first_set
	end

	def last_set
	@child.last_set
	end

	def each_pair(&blk)
	@child.each_pair(&blk)
	end
	end

	class Many1 < Parser
	def initialize(child)
	@child = child
	end

	def calculate_empty(b)
	@is_empty = @child.calculate_empty(b)
	end

	def first_set
	@child.first_set
	end

	def last_set
	@child.last_set
	end

	def each_pair(&blk)
	@child.each_pair(&blk)
	@child.last_set.each do \|a\|
	@child.first_set.each do \|b\|
	yield a, b
	end
	end
	end
	end

	class Rule
	def initialize(&blk)
	@code = blk
	end

	def call
	RuleCall.new(self)
	end

	def force
	@force \|\|= @code.call
	end

	def each_pair(&blk)
	force.each_pair(&blk)
	force.last_set.each do \|a\|
	# This is maybe a bit hacky since a Rule strictly speaking isn't
	# a parser. This is here to represent the fact that the last_set
	# will complete this rule.
	yield a, self
	end
	end
	end

	class RuleCall < Parser
	attr_reader :rule

	def initialize(rule)
	@rule = rule
	end

	def calculate_empty(b)
	@is_empty = b.include?(@rule)
	end

	def first_set
	Set[self]
	end

	def last_set
	Set[self]
	end

	def each_pair(&blk)
	end
	end

	class Grammar
	def self.build(&blk)
	new(&blk)
	end

	attr_reader :transitions, :start_parser

	def initialize(&blk)
	@rules = []

	@start_parser = instance_eval(&blk)
	if !@start_parser.is_a?(Parser)
	raise "block must return a parser"
	end

	_compute_empty
	_compute_transitions
	end

	def c(c)
	Char.new(c)
	end

	def eps
	Eps.new
	end

	def call(n)
	case n
	when Symbol
	send(n)
	else
	n.call
	end
	end

	def sep_by(p, sep)
	sep_by1(p, sep).maybe
	end

	def sep_by1(p, sep)
	call(p) >> (call(sep) >> call(p)).many
	end

	def end_by(p, sep)
	end_by1(p, sep).maybe
	end

	def end_by1(p, sep)
	(call(p) >> call(sep)).many1
	end

	def lazy(name)
	old = method(name)
	rule = Rule.new { old.call }
	@rules << rule
	define_singleton_method(name) { rule.call }
	nil
	end

	private

	def _compute_empty
	empty_rules = Set.new

	begin
	before = empty_rules.size
	@rules.each do \|rule\|
	is_empty = rule.force.calculate_empty(empty_rules)
	empty_rules << rule if is_empty
	end
	after = empty_rules.size
	end while before != after

	@start_parser.calculate_empty(empty_rules)
	end

	def _compute_transitions
	@transitions = Hash.new { \|h, k\| h[k] = Set.new }

	@rules.each do \|rule\|
	rule.each_pair do \|a, b\|
	@transitions[a] << b
	end
	end

	@start_parser.each_pair do \|a, b\|
	@transitions[a] << b
	end

	@start_parser.last_set.each do \|lst\|
	@transitions[lst] << :eof
	end
	end
	end


	# The left_pos refers to the position where the current rule started.

	State = Struct.new(:parser, :left_pos)
	Call = Struct.new(:rule, :left_pos)

	class CallState
	attr_reader :returns

	def initialize
	@returns = []
	@finished = Set.new
	end

	def add_return(state)
	@returns << state
	end

	def finish(pos)
	@returns.freeze
	if @finished.include?(pos)
	false
	else
	@finished << pos
	true
	end
	end
	end

	class Matcher
	def initialize(grammar)
	@parser = grammar.start_parser
	@transitions = grammar.transitions
	@call_states = {}
	end

	def parse(input)
	if input.empty?
	return @parser.empty?
	end

	active_states = nil

	input.each_char.each_with_index do \|char, idx\|
	if active_states.nil?
	active_states = parse_first(char, idx)
	else
	active_states = parse_next(active_states, char, idx)
	end

	# quick return when there's no more active states
	return false if active_states.empty?
	end

	# parse the end-of-string marker
	active_states = parse_next(active_states, nil, input.size)

	active_states.any? { \|s\| s.parser == :eof }
	end

	def parse_first(char, pos)
	new_states = []
	@parser.first_set.each do \|target\|
	transition(target, char, pos, nil) do \|new_state\|
	new_states << new_state
	end
	end
	new_states
	end

	def parse_next(active_states, char, pos)
	new_states = []
	transition_from_states(active_states, char, pos) do \|new_state\|
	new_states << new_state
	end
	new_states
	end

	def transition_from_states(states, char, pos, &blk)
	states.each do \|state\|
	@transitions[state.parser].each do \|target\|
	transition(target, char, pos, state.left_pos, &blk)
	end
	end
	end

	# Attempt to move into the parser-node, yielding states
	def transition(parser, char, pos, left_pos, &blk)
	case parser
	when :eof
	if char.nil?
	yield State.new(:eof, left_pos)
	end
	when Char
	if parser.matches?(char)
	yield State.new(parser, left_pos)
	end
	when RuleCall
	call = Call.new(parser.rule, pos)
	call_state = @call_states[call]
	if !call_state
	# This is the first time this rule has been called at this posiotn

	# Setup the returns array
	@call_states[call] = call_state = CallState.new

	# ... and kick off the first actions
	parser.rule.force.first_set.each do \|fst\|
	transition(fst, char, pos, pos, &blk)
	end
	end

	# and push the state which we want to complete after the rule succeeds
	call_state.add_return(State.new(parser, left_pos))
	when Rule
	# This implies that a rule is complete

	# Find the call which it corresponds to
	call = Call.new(parser, left_pos)
	call_state = @call_states.fetch(call)

	if call_state.finish(pos)
	# Now we can transition out
	transition_from_states(call_state.returns, char, pos, &blk)
	end
	else
	raise "ehm: #{parser.class}"
	end
	end
	end
	task :default => :test

	desc "Run tests"
	task :test do
	sh 'ruby', 'test.rb'
	end

	file "build" do
	mkdir "build"
	end

	file "build/complexity.txt" => "build" do
	sh 'ruby complexity.rb \| tee build/complexity.txt'
	end

	task :complexity => "build/complexity.txt" do
	sh 'python3', 'complexity.py'
	end
	require_relative 'glush'
	require 'minitest/autorun'

	class TestParser < Minitest::Test
	def recognize?(parser, str)
	m = Matcher.new(parser)
	m.parse(str)
	end

	## Matched parens

	PAREN = Grammar.build {
	lazy \
	def paren
	eps \| (c("(") >> paren >> c(")"))
	end

	paren
	}

	def test_paren_empty
	assert recognize?(PAREN, "")
	end

	def test_paren_one
	assert recognize?(PAREN, "()")
	end

	def test_paren_two
	assert recognize?(PAREN, "(())")
	end

	def test_paren_close_missing
	refute recognize?(PAREN, "(()")
	end

	def test_paren_close_extra
	refute recognize?(PAREN, "())")
	end

	## n+n
	S = Grammar.build {
	lazy \
	def s
	c("n") \| (s >> c("+") >> s)
	end

	s
	}

	def test_s_one
	assert recognize?(S, "n")
	end

	def test_s_two
	assert recognize?(S, "n+n")
	refute recognize?(S, "n+")
	end

	def test_s_three
	assert recognize?(S, "n+n+n")
	refute recognize?(S, "n+n+")
	end

	## Combinators
	A = Grammar.build { c("a").many }
	AP = Grammar.build { c("a").many1 }

	def test_rep
	assert recognize?(A, "")
	assert recognize?(A, "a")
	assert recognize?(A, "aaaaa")

	refute recognize?(AP, "")
	assert recognize?(AP, "a")
	assert recognize?(AP, "aaaaa")
	end

	ONE_LST = Grammar.build {
	def one
	c("1")
	end

	def sep
	c(",") >> c(" ").many
	end

	sep_by(:one, :sep)
	}

	def test_sep_by
	assert recognize?(ONE_LST, "")
	assert recognize?(ONE_LST, "1")
	assert recognize?(ONE_LST, "1,1")
	assert recognize?(ONE_LST, "1, 1")
	refute recognize?(ONE_LST, "1, ")
	end

	ONE_LST_END = Grammar.build {
	def one
	c("1")
	end

	def sep
	c(",") >> c(" ").many
	end

	end_by(:one, :sep)
	}

	def test_end_by
	assert recognize?(ONE_LST_END, "")
	refute recognize?(ONE_LST_END, "1")
	assert recognize?(ONE_LST_END, "1,")
	assert recognize?(ONE_LST_END, "1, 1,")
	assert recognize?(ONE_LST_END, "1, ")
	end
	end