belkadan/lexer.rb Secret

## lexer.rb
## Return true if l is a single letter (or underscore), false if not.
def letter?(l)
  case l
  when 'a'..'z', 'A'..'Z', '_'
    return true
  else
    return false
  end
end

## Return true if l is a single base-10 digit, false if not.
def digit?(l)
  case l
  when '0'..'9'
    return true
  else
    return false
  end
end

## Return true if l is a space, tab, or newline.
def space?(l)
  case l
  when ' ', '\t', '\n'
    return true
  else
    return false
  end
end

## Return true if l is a single or double quote.
def quote?(l)
  case l
  when '"', "'"
    return true
  else
    return false
  end
end

## Splits the input into tokens and yields them to the given block.
#
# After we run out of tokens, yield a single nil to show that we're done.
def lex(input)
  # Delete extra spaces from the beginning and end.
  input.strip!
  until input.empty?
    tok = nil # See sanity check below.

    case input[0]
    when 'a'..'z', 'A'..'Z', '_'
      # Split on word boundaries.
      i = 0
      i += 1 while letter?(input[i])
      tok = input.slice!(0...i)

    when '"', "'"
      # Look for the same kind of quote later in the input.
      # A more complicated lexer would give you a way to put double quotes
      # inside other double quotes, usually by putting a backslash first: \".
      # This one doesn't bother.
      end_quote_idx = input.index(input[0], 1)
      raise "missing ending quote" if end_quote_idx.nil?
      tok = input.slice!(0..end_quote_idx)

    when '(', ')', ',', ':', '-', '>'
      # Just take the single character.
      tok = input.slice!(0)

    when '0'..'9'
      i = 0
      i += 1 while digit?(input[i])
      tok = input.slice!(0...i)

    else
      raise "unexpected character: #{input[0]}"
    end

    raise "failed to account for a case" if tok.nil?
    yield tok

    # We ate a word. The next thing is probably one or more spaces. Delete them!
    input.lstrip!
  end

  # We're done; let our consumer know it.
  yield nil
end

if __FILE__ == $0
  # For each line in the input, pass it to our lexer and print out the tokens it generates.
  # Stop when the user puts in a blank line. (Make sure we account for the newline character.)
  ARGF.each do |line|
    exit if line.chomp.empty?
    lex(line) do |token|
      puts(token) unless token.nil?
    end
    puts
  end
end

## parser.rb
require_relative 'lexer'

## Returns true if the input is a valid state name:
# either a number or quoted text.
def valid_state_name?(token)
  return digit?(token[0]) || quote?(token[0])
end

class State
  def initialize(name, is_start, is_final)
    @name = name
    @start = is_start
    @final = is_final
    @transitions = [] # Start with no transitions.
  end

  def name
    @name
  end
  def start?
    @start
  end
  def final?
    @final
  end
  def transitions
    @transitions
  end
end

## All kinds of transition are one of these.
class Transition
  def destination_name
    return @destination_name
  end
  def destination_name=(dst)
    raise "destination already set" unless @destination_name.nil?
    @destination_name = dst
  end
end

## Represents "any-transitions" that match against sets.
class AnyTransition < Transition
  def initialize(kind)
    # Check to make sure this is a kind we know how to handle.
    case kind
    when "letter", "number", "space", "symbol"
      # do nothing
    else
      raise "unknown set for 'any': #{kind}"
    end
    @kind = kind
  end

  # Get a textual representation of this transition.
  def to_s
    "any #{@kind} -> #{self.destination_name || 'error'}"
  end
end

## Represents "plain-transitions" that match against text.
class PlainTransition < Transition
  def initialize(quoted_text)
    @quoted_text = quoted_text
  end

  # Get a textual representation of this transition.
  def to_s
    "#{@quoted_text} -> #{self.destination_name || 'error'}"
  end
end

## Parses a "new state" production.
def parse_state(lexer)
  # Allowed patterns:
  # state 1
  # start state 2
  # final state 3
  # start final state 4
  is_start = false
  is_final = false

  token = lexer.next()
  if token == "start"
    is_start = true
    token = lexer.next()
  end
  if token == "final"
    is_final = true
    token = lexer.next()
  end
  if token == "state"
    # Ignore the word "state"; it's optional when you have "start" or "final".
    token = lexer.next()
  end

  raise "expected new state label, but got '#{token}'" unless valid_state_name?(token)
  colon = lexer.next()
  raise "expected colon after state, but got '#{colon}'" unless colon == ':'
  return State.new(token, is_start, is_final)
end

## Parses a "new transition" production.
def parse_transition(lexer)
  # Allowed patterns:
  # any letter -> 1
  # "abc" -> 2
  # 'def' -> 3
  transition_label = lexer.next()
  if transition_label == "any"
    kind = lexer.next()
    transition = AnyTransition.new(kind)
  else
    raise "expected transition label, but got '#{transition_label}" unless quote?(transition_label[0])
    transition = PlainTransition.new(transition_label)
  end

  arrow_start = lexer.next()
  raise "expected -> after transition label, but got '#{arrow_start}'" unless arrow_start == '-'
  arrow_end = lexer.next()
  raise "expected -> after transition label, but got '#{arrow_start}#{arrow_end}'" unless arrow_end == '>'

  destination_name = lexer.next()
  if destination_name == "error"
    # Leave the destination empty in the transition to mean "error".
  else
    raise "expected destination state label, but got '#{destination_name}'" unless valid_state_name?(destination_name)
    transition.destination_name = destination_name
  end

  return transition
end

## Given text input, parse it into a list of states, each with a list of transitions.
def parse(input)
  # Use our lex() function from last time,
  # but put it in a form where we can get words on demand.
  lexer = enum_for(:lex, input)

  # Start with no states and no current state.
  states = []
  current_state = nil

  while true
    # Look at the first token from the lexer, but don't consume it yet.
    first_token = lexer.peek()

    # If we're done, jump out of the loop!
    break if first_token.nil?

    # Are we trying to make a new state or a new transition?
    case first_token
    when "state", "start", "final"
      current_state = parse_state(lexer)
      states << current_state
    else
      new_transition = parse_transition(lexer)
      raise "no current state" if current_state.nil?
      current_state.transitions << new_transition
    end
  end

  return states
end

if __FILE__ == $0
  # Parse the entire input file, then print out each resulting state.
  parse(ARGF.read()).each do |state|
    # First print the state's name and attributes.
    print("#{state.name}")
    if state.start? && state.final?
      print(" (start, final)")
    elsif state.start?
      print(" (start)")
    elsif state.final?
      print(" (final)")
    end

    # What's the difference between 'print' and 'puts'?
    # The latter adds a line break; the former doesn't.
    puts(":")

    # Then print all its transitions.
    state.transitions.each do |transition|
      puts("\t#{transition.to_s}")
    end
    puts
  end
end

## state_machine.rb
# Note that the state machine knows nothing about the lexer!
require_relative 'parser'

class PlainTransition
  # A plain transition matches input if the input starts with the text.
  def match(input)
    # Index 0 is the first letter, which we know is a quote.
    # Index -1 is the last letter, which should also be a quote.
    # So we test letters 1 through -2...
    if input.start_with?(@quoted_text[1..-2])
      # ...and chop off the first <length> letters from the input if there's a match.
      return input[(@quoted_text.length-2)..-1]
    end
    # Otherwise, return the special "nil" value for "no match".
    return nil
  end
end

class AnyTransition
  def match(input)
    # Chop off the first letter if we get a match.
    case @kind
    when "letter"
      return input[1..-1] if letter?(input[0])
    when "number"
      return input[1..-1] if digit?(input[0])
    when "space"
      return input[1..-1] if space?(input[0])
    when "symbol"
      return input[1..-1] unless letter?(input[0]) || digit?(input[0]) || space?(input[0])
    end
    return nil
  end
end

class StateMachine
  def initialize(states, whitespace: true)
    @whitespace_sensitive = whitespace

    # Use a mapping from state names to State objects,
    # rather than just a list.
    @states = {}

    # Add all the states to the map.
    states.each do |state|
      raise "duplicate state #{state.name}" unless @states[state.name].nil?
      @states[state.name] = state

      if state.start?
        raise "multiple start states: #{@start_state.name} and #{state.name}" unless @start_state.nil?
        @start_state = state
      end
    end

    # We should have exactly one start state.
    raise "missing start state" if @start_state.nil?

    # Check all the transitions.
    states.each do |state|
      state.transitions.each do |transition|
        destination = transition.destination_name
        next if destination.nil? # A nil name represents an error transition.
        raise "destination state #{destination} doesn't exist" if @states[destination].nil?
      end
    end
  end

  def match(input, verbose: false)
    @current_state = @start_state
    until input.empty?
      input = input.lstrip unless @whitespace_sensitive
      puts "\tstate #{self.current_state.name}, input '#{input}'" if verbose
      input = transition(input)
    end
    return final?
  end

  def transition(input)
    # Find the first transition that matches the input.
    remaining_input = nil
    transition_to_take = @current_state.transitions.find do |transition|
      remaining_input = transition.match(input)
      !remaining_input.nil? # If the match attempt didn't return "nil", it succeeded!
    end

    if transition_to_take.nil?
      @current_state = nil
    else
      @current_state = @states[transition_to_take.destination_name]
    end

    # Stop processing input if we're now in an error state.
    return "" if @current_state.nil?

    return remaining_input
  end

  def current_state
    @current_state
  end

  def error?
    @current_state.nil?
  end

  def final?
    !error? && @current_state.final?
  end
end

if __FILE__ == $0
  # Parse the entire input file and build a state machine.
  machine = StateMachine.new(parse(ARGF.read()))

  # Then try to match each line of input.
  STDIN.each do |input|
    input.chomp!() # Cut off the trailing newline.
    machine.match(input, verbose: true)

    if machine.final?
      puts "Match! (final state #{machine.current_state.name})"
    elsif machine.error?
      puts "No match (error state)"
    else
      puts "No match (ended in state #{machine.current_state.name})"
    end
  end
end
	## Return true if l is a single letter (or underscore), false if not.
	def letter?(l)
	case l
	when 'a'..'z', 'A'..'Z', '_'
	return true
	else
	return false
	end
	end

	## Return true if l is a single base-10 digit, false if not.
	def digit?(l)
	case l
	when '0'..'9'
	return true
	else
	return false
	end
	end

	## Return true if l is a space, tab, or newline.
	def space?(l)
	case l
	when ' ', '\t', '\n'
	return true
	else
	return false
	end
	end

	## Return true if l is a single or double quote.
	def quote?(l)
	case l
	when '"', "'"
	return true
	else
	return false
	end
	end

	## Splits the input into tokens and yields them to the given block.
	#
	# After we run out of tokens, yield a single nil to show that we're done.
	def lex(input)
	# Delete extra spaces from the beginning and end.
	input.strip!
	until input.empty?
	tok = nil # See sanity check below.

	case input[0]
	when 'a'..'z', 'A'..'Z', '_'
	# Split on word boundaries.
	i = 0
	i += 1 while letter?(input[i])
	tok = input.slice!(0...i)

	when '"', "'"
	# Look for the same kind of quote later in the input.
	# A more complicated lexer would give you a way to put double quotes
	# inside other double quotes, usually by putting a backslash first: \".
	# This one doesn't bother.
	end_quote_idx = input.index(input[0], 1)
	raise "missing ending quote" if end_quote_idx.nil?
	tok = input.slice!(0..end_quote_idx)

	when '(', ')', ',', ':', '-', '>'
	# Just take the single character.
	tok = input.slice!(0)

	when '0'..'9'
	i = 0
	i += 1 while digit?(input[i])
	tok = input.slice!(0...i)

	else
	raise "unexpected character: #{input[0]}"
	end

	raise "failed to account for a case" if tok.nil?
	yield tok

	# We ate a word. The next thing is probably one or more spaces. Delete them!
	input.lstrip!
	end

	# We're done; let our consumer know it.
	yield nil
	end

	if __FILE__ == $0
	# For each line in the input, pass it to our lexer and print out the tokens it generates.
	# Stop when the user puts in a blank line. (Make sure we account for the newline character.)
	ARGF.each do \|line\|
	exit if line.chomp.empty?
	lex(line) do \|token\|
	puts(token) unless token.nil?
	end
	puts
	end
	end
	require_relative 'lexer'

	## Returns true if the input is a valid state name:
	# either a number or quoted text.
	def valid_state_name?(token)
	return digit?(token[0]) \|\| quote?(token[0])
	end

	class State
	def initialize(name, is_start, is_final)
	@name = name
	@start = is_start
	@final = is_final
	@transitions = [] # Start with no transitions.
	end

	def name
	@name
	end
	def start?
	@start
	end
	def final?
	@final
	end
	def transitions
	@transitions
	end
	end

	## All kinds of transition are one of these.
	class Transition
	def destination_name
	return @destination_name
	end
	def destination_name=(dst)
	raise "destination already set" unless @destination_name.nil?
	@destination_name = dst
	end
	end

	## Represents "any-transitions" that match against sets.
	class AnyTransition < Transition
	def initialize(kind)
	# Check to make sure this is a kind we know how to handle.
	case kind
	when "letter", "number", "space", "symbol"
	# do nothing
	else
	raise "unknown set for 'any': #{kind}"
	end
	@kind = kind
	end

	# Get a textual representation of this transition.
	def to_s
	"any #{@kind} -> #{self.destination_name \|\| 'error'}"
	end
	end

	## Represents "plain-transitions" that match against text.
	class PlainTransition < Transition
	def initialize(quoted_text)
	@quoted_text = quoted_text
	end

	# Get a textual representation of this transition.
	def to_s
	"#{@quoted_text} -> #{self.destination_name \|\| 'error'}"
	end
	end

	## Parses a "new state" production.
	def parse_state(lexer)
	# Allowed patterns:
	# state 1
	# start state 2
	# final state 3
	# start final state 4
	is_start = false
	is_final = false

	token = lexer.next()
	if token == "start"
	is_start = true
	token = lexer.next()
	end
	if token == "final"
	is_final = true
	token = lexer.next()
	end
	if token == "state"
	# Ignore the word "state"; it's optional when you have "start" or "final".
	token = lexer.next()
	end

	raise "expected new state label, but got '#{token}'" unless valid_state_name?(token)
	colon = lexer.next()
	raise "expected colon after state, but got '#{colon}'" unless colon == ':'
	return State.new(token, is_start, is_final)
	end

	## Parses a "new transition" production.
	def parse_transition(lexer)
	# Allowed patterns:
	# any letter -> 1
	# "abc" -> 2
	# 'def' -> 3
	transition_label = lexer.next()
	if transition_label == "any"
	kind = lexer.next()
	transition = AnyTransition.new(kind)
	else
	raise "expected transition label, but got '#{transition_label}" unless quote?(transition_label[0])
	transition = PlainTransition.new(transition_label)
	end

	arrow_start = lexer.next()
	raise "expected -> after transition label, but got '#{arrow_start}'" unless arrow_start == '-'
	arrow_end = lexer.next()
	raise "expected -> after transition label, but got '#{arrow_start}#{arrow_end}'" unless arrow_end == '>'

	destination_name = lexer.next()
	if destination_name == "error"
	# Leave the destination empty in the transition to mean "error".
	else
	raise "expected destination state label, but got '#{destination_name}'" unless valid_state_name?(destination_name)
	transition.destination_name = destination_name
	end

	return transition
	end

	## Given text input, parse it into a list of states, each with a list of transitions.
	def parse(input)
	# Use our lex() function from last time,
	# but put it in a form where we can get words on demand.
	lexer = enum_for(:lex, input)

	# Start with no states and no current state.
	states = []
	current_state = nil

	while true
	# Look at the first token from the lexer, but don't consume it yet.
	first_token = lexer.peek()

	# If we're done, jump out of the loop!
	break if first_token.nil?

	# Are we trying to make a new state or a new transition?
	case first_token
	when "state", "start", "final"
	current_state = parse_state(lexer)
	states << current_state
	else
	new_transition = parse_transition(lexer)
	raise "no current state" if current_state.nil?
	current_state.transitions << new_transition
	end
	end

	return states
	end

	if __FILE__ == $0
	# Parse the entire input file, then print out each resulting state.
	parse(ARGF.read()).each do \|state\|
	# First print the state's name and attributes.
	print("#{state.name}")
	if state.start? && state.final?
	print(" (start, final)")
	elsif state.start?
	print(" (start)")
	elsif state.final?
	print(" (final)")
	end

	# What's the difference between 'print' and 'puts'?
	# The latter adds a line break; the former doesn't.
	puts(":")

	# Then print all its transitions.
	state.transitions.each do \|transition\|
	puts("\t#{transition.to_s}")
	end
	puts
	end
	end
	# Note that the state machine knows nothing about the lexer!
	require_relative 'parser'

	class PlainTransition
	# A plain transition matches input if the input starts with the text.
	def match(input)
	# Index 0 is the first letter, which we know is a quote.
	# Index -1 is the last letter, which should also be a quote.
	# So we test letters 1 through -2...
	if input.start_with?(@quoted_text[1..-2])
	# ...and chop off the first <length> letters from the input if there's a match.
	return input[(@quoted_text.length-2)..-1]
	end
	# Otherwise, return the special "nil" value for "no match".
	return nil
	end
	end

	class AnyTransition
	def match(input)
	# Chop off the first letter if we get a match.
	case @kind
	when "letter"
	return input[1..-1] if letter?(input[0])
	when "number"
	return input[1..-1] if digit?(input[0])
	when "space"
	return input[1..-1] if space?(input[0])
	when "symbol"
	return input[1..-1] unless letter?(input[0]) \|\| digit?(input[0]) \|\| space?(input[0])
	end
	return nil
	end
	end

	class StateMachine
	def initialize(states, whitespace: true)
	@whitespace_sensitive = whitespace

	# Use a mapping from state names to State objects,
	# rather than just a list.
	@states = {}

	# Add all the states to the map.
	states.each do \|state\|
	raise "duplicate state #{state.name}" unless @states[state.name].nil?
	@states[state.name] = state

	if state.start?
	raise "multiple start states: #{@start_state.name} and #{state.name}" unless @start_state.nil?
	@start_state = state
	end
	end

	# We should have exactly one start state.
	raise "missing start state" if @start_state.nil?

	# Check all the transitions.
	states.each do \|state\|
	state.transitions.each do \|transition\|
	destination = transition.destination_name
	next if destination.nil? # A nil name represents an error transition.
	raise "destination state #{destination} doesn't exist" if @states[destination].nil?
	end
	end
	end

	def match(input, verbose: false)
	@current_state = @start_state
	until input.empty?
	input = input.lstrip unless @whitespace_sensitive
	puts "\tstate #{self.current_state.name}, input '#{input}'" if verbose
	input = transition(input)
	end
	return final?
	end

	def transition(input)
	# Find the first transition that matches the input.
	remaining_input = nil
	transition_to_take = @current_state.transitions.find do \|transition\|
	remaining_input = transition.match(input)
	!remaining_input.nil? # If the match attempt didn't return "nil", it succeeded!
	end

	if transition_to_take.nil?
	@current_state = nil
	else
	@current_state = @states[transition_to_take.destination_name]
	end

	# Stop processing input if we're now in an error state.
	return "" if @current_state.nil?

	return remaining_input
	end

	def current_state
	@current_state
	end

	def error?
	@current_state.nil?
	end

	def final?
	!error? && @current_state.final?
	end
	end

	if __FILE__ == $0
	# Parse the entire input file and build a state machine.
	machine = StateMachine.new(parse(ARGF.read()))

	# Then try to match each line of input.
	STDIN.each do \|input\|
	input.chomp!() # Cut off the trailing newline.
	machine.match(input, verbose: true)

	if machine.final?
	puts "Match! (final state #{machine.current_state.name})"
	elsif machine.error?
	puts "No match (error state)"
	else
	puts "No match (ended in state #{machine.current_state.name})"
	end
	end
	end