CS Mini-Tutorial: State Machines
## Return true if l is a single letter (or underscore), false if not.
def letter?(l)
  case l
  when 'a'..'z', 'A'..'Z', '_'
    return true
  else
    return false
  end
end

## Return true if l is a single base-10 digit, false if not.
def digit?(l)
  case l
  when '0'..'9'
    return true
  else
    return false
  end
end

## Return true if l is a space, tab, or newline.
def space?(l)
  case l
  # Double-quoted "\t" and "\n" are a real tab and newline;
  # single-quoted '\t' would be a literal backslash followed by a t.
  when ' ', "\t", "\n"
    return true
  else
    return false
  end
end

## Return true if l is a single or double quote.
def quote?(l)
  case l
  when '"', "'"
    return true
  else
    return false
  end
end

## Splits the input into tokens and yields them to the given block.
#
# After we run out of tokens, yield a single nil to show that we're done.
def lex(input)
  # Delete extra spaces from the beginning and end.
  input.strip!
  until input.empty?
    tok = nil # See sanity check below.
    case input[0]
    when 'a'..'z', 'A'..'Z', '_'
      # Split on word boundaries.
      i = 0
      i += 1 while letter?(input[i])
      tok = input.slice!(0...i)
    when '"', "'"
      # Look for the same kind of quote later in the input.
      # A more complicated lexer would give you a way to put double quotes
      # inside other double quotes, usually by putting a backslash first: \".
      # This one doesn't bother.
      end_quote_idx = input.index(input[0], 1)
      raise "missing ending quote" if end_quote_idx.nil?
      tok = input.slice!(0..end_quote_idx)
    when '(', ')', ',', ':', '-', '>'
      # Just take the single character.
      tok = input.slice!(0)
    when '0'..'9'
      i = 0
      i += 1 while digit?(input[i])
      tok = input.slice!(0...i)
    else
      raise "unexpected character: #{input[0]}"
    end
    raise "failed to account for a case" if tok.nil?
    yield tok
    # We ate a word. The next thing is probably one or more spaces. Delete them!
    input.lstrip!
  end
  # We're done; let our consumer know it.
  yield nil
end

if __FILE__ == $0
  # For each line in the input, pass it to our lexer and print out the tokens it generates.
  # Stop when the user puts in a blank line. (Make sure we account for the newline character.)
  ARGF.each do |line|
    exit if line.chomp.empty?
    lex(line) do |token|
      puts(token) unless token.nil?
    end
    puts
  end
end

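# Before moving on to the parser (which pulls this file in with
# require_relative 'lexer'), here is a quick usage sketch. The sample line
# below is made up for illustration and isn't part of the tutorial's files:
#
#   tokens = []
#   lex('start state 1: "abc" -> 2') { |tok| tokens << tok unless tok.nil? }
#   p tokens  # => ["start", "state", "1", ":", "\"abc\"", "-", ">", "2"]
#
# Note the final nil that lex yields: it's how callers know the input is done.
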
require_relative 'lexer'

## Returns true if the input is a valid state name:
# either a number or quoted text.
def valid_state_name?(token)
  return digit?(token[0]) || quote?(token[0])
end

class State
  def initialize(name, is_start, is_final)
    @name = name
    @start = is_start
    @final = is_final
    @transitions = [] # Start with no transitions.
  end

  def name
    @name
  end

  def start?
    @start
  end

  def final?
    @final
  end

  def transitions
    @transitions
  end
end

## All kinds of transition are one of these.
class Transition
  def destination_name
    return @destination_name
  end

  def destination_name=(dst)
    raise "destination already set" unless @destination_name.nil?
    @destination_name = dst
  end
end

## Represents "any-transitions" that match against sets.
class AnyTransition < Transition
  def initialize(kind)
    # Check to make sure this is a kind we know how to handle.
    case kind
    when "letter", "number", "space", "symbol"
      # do nothing
    else
      raise "unknown set for 'any': #{kind}"
    end
    @kind = kind
  end

  # Get a textual representation of this transition.
  def to_s
    "any #{@kind} -> #{self.destination_name || 'error'}"
  end
end

## Represents "plain-transitions" that match against text.
class PlainTransition < Transition
  def initialize(quoted_text)
    @quoted_text = quoted_text
  end

  # Get a textual representation of this transition.
  def to_s
    "#{@quoted_text} -> #{self.destination_name || 'error'}"
  end
end

## Parses a "new state" production.
def parse_state(lexer)
  # Allowed patterns:
  #   state 1
  #   start state 2
  #   final state 3
  #   start final state 4
  is_start = false
  is_final = false
  token = lexer.next()
  if token == "start"
    is_start = true
    token = lexer.next()
  end
  if token == "final"
    is_final = true
    token = lexer.next()
  end
  if token == "state"
    # Ignore the word "state"; it's optional when you have "start" or "final".
    token = lexer.next()
  end
  raise "expected new state label, but got '#{token}'" unless valid_state_name?(token)
  colon = lexer.next()
  raise "expected colon after state, but got '#{colon}'" unless colon == ':'
  return State.new(token, is_start, is_final)
end

## Parses a "new transition" production.
def parse_transition(lexer)
  # Allowed patterns:
  #   any letter -> 1
  #   "abc" -> 2
  #   'def' -> 3
  transition_label = lexer.next()
  if transition_label == "any"
    kind = lexer.next()
    transition = AnyTransition.new(kind)
  else
    raise "expected transition label, but got '#{transition_label}'" unless quote?(transition_label[0])
    transition = PlainTransition.new(transition_label)
  end
  arrow_start = lexer.next()
  raise "expected -> after transition label, but got '#{arrow_start}'" unless arrow_start == '-'
  arrow_end = lexer.next()
  raise "expected -> after transition label, but got '#{arrow_start}#{arrow_end}'" unless arrow_end == '>'
  destination_name = lexer.next()
  if destination_name == "error"
    # Leave the destination empty in the transition to mean "error".
  else
    raise "expected destination state label, but got '#{destination_name}'" unless valid_state_name?(destination_name)
    transition.destination_name = destination_name
  end
  return transition
end

## Given text input, parse it into a list of states, each with a list of transitions.
def parse(input)
  # Use our lex() function from last time,
  # but put it in a form where we can get words on demand.
  lexer = enum_for(:lex, input)
  # Start with no states and no current state.
  states = []
  current_state = nil
  while true
    # Look at the first token from the lexer, but don't consume it yet.
    first_token = lexer.peek()
    # If we're done, jump out of the loop!
    break if first_token.nil?
    # Are we trying to make a new state or a new transition?
    case first_token
    when "state", "start", "final"
      current_state = parse_state(lexer)
      states << current_state
    else
      new_transition = parse_transition(lexer)
      raise "no current state" if current_state.nil?
      current_state.transitions << new_transition
    end
  end
  return states
end

if __FILE__ == $0
  # Parse the entire input file, then print out each resulting state.
  parse(ARGF.read()).each do |state|
    # First print the state's name and attributes.
    print("#{state.name}")
    if state.start? && state.final?
      print(" (start, final)")
    elsif state.start?
      print(" (start)")
    elsif state.final?
      print(" (final)")
    end
    # What's the difference between 'print' and 'puts'?
    # The latter adds a line break; the former doesn't.
    puts(":")
    # Then print all its transitions.
    state.transitions.each do |transition|
      puts("\t#{transition.to_s}")
    end
    puts
  end
end

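# A worked example (hypothetical input, not shipped with the tutorial).
# Feeding the parser a description like this:
#
#   start state 1:
#     any letter -> 2
#   final state 2:
#     any letter -> 2
#     "," -> error
#
# makes the driver above print each state, its attributes, and its
# transitions (each transition line is indented with a tab):
#
#   1 (start):
#       any letter -> 2
#
#   2 (final):
#       any letter -> 2
#       "," -> error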