-
-
Save belkadan/29480faa7b045b121bdd to your computer and use it in GitHub Desktop.
CS Mini-Tutorial: State Machines
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Return true if l is a single letter (or underscore), false if not. | |
def letter?(l) | |
case l | |
when 'a'..'z', 'A'..'Z', '_' | |
return true | |
else | |
return false | |
end | |
end | |
## Return true if l is a single base-10 digit, false if not. | |
def digit?(l) | |
case l | |
when '0'..'9' | |
return true | |
else | |
return false | |
end | |
end | |
## Return true if l is a space, tab, or newline. | |
def space?(l) | |
case l | |
when ' ', '\t', '\n' | |
return true | |
else | |
return false | |
end | |
end | |
## Return true if l is a single or double quote. | |
def quote?(l) | |
case l | |
when '"', "'" | |
return true | |
else | |
return false | |
end | |
end | |
## Splits the input into tokens and yields them to the given block. | |
# | |
# After we run out of tokens, yield a single nil to show that we're done. | |
def lex(input) | |
# Delete extra spaces from the beginning and end. | |
input.strip! | |
until input.empty? | |
tok = nil # See sanity check below. | |
case input[0] | |
when 'a'..'z', 'A'..'Z', '_' | |
# Split on word boundaries. | |
i = 0 | |
i += 1 while letter?(input[i]) | |
tok = input.slice!(0...i) | |
when '"', "'" | |
# Look for the same kind of quote later in the input. | |
# A more complicated lexer would give you a way to put double quotes | |
# inside other double quotes, usually by putting a backslash first: \". | |
# This one doesn't bother. | |
end_quote_idx = input.index(input[0], 1) | |
raise "missing ending quote" if end_quote_idx.nil? | |
tok = input.slice!(0..end_quote_idx) | |
when '(', ')', ',', ':', '-', '>' | |
# Just take the single character. | |
tok = input.slice!(0) | |
when '0'..'9' | |
i = 0 | |
i += 1 while digit?(input[i]) | |
tok = input.slice!(0...i) | |
else | |
raise "unexpected character: #{input[0]}" | |
end | |
raise "failed to account for a case" if tok.nil? | |
yield tok | |
# We ate a word. The next thing is probably one or more spaces. Delete them! | |
input.lstrip! | |
end | |
# We're done; let our consumer know it. | |
yield nil | |
end | |
if __FILE__ == $0 | |
# For each line in the input, pass it to our lexer and print out the tokens it generates. | |
# Stop when the user puts in a blank line. (Make sure we account for the newline character.) | |
ARGF.each do |line| | |
exit if line.chomp.empty? | |
lex(line) do |token| | |
puts(token) unless token.nil? | |
end | |
puts | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require_relative 'lexer' | |
## Returns true if the input is a valid state name: | |
# either a number or quoted text. | |
def valid_state_name?(token) | |
return digit?(token[0]) || quote?(token[0]) | |
end | |
class State | |
def initialize(name, is_start, is_final) | |
@name = name | |
@start = is_start | |
@final = is_final | |
@transitions = [] # Start with no transitions. | |
end | |
def name | |
@name | |
end | |
def start? | |
@start | |
end | |
def final? | |
@final | |
end | |
def transitions | |
@transitions | |
end | |
end | |
## All kinds of transition are one of these. | |
class Transition | |
def destination_name | |
return @destination_name | |
end | |
def destination_name=(dst) | |
raise "destination already set" unless @destination_name.nil? | |
@destination_name = dst | |
end | |
end | |
## Represents "any-transitions" that match against sets. | |
class AnyTransition < Transition | |
def initialize(kind) | |
# Check to make sure this is a kind we know how to handle. | |
case kind | |
when "letter", "number", "space", "symbol" | |
# do nothing | |
else | |
raise "unknown set for 'any': #{kind}" | |
end | |
@kind = kind | |
end | |
# Get a textual representation of this transition. | |
def to_s | |
"any #{@kind} -> #{self.destination_name || 'error'}" | |
end | |
end | |
## Represents "plain-transitions" that match against text. | |
class PlainTransition < Transition | |
def initialize(quoted_text) | |
@quoted_text = quoted_text | |
end | |
# Get a textual representation of this transition. | |
def to_s | |
"#{@quoted_text} -> #{self.destination_name || 'error'}" | |
end | |
end | |
## Parses a "new state" production. | |
def parse_state(lexer) | |
# Allowed patterns: | |
# state 1 | |
# start state 2 | |
# final state 3 | |
# start final state 4 | |
is_start = false | |
is_final = false | |
token = lexer.next() | |
if token == "start" | |
is_start = true | |
token = lexer.next() | |
end | |
if token == "final" | |
is_final = true | |
token = lexer.next() | |
end | |
if token == "state" | |
# Ignore the word "state"; it's optional when you have "start" or "final". | |
token = lexer.next() | |
end | |
raise "expected new state label, but got '#{token}'" unless valid_state_name?(token) | |
colon = lexer.next() | |
raise "expected colon after state, but got '#{colon}'" unless colon == ':' | |
return State.new(token, is_start, is_final) | |
end | |
## Parses a "new transition" production. | |
def parse_transition(lexer) | |
# Allowed patterns: | |
# any letter -> 1 | |
# "abc" -> 2 | |
# 'def' -> 3 | |
transition_label = lexer.next() | |
if transition_label == "any" | |
kind = lexer.next() | |
transition = AnyTransition.new(kind) | |
else | |
raise "expected transition label, but got '#{transition_label}" unless quote?(transition_label[0]) | |
transition = PlainTransition.new(transition_label) | |
end | |
arrow_start = lexer.next() | |
raise "expected -> after transition label, but got '#{arrow_start}'" unless arrow_start == '-' | |
arrow_end = lexer.next() | |
raise "expected -> after transition label, but got '#{arrow_start}#{arrow_end}'" unless arrow_end == '>' | |
destination_name = lexer.next() | |
if destination_name == "error" | |
# Leave the destination empty in the transition to mean "error". | |
else | |
raise "expected destination state label, but got '#{destination_name}'" unless valid_state_name?(destination_name) | |
transition.destination_name = destination_name | |
end | |
return transition | |
end | |
## Given text input, parse it into a list of states, each with a list of transitions. | |
def parse(input) | |
# Use our lex() function from last time, | |
# but put it in a form where we can get words on demand. | |
lexer = enum_for(:lex, input) | |
# Start with no states and no current state. | |
states = [] | |
current_state = nil | |
while true | |
# Look at the first token from the lexer, but don't consume it yet. | |
first_token = lexer.peek() | |
# If we're done, jump out of the loop! | |
break if first_token.nil? | |
# Are we trying to make a new state or a new transition? | |
case first_token | |
when "state", "start", "final" | |
current_state = parse_state(lexer) | |
states << current_state | |
else | |
new_transition = parse_transition(lexer) | |
raise "no current state" if current_state.nil? | |
current_state.transitions << new_transition | |
end | |
end | |
return states | |
end | |
if __FILE__ == $0 | |
# Parse the entire input file, then print out each resulting state. | |
parse(ARGF.read()).each do |state| | |
# First print the state's name and attributes. | |
print("#{state.name}") | |
if state.start? && state.final? | |
print(" (start, final)") | |
elsif state.start? | |
print(" (start)") | |
elsif state.final? | |
print(" (final)") | |
end | |
# What's the difference between 'print' and 'puts'? | |
# The latter adds a line break; the former doesn't. | |
puts(":") | |
# Then print all its transitions. | |
state.transitions.each do |transition| | |
puts("\t#{transition.to_s}") | |
end | |
puts | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Note that the state machine knows nothing about the lexer! | |
require_relative 'parser' | |
class PlainTransition | |
# A plain transition matches input if the input starts with the text. | |
def match(input) | |
# Index 0 is the first letter, which we know is a quote. | |
# Index -1 is the last letter, which should also be a quote. | |
# So we test letters 1 through -2... | |
if input.start_with?(@quoted_text[1..-2]) | |
# ...and chop off the first <length> letters from the input if there's a match. | |
return input[(@quoted_text.length-2)..-1] | |
end | |
# Otherwise, return the special "nil" value for "no match". | |
return nil | |
end | |
end | |
class AnyTransition | |
def match(input) | |
# Chop off the first letter if we get a match. | |
case @kind | |
when "letter" | |
return input[1..-1] if letter?(input[0]) | |
when "number" | |
return input[1..-1] if digit?(input[0]) | |
when "space" | |
return input[1..-1] if space?(input[0]) | |
when "symbol" | |
return input[1..-1] unless letter?(input[0]) || digit?(input[0]) || space?(input[0]) | |
end | |
return nil | |
end | |
end | |
class StateMachine | |
def initialize(states, whitespace: true) | |
@whitespace_sensitive = whitespace | |
# Use a mapping from state names to State objects, | |
# rather than just a list. | |
@states = {} | |
# Add all the states to the map. | |
states.each do |state| | |
raise "duplicate state #{state.name}" unless @states[state.name].nil? | |
@states[state.name] = state | |
if state.start? | |
raise "multiple start states: #{@start_state.name} and #{state.name}" unless @start_state.nil? | |
@start_state = state | |
end | |
end | |
# We should have exactly one start state. | |
raise "missing start state" if @start_state.nil? | |
# Check all the transitions. | |
states.each do |state| | |
state.transitions.each do |transition| | |
destination = transition.destination_name | |
next if destination.nil? # A nil name represents an error transition. | |
raise "destination state #{destination} doesn't exist" if @states[destination].nil? | |
end | |
end | |
end | |
def match(input, verbose: false) | |
@current_state = @start_state | |
until input.empty? | |
input = input.lstrip unless @whitespace_sensitive | |
puts "\tstate #{self.current_state.name}, input '#{input}'" if verbose | |
input = transition(input) | |
end | |
return final? | |
end | |
def transition(input) | |
# Find the first transition that matches the input. | |
remaining_input = nil | |
transition_to_take = @current_state.transitions.find do |transition| | |
remaining_input = transition.match(input) | |
!remaining_input.nil? # If the match attempt didn't return "nil", it succeeded! | |
end | |
if transition_to_take.nil? | |
@current_state = nil | |
else | |
@current_state = @states[transition_to_take.destination_name] | |
end | |
# Stop processing input if we're now in an error state. | |
return "" if @current_state.nil? | |
return remaining_input | |
end | |
def current_state | |
@current_state | |
end | |
def error? | |
@current_state.nil? | |
end | |
def final? | |
!error? && @current_state.final? | |
end | |
end | |
if __FILE__ == $0 | |
# Parse the entire input file and build a state machine. | |
machine = StateMachine.new(parse(ARGF.read())) | |
# Then try to match each line of input. | |
STDIN.each do |input| | |
input.chomp!() # Cut off the trailing newline. | |
machine.match(input, verbose: true) | |
if machine.final? | |
puts "Match! (final state #{machine.current_state.name})" | |
elsif machine.error? | |
puts "No match (error state)" | |
else | |
puts "No match (ended in state #{machine.current_state.name})" | |
end | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment