-
-
Save belkadan/29480faa7b045b121bdd to your computer and use it in GitHub Desktop.
CS Mini-Tutorial: State Machines
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Return true if l is a single letter (or underscore), false if not. | |
def letter?(l) | |
case l | |
when 'a'..'z', 'A'..'Z', '_' | |
return true | |
else | |
return false | |
end | |
end | |
## Return true if l is a single base-10 digit, false if not. | |
def digit?(l) | |
case l | |
when '0'..'9' | |
return true | |
else | |
return false | |
end | |
end | |
## Return true if l is a space, tab, or newline. | |
def space?(l) | |
case l | |
when ' ', '\t', '\n' | |
return true | |
else | |
return false | |
end | |
end | |
## Return true if l is a single or double quote. | |
def quote?(l) | |
case l | |
when '"', "'" | |
return true | |
else | |
return false | |
end | |
end | |
## Splits the input into tokens and yields them to the given block. | |
# | |
# After we run out of tokens, yield a single nil to show that we're done. | |
def lex(input) | |
# Delete extra spaces from the beginning and end. | |
input.strip! | |
until input.empty? | |
tok = nil # See sanity check below. | |
case input[0] | |
when 'a'..'z', 'A'..'Z', '_' | |
# Split on word boundaries. | |
i = 0 | |
i += 1 while letter?(input[i]) | |
tok = input.slice!(0...i) | |
when '"', "'" | |
# Look for the same kind of quote later in the input. | |
# A more complicated lexer would give you a way to put double quotes | |
# inside other double quotes, usually by putting a backslash first: \". | |
# This one doesn't bother. | |
end_quote_idx = input.index(input[0], 1) | |
raise "missing ending quote" if end_quote_idx.nil? | |
tok = input.slice!(0..end_quote_idx) | |
when '(', ')', ',', ':', '-', '>' | |
# Just take the single character. | |
tok = input.slice!(0) | |
when '0'..'9' | |
i = 0 | |
i += 1 while digit?(input[i]) | |
tok = input.slice!(0...i) | |
else | |
raise "unexpected character: #{input[0]}" | |
end | |
raise "failed to account for a case" if tok.nil? | |
yield tok | |
# We ate a word. The next thing is probably one or more spaces. Delete them! | |
input.lstrip! | |
end | |
# We're done; let our consumer know it. | |
yield nil | |
end | |
if __FILE__ == $0 | |
# For each line in the input, pass it to our lexer and print out the tokens it generates. | |
# Stop when the user puts in a blank line. (Make sure we account for the newline character.) | |
ARGF.each do |line| | |
exit if line.chomp.empty? | |
lex(line) do |token| | |
puts(token) unless token.nil? | |
end | |
puts | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require_relative 'lexer' | |
## Returns true if the input is a valid state name: | |
# either a number or quoted text. | |
def valid_state_name?(token) | |
return digit?(token[0]) || quote?(token[0]) | |
end | |
class State | |
def initialize(name, is_start, is_final) | |
@name = name | |
@start = is_start | |
@final = is_final | |
@transitions = [] # Start with no transitions. | |
end | |
def name | |
@name | |
end | |
def start? | |
@start | |
end | |
def final? | |
@final | |
end | |
def transitions | |
@transitions | |
end | |
end | |
## All kinds of transition are one of these. | |
class Transition | |
def destination_name | |
return @destination_name | |
end | |
def destination_name=(dst) | |
raise "destination already set" unless @destination_name.nil? | |
@destination_name = dst | |
end | |
end | |
## Represents "any-transitions" that match against sets. | |
class AnyTransition < Transition | |
def initialize(kind) | |
# Check to make sure this is a kind we know how to handle. | |
case kind | |
when "letter", "number", "space", "symbol" | |
# do nothing | |
else | |
raise "unknown set for 'any': #{kind}" | |
end | |
@kind = kind | |
end | |
# Get a textual representation of this transition. | |
def to_s | |
"any #{@kind} -> #{self.destination_name || 'error'}" | |
end | |
end | |
## Represents "plain-transitions" that match against text. | |
class PlainTransition < Transition | |
def initialize(quoted_text) | |
@quoted_text = quoted_text | |
end | |
# Get a textual representation of this transition. | |
def to_s | |
"#{@quoted_text} -> #{self.destination_name || 'error'}" | |
end | |
end | |
## Parses a "new state" production. | |
def parse_state(lexer) | |
# Allowed patterns: | |
# state 1 | |
# start state 2 | |
# final state 3 | |
# start final state 4 | |
is_start = false | |
is_final = false | |
token = lexer.next() | |
if token == "start" | |
is_start = true | |
token = lexer.next() | |
end | |
if token == "final" | |
is_final = true | |
token = lexer.next() | |
end | |
if token == "state" | |
# Ignore the word "state"; it's optional when you have "start" or "final". | |
token = lexer.next() | |
end | |
raise "expected new state label, but got '#{token}'" unless valid_state_name?(token) | |
colon = lexer.next() | |
raise "expected colon after state, but got '#{colon}'" unless colon == ':' | |
return State.new(token, is_start, is_final) | |
end | |
## Parses a "new transition" production. | |
def parse_transition(lexer) | |
# Allowed patterns: | |
# any letter -> 1 | |
# "abc" -> 2 | |
# 'def' -> 3 | |
transition_label = lexer.next() | |
if transition_label == "any" | |
kind = lexer.next() | |
transition = AnyTransition.new(kind) | |
else | |
raise "expected transition label, but got '#{transition_label}" unless quote?(transition_label[0]) | |
transition = PlainTransition.new(transition_label) | |
end | |
arrow_start = lexer.next() | |
raise "expected -> after transition label, but got '#{arrow_start}'" unless arrow_start == '-' | |
arrow_end = lexer.next() | |
raise "expected -> after transition label, but got '#{arrow_start}#{arrow_end}'" unless arrow_end == '>' | |
destination_name = lexer.next() | |
if destination_name == "error" | |
# Leave the destination empty in the transition to mean "error". | |
else | |
raise "expected destination state label, but got '#{destination_name}'" unless valid_state_name?(destination_name) | |
transition.destination_name = destination_name | |
end | |
return transition | |
end | |
## Given text input, parse it into a list of states, each with a list of transitions. | |
def parse(input) | |
# Use our lex() function from last time, | |
# but put it in a form where we can get words on demand. | |
lexer = enum_for(:lex, input) | |
# Start with no states and no current state. | |
states = [] | |
current_state = nil | |
while true | |
# Look at the first token from the lexer, but don't consume it yet. | |
first_token = lexer.peek() | |
# If we're done, jump out of the loop! | |
break if first_token.nil? | |
# Are we trying to make a new state or a new transition? | |
case first_token | |
when "state", "start", "final" | |
current_state = parse_state(lexer) | |
states << current_state | |
else | |
new_transition = parse_transition(lexer) | |
raise "no current state" if current_state.nil? | |
current_state.transitions << new_transition | |
end | |
end | |
return states | |
end | |
if __FILE__ == $0 | |
# Parse the entire input file, then print out each resulting state. | |
parse(ARGF.read()).each do |state| | |
# First print the state's name and attributes. | |
print("#{state.name}") | |
if state.start? && state.final? | |
print(" (start, final)") | |
elsif state.start? | |
print(" (start)") | |
elsif state.final? | |
print(" (final)") | |
end | |
# What's the difference between 'print' and 'puts'? | |
# The latter adds a line break; the former doesn't. | |
puts(":") | |
# Then print all its transitions. | |
state.transitions.each do |transition| | |
puts("\t#{transition.to_s}") | |
end | |
puts | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment