Skip to content

Instantly share code, notes, and snippets.

@LastTalon
Created June 11, 2018 22:58
Show Gist options
  • Save LastTalon/2181639ca52b99d6777a98e1dd846594 to your computer and use it in GitHub Desktop.
Save LastTalon/2181639ca52b99d6777a98e1dd846594 to your computer and use it in GitHub Desktop.
# Allows the user to scan through input one character at a time.
# The scanner automatically reads a new line (via Kernel#gets) when the
# current one is used up, and returns the newline character itself so the
# caller can tell that a line break has occurred.
#
# At end of input get_next_char returns nil instead of raising.
class Scanner
  # Initializes the scanner with the first line of input.
  def initialize
    @cur_line = read_line
  end

  # Puts a string or character back onto the front of the scanner so that
  # it is returned by the following get_next_char call(s).
  #
  # str - the String to put back.
  def push_back(str)
    @cur_line = str + @cur_line
  end

  # Takes a single character off of the scanner, reading another line
  # first when the current one is exhausted.
  #
  # Returns the next character as a one character String, or nil when
  # the input has ended.
  def get_next_char
    @cur_line = read_line if @cur_line.empty?
    # Still empty after a refill attempt means gets hit end of input.
    return nil if @cur_line.empty?

    ch = @cur_line[0]
    @cur_line = @cur_line[1..-1]
    ch
  end

  private

  # Reads the next input line. Kernel#gets returns nil at end of input;
  # that is normalized to an empty string so the buffer is always a String.
  def read_line
    gets || ""
  end
end
require_relative "Scanner.rb"
# Tokenizes input provided. Analyses character by character,
# converting everything to tokens until the end token ('?') is processed.
# A nil character from the scanner is treated as end of input and also
# stops tokenizing.
class Tokenizer
  # Initializes the Tokenizer with a new scanner and a list of
  # reserved words, each mapped to the numeric code printed with it.
  def initialize
    @scanner = Scanner.new
    @reserved_words = {
      "begin" => 1,
      "end" => 2,
      "require" => 3,
      "def" => 4,
      "class" => 5,
      "if" => 6,
      "while" => 7,
      "else" => 8,
      "elsif" => 9,
      "for" => 10,
      "return" => 11,
      "and" => 12,
      "or" => 13
    }
  end

  # The main method of the Tokenizer. Should be called when tokens need
  # to be generated. Separates the input into identifier, reserved, number,
  # comment, string, operator, and end tokens, printing each one.
  # Stops after the end token '?' or when the scanner returns nil.
  def run
    loop do
      ch = @scanner.get_next_char
      break if ch.nil? # input ended without an end token

      case ch
      when /[a-zA-Z_]/ then do_identifier(ch)
      when /[0-9]/ then do_number(ch)
      when '#' then do_comment(ch)
      when "\"" then do_string(ch)
      when /[\s]/ then next # whitespace only separates tokens
      when '?'
        puts "End Token: ?"
        break
      else
        do_operator(ch)
      end
    end
  end

  # Separates a single identifier token. Also recognizes if that
  # identifier token is actually a reserved token.
  #
  # ch - the first character of the identifier (already consumed).
  def do_identifier(ch)
    iden = read_while(ch, /[a-zA-Z0-9_]/)
    resv = @reserved_words[iden]
    if resv.nil?
      puts "Identifier: " + iden
    else
      puts "Reserved Word: " + iden + " - " + resv.to_s
    end
  end

  # Separates a single number token. Only accepts integer numbers.
  #
  # ch - the first digit of the number (already consumed).
  def do_number(ch)
    puts "Number: " + read_while(ch, /[0-9]/)
  end

  # Separates a single comment token. Only accepts single
  # line comments, which run to the end of the line (or end of input).
  # The terminating newline is consumed but not part of the token.
  #
  # ch - the '#' that started the comment (already consumed).
  def do_comment(ch)
    str = ch.dup
    loop do
      ch1 = @scanner.get_next_char
      break if ch1.nil? || ch1 == "\n"

      str << ch1
    end
    puts "Comment: " + str
  end

  # Separates a single operator token. Recognizes an operator character
  # followed by '=' (e.g. '>=', '+=') and the range operators '..' and
  # '...'; any other character is emitted as a one character operator.
  #
  # ch - the first character of the operator (already consumed).
  def do_operator(ch)
    str = ch.dup
    if ch =~ /[<>=!+\-\*\/%\|~&\^]/
      str << '=' if accept('=')
    elsif ch == '.'
      if accept('.')
        str << '.'
        str << '.' if accept('.')
      end
    end
    puts "Operator: " + str
  end

  # Separates a single string. A string that reaches the end of the line
  # (or end of input) before its closing quote is reported as a bad string.
  #
  # ch - the opening double quote (already consumed).
  def do_string(ch)
    str = ch.dup
    escaped = false
    loop do
      ch1 = @scanner.get_next_char
      if ch1.nil? || ch1 == "\n"
        puts "Bad String: " + str
        return
      end

      str << ch1
      if escaped
        # This character was escaped by the preceding backslash, so it has
        # no special meaning -- including when it is itself a backslash.
        escaped = false
      elsif ch1 == '\\'
        escaped = true
      elsif ch1 == '"'
        puts "String: " + str
        return
      end
    end
  end

  private

  # Collects characters from the scanner for as long as they match
  # pattern, starting from the already consumed character first.
  # The first non-matching character is handed to unread.
  # Returns the collected String.
  def read_while(first, pattern)
    text = first.dup
    loop do
      ch = @scanner.get_next_char
      # Regexp#=~ returns nil for a nil argument, so end of input stops here too.
      unless pattern =~ ch
        unread(ch)
        return text
      end
      text << ch
    end
  end

  # Pushes a look-ahead character back onto the scanner unless it is
  # whitespace (or nil); whitespace carries no token and is dropped.
  def unread(ch)
    @scanner.push_back(ch) if /[\S]/ =~ ch
  end

  # Consumes the next character if it equals expected and returns true;
  # otherwise hands it to unread and returns false.
  def accept(expected)
    ch = @scanner.get_next_char
    return true if ch == expected

    unread(ch)
    false
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment