Created
June 11, 2018 22:58
-
-
Save LastTalon/2181639ca52b99d6777a98e1dd846594 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Allows the user to scan through input one character at a time.
# The scanner automatically fetches a new line when the current one is
# exhausted and returns the newline character itself, so the caller can
# tell that a line break has occurred.
class Scanner
  # Initializes the scanner and reads the first line.
  #
  # @param input [#gets] object that supplies lines. Defaults to ARGF,
  #   the stream Kernel#gets reads from.
  def initialize(input = ARGF)
    @input = input
    @cur_line = @input.gets
  end

  # Puts a string or character back onto the front of the scanner.
  #
  # @param str [String] text that the next get_next_char calls return first
  def push_back(str)
    # @cur_line is nil once the input is exhausted; treat that as empty so
    # pushing back is still possible after the last line has been read.
    @cur_line = str + @cur_line.to_s
  end

  # Takes a single character off of the scanner.
  #
  # @return [String, nil] the next character, or nil when the input has
  #   no more lines
  def get_next_char
    # Refill from the input for as long as the buffered line is used up.
    @cur_line = @input.gets while @cur_line && @cur_line.empty?
    return nil if @cur_line.nil?

    ch = @cur_line[0]
    @cur_line = @cur_line[1..-1]
    ch
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require_relative "Scanner.rb" | |
# Tokenizes input provided. Analyses character by character,
# converting everything to tokens until the end token ('?') is processed
# or the scanner reports that the input is exhausted (nil).
class Tokenizer
  # Reserved words mapped to the numeric code printed next to them.
  RESERVED_WORDS = {
    "begin" => 1,
    "end" => 2,
    "require" => 3,
    "def" => 4,
    "class" => 5,
    "if" => 6,
    "while" => 7,
    "else" => 8,
    "elsif" => 9,
    "for" => 10,
    "return" => 11,
    "and" => 12,
    "or" => 13
  }.freeze

  # Characters that may continue an identifier after its first character.
  IDENTIFIER_CHAR = /[a-zA-Z0-9_]/
  # Characters that make up an integer number.
  DIGIT = /[0-9]/
  # Operators that may be followed by '=' to form a two character operator.
  OPERATOR_START = /[<>=!+\-\*\/%\|~&\^]/

  # Initializes the Tokenizer with a scanner and the list of
  # reserved words.
  #
  # @param scanner [#get_next_char, #push_back] character source; a new
  #   Scanner is created when none is given
  def initialize(scanner = Scanner.new)
    @scanner = scanner
    @reserved_words = RESERVED_WORDS
  end

  # The main method of the Tokenizer. Should be called when tokens need
  # to be generated. Separates the input into identifier, reserved, number,
  # comment, string, operator, and end tokens, printing each one.
  def run
    loop do
      ch = @scanner.get_next_char
      case ch
      when nil then break # input ran out before an end token was seen
      when /[a-zA-Z_]/ then do_identifier(ch)
      when /[0-9]/ then do_number(ch)
      when '#' then do_comment(ch)
      when "\"" then do_string(ch)
      when /[\s]/ then next # whitespace only separates tokens
      when '?'
        puts "End Token: ?"
        break
      else
        do_operator(ch)
      end
    end
  end

  # Separates a single identifier token. Also recognizes if that
  # identifier token is actually a reserved token.
  #
  # @param ch [String] first character of the identifier
  def do_identifier(ch)
    iden = read_run(ch, IDENTIFIER_CHAR)
    resv = @reserved_words[iden]
    if resv
      puts "Reserved Word: #{iden} - #{resv}"
    else
      puts "Identifier: #{iden}"
    end
  end

  # Separates a single number token. Only accepts integer numbers.
  #
  # @param ch [String] first digit of the number
  def do_number(ch)
    puts "Number: #{read_run(ch, DIGIT)}"
  end

  # Separates a single comment token. Only accepts single
  # line comments to the end of line (or to the end of the input).
  #
  # @param ch [String] the '#' that opened the comment
  def do_comment(ch)
    text = ch.dup
    loop do
      nxt = @scanner.get_next_char
      break if nxt.nil? || nxt == "\n"

      text << nxt
    end
    puts "Comment: #{text}"
  end

  # Separates a single operator token. Supports the two character
  # operators formed by appending '=' and the '..' / '...' operators.
  #
  # @param ch [String] first character of the operator
  def do_operator(ch)
    op = ch.dup
    if ch =~ OPERATOR_START
      op << '=' if accept('=')
    elsif ch == '.'
      if accept('.')
        op << '.'
        op << '.' if accept('.')
      end
    end
    puts "Operator: #{op}"
  end

  # Separates a single string. A string that reaches the end of the line
  # (or of the input) before its closing quote is reported as a bad string.
  #
  # @param ch [String] the opening double quote
  def do_string(ch)
    text = ch.dup
    escaped = false
    loop do
      nxt = @scanner.get_next_char
      if nxt.nil? || nxt == "\n"
        puts "Bad String: #{text}"
        return
      end

      text << nxt
      if escaped
        # The character after a backslash is taken literally. This includes
        # a second backslash, which must not escape the quote that follows.
        escaped = false
      elsif nxt == '\\'
        escaped = true
      elsif nxt == '"'
        puts "String: #{text}"
        return
      end
    end
  end

  private

  # Collects characters matching pattern onto first and returns the result.
  # The first non-matching character is handed back to the scanner.
  def read_run(first, pattern)
    token = first.dup
    loop do
      nxt = @scanner.get_next_char
      unless nxt && nxt =~ pattern
        push_back_unless_blank(nxt)
        return token
      end
      token << nxt
    end
  end

  # Consumes the next character when it equals expected and returns true;
  # otherwise hands the character back to the scanner and returns false.
  def accept(expected)
    nxt = @scanner.get_next_char
    return true if nxt == expected

    push_back_unless_blank(nxt)
    false
  end

  # Returns ch to the scanner unless it is whitespace or nil. Whitespace is
  # dropped because run would skip it anyway.
  def push_back_unless_blank(ch)
    @scanner.push_back(ch) if ch =~ /[\S]/
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment