# LastTalon/Scanner.rb

## Scanner.rb
# Allows the user to scan through input one character at a time.
# The scanner automatically fetches a new line when the current one is used
# up; the newline character ending each line is returned like any other
# character, so the caller can tell that a line break has occurred.
# Once the input is exhausted, get_next_char returns nil.
class Scanner

  # Initializes the scanner with its first line of input.
  #
  # input - optional object responding to +gets+ (for example an IO or a
  #         StringIO). When omitted, lines are read with Kernel#gets.
  def initialize(input = nil)
    @input = input
    @cur_line = read_line
  end

  # Puts a string or character back onto the front of the scanner, so it is
  # the next thing returned by get_next_char. Pushing back nil is a no-op.
  def push_back(str)
    @cur_line = str.to_s + @cur_line
  end

  # Takes a single character off of the scanner.
  # Returns the character, or nil when there is no more input.
  def get_next_char
    @cur_line = read_line if @cur_line.empty?
    return nil if @cur_line.empty? # refill found nothing: end of input

    ch = @cur_line[0]
    @cur_line = @cur_line[1..-1]
    ch
  end

  private

  # Reads the next line from the input source. gets returns nil at end of
  # input; that is mapped to "" so @cur_line is always a String.
  def read_line
    line = @input ? @input.gets : gets
    line || ""
  end
end

## Tokenizer.rb
require_relative "Scanner.rb"

# Tokenizes the input provided by a Scanner. Analyses it character by
# character, printing every token found to standard output, until the end
# token ("?") is processed or the input runs out.
class Tokenizer

  # Initializes the Tokenizer with a new scanner and a list of
  # reserved words. Each reserved word maps to the numeric code that is
  # printed next to it.
  def initialize
    @scanner = Scanner.new
    @reserved_words = {
      "begin" => 1,
      "end" => 2,
      "require" => 3,
      "def" => 4,
      "class" => 5,
      "if" => 6,
      "while" => 7,
      "else" => 8,
      "elsif" => 9,
      "for" => 10,
      "return" => 11,
      "and" => 12,
      "or" => 13
    }
  end

  # The main method of the Tokenizer. Should be called when tokens need
  # to be generated. Separates the input into identifier, reserved, number,
  # comment, string, operator, and end tokens. Stops after the end token,
  # or quietly when the scanner reports end of input (nil).
  def run
    loop do
      ch = @scanner.get_next_char
      case ch
      when nil
        break # input exhausted before an end token was seen
      when /[a-zA-Z_]/
        do_identifier(ch)
      when /[0-9]/
        do_number(ch)
      when '#'
        do_comment(ch)
      when "\""
        do_string(ch)
      when /[\s]/
        next # whitespace only separates tokens
      when '?'
        puts "End Token: ?"
        break
      else
        do_operator(ch)
      end
    end
  end

  # Separates a single identifier token. Also recognizes if that
  # identifier token is actually a reserved token.
  #
  # ch - the first character of the identifier, already taken off the scanner.
  def do_identifier(ch)
    iden = read_run(ch, /[a-zA-Z0-9_]/)
    resv = @reserved_words[iden]
    if resv
      puts "Reserved Word: #{iden} - #{resv}"
    else
      puts "Identifier: #{iden}"
    end
  end

  # Separates a single number token. Only accepts integer numbers.
  #
  # ch - the first digit, already taken off the scanner.
  def do_number(ch)
    puts "Number: " + read_run(ch, /[0-9]/)
  end

  # Separates a single comment token. Only accepts single
  # line comments to the end of line. The terminating newline is consumed
  # but not included; end of input also ends the comment.
  #
  # ch - the character that opened the comment.
  def do_comment(ch)
    str = ch.dup
    loop do
      ch1 = @scanner.get_next_char
      break if ch1.nil? || ch1 == "\n"
      str << ch1
    end
    puts "Comment: " + str
  end

  # Separates a single operator token. Supports two character
  # and three character operators: an operator character followed by "=",
  # and runs of up to three dots.
  #
  # ch - the first character of the operator.
  def do_operator(ch)
    str = ch.dup
    if ch =~ /[<>=!+\-\*\/%\|~&\^]/
      ch1 = @scanner.get_next_char
      if ch1 == '='
        str << ch1
      else
        unread(ch1)
      end
    elsif ch == '.'
      # Up to two further dots may follow the first one.
      2.times do
        ch1 = @scanner.get_next_char
        if ch1 == '.'
          str << ch1
        else
          unread(ch1)
          break
        end
      end
    end
    puts "Operator: " + str
  end

  # Separates a single string. Recognizes bad strings: a string that reaches
  # a newline or the end of input before its closing quote is reported as
  # "Bad String".
  #
  # ch - the opening quote character.
  def do_string(ch)
    str = ch.dup
    escaped = false
    loop do
      ch1 = @scanner.get_next_char
      if ch1.nil? || ch1 == "\n"
        puts "Bad String: " + str
        return
      end
      str << ch1
      if escaped
        # This character is the target of the preceding backslash, so it has
        # no special meaning - in particular a second backslash does not
        # start another escape.
        escaped = false
      elsif ch1 == '\\'
        escaped = true
      elsif ch1 == '"'
        puts "String: " + str
        return
      end
    end
  end

  private

  # Gives +ch+ back to the scanner unless it is whitespace or nil (end of
  # input); whitespace is skipped between tokens anyway, so it is dropped.
  def unread(ch)
    @scanner.push_back(ch) if ch =~ /[\S]/
  end

  # Collects consecutive characters matching +pattern+, starting with the
  # already-read character +first+. The character that ends the run is
  # handed to unread. Returns the collected text.
  def read_run(first, pattern)
    token = first.dup
    loop do
      ch = @scanner.get_next_char
      if ch =~ pattern
        token << ch
      else
        unread(ch)
        return token
      end
    end
  end

end
	# Allows the user to scan through input one character at a time.
	# The scanner automatically fetches a new line when the current one is used
	# up; the newline character ending each line is returned like any other
	# character, so the caller can tell that a line break has occurred.
	# Once the input is exhausted, get_next_char returns nil.
	class Scanner

	  # Initializes the scanner with its first line of input.
	  #
	  # input - optional object responding to +gets+ (for example an IO or a
	  #         StringIO). When omitted, lines are read with Kernel#gets.
	  def initialize(input = nil)
	    @input = input
	    @cur_line = read_line
	  end

	  # Puts a string or character back onto the front of the scanner, so it is
	  # the next thing returned by get_next_char. Pushing back nil is a no-op.
	  def push_back(str)
	    @cur_line = str.to_s + @cur_line
	  end

	  # Takes a single character off of the scanner.
	  # Returns the character, or nil when there is no more input.
	  def get_next_char
	    @cur_line = read_line if @cur_line.empty?
	    return nil if @cur_line.empty? # refill found nothing: end of input

	    ch = @cur_line[0]
	    @cur_line = @cur_line[1..-1]
	    ch
	  end

	  private

	  # Reads the next line from the input source. gets returns nil at end of
	  # input; that is mapped to "" so @cur_line is always a String.
	  def read_line
	    line = @input ? @input.gets : gets
	    line || ""
	  end
	end
	require_relative "Scanner.rb"

	# Tokenizes the input provided by a Scanner. Analyses it character by
	# character, printing every token found to standard output, until the end
	# token ("?") is processed or the input runs out.
	class Tokenizer

	  # Initializes the Tokenizer with a new scanner and a list of
	  # reserved words. Each reserved word maps to the numeric code that is
	  # printed next to it.
	  def initialize
	    @scanner = Scanner.new
	    @reserved_words = {
	      "begin" => 1,
	      "end" => 2,
	      "require" => 3,
	      "def" => 4,
	      "class" => 5,
	      "if" => 6,
	      "while" => 7,
	      "else" => 8,
	      "elsif" => 9,
	      "for" => 10,
	      "return" => 11,
	      "and" => 12,
	      "or" => 13
	    }
	  end

	  # The main method of the Tokenizer. Should be called when tokens need
	  # to be generated. Separates the input into identifier, reserved, number,
	  # comment, string, operator, and end tokens. Stops after the end token,
	  # or quietly when the scanner reports end of input (nil).
	  def run
	    loop do
	      ch = @scanner.get_next_char
	      case ch
	      when nil
	        break # input exhausted before an end token was seen
	      when /[a-zA-Z_]/
	        do_identifier(ch)
	      when /[0-9]/
	        do_number(ch)
	      when '#'
	        do_comment(ch)
	      when "\""
	        do_string(ch)
	      when /[\s]/
	        next # whitespace only separates tokens
	      when '?'
	        puts "End Token: ?"
	        break
	      else
	        do_operator(ch)
	      end
	    end
	  end

	  # Separates a single identifier token. Also recognizes if that
	  # identifier token is actually a reserved token.
	  #
	  # ch - the first character of the identifier, already taken off the scanner.
	  def do_identifier(ch)
	    iden = read_run(ch, /[a-zA-Z0-9_]/)
	    resv = @reserved_words[iden]
	    if resv
	      puts "Reserved Word: #{iden} - #{resv}"
	    else
	      puts "Identifier: #{iden}"
	    end
	  end

	  # Separates a single number token. Only accepts integer numbers.
	  #
	  # ch - the first digit, already taken off the scanner.
	  def do_number(ch)
	    puts "Number: " + read_run(ch, /[0-9]/)
	  end

	  # Separates a single comment token. Only accepts single
	  # line comments to the end of line. The terminating newline is consumed
	  # but not included; end of input also ends the comment.
	  #
	  # ch - the character that opened the comment.
	  def do_comment(ch)
	    str = ch.dup
	    loop do
	      ch1 = @scanner.get_next_char
	      break if ch1.nil? || ch1 == "\n"
	      str << ch1
	    end
	    puts "Comment: " + str
	  end

	  # Separates a single operator token. Supports two character
	  # and three character operators: an operator character followed by "=",
	  # and runs of up to three dots.
	  #
	  # ch - the first character of the operator.
	  def do_operator(ch)
	    str = ch.dup
	    if ch =~ /[<>=!+\-\*\/%\\|~&\^]/
	      ch1 = @scanner.get_next_char
	      if ch1 == '='
	        str << ch1
	      else
	        unread(ch1)
	      end
	    elsif ch == '.'
	      # Up to two further dots may follow the first one.
	      2.times do
	        ch1 = @scanner.get_next_char
	        if ch1 == '.'
	          str << ch1
	        else
	          unread(ch1)
	          break
	        end
	      end
	    end
	    puts "Operator: " + str
	  end

	  # Separates a single string. Recognizes bad strings: a string that reaches
	  # a newline or the end of input before its closing quote is reported as
	  # "Bad String".
	  #
	  # ch - the opening quote character.
	  def do_string(ch)
	    str = ch.dup
	    escaped = false
	    loop do
	      ch1 = @scanner.get_next_char
	      if ch1.nil? || ch1 == "\n"
	        puts "Bad String: " + str
	        return
	      end
	      str << ch1
	      if escaped
	        # This character is the target of the preceding backslash, so it has
	        # no special meaning - in particular a second backslash does not
	        # start another escape.
	        escaped = false
	      elsif ch1 == '\\'
	        escaped = true
	      elsif ch1 == '"'
	        puts "String: " + str
	        return
	      end
	    end
	  end

	  private

	  # Gives +ch+ back to the scanner unless it is whitespace or nil (end of
	  # input); whitespace is skipped between tokens anyway, so it is dropped.
	  def unread(ch)
	    @scanner.push_back(ch) if ch =~ /[\S]/
	  end

	  # Collects consecutive characters matching +pattern+, starting with the
	  # already-read character +first+. The character that ends the run is
	  # handed to unread. Returns the collected text.
	  def read_run(first, pattern)
	    token = first.dup
	    loop do
	      ch = @scanner.get_next_char
	      if ch =~ pattern
	        token << ch
	      else
	        unread(ch)
	        return token
	      end
	    end
	  end

	end