# quephird/lexer.rb

## lexer.rb
# Indentation-sensitive lexer that turns source text into a flat array of
# [type, value] token pairs.
#
# A block is opened by a ":" immediately followed by a line break and at
# least one space of indentation; it is closed when a later line starts at a
# shallower indentation.
class Lexer
  # Identifiers emitted under their own upper-cased symbol (e.g. "if" => :IF)
  # instead of :IDENTIFIER.
  KEYWORDS = ["def", "class", "if", "true", "false", "nil"].freeze

  # Each pattern is anchored with \A so it only matches at the start of the
  # remaining input.
  IDENTIFIER_REGEX = /\A([a-z]\w*)/
  CONSTANT_REGEX = /\A([A-Z]\w*)/
  NUMBER_REGEX = /\A([0-9]+)/
  STRING_REGEX = /\A"([^"]*)"/
  NEW_BLOCK_REGEX = /\A\:\n( +)/m
  INDENT_REGEX = /\A\n( *)/m
  OPERATOR_REGEX = /\A(\|\||&&|==|!=|<=|>=)/
  SPACE_REGEX = /\A /

  # Tokenizes +code+.
  #
  # @param code [String] source text; it is not modified, so frozen strings
  #   are accepted
  # @return [Array<Array>] tokens as [type, value] pairs. Types are symbols
  #   (:IDENTIFIER, :CONSTANT, :NUMBER, :STRING, :INDENT, :DEDENT, :NEWLINE or
  #   an upper-cased keyword such as :IF); operators and any otherwise
  #   unrecognised single character use the matched text itself as the type.
  # @raise [RuntimeError] when a ":" block does not indent deeper than the
  #   current level, or when a line indents deeper without a preceding ":"
  def tokenize_old(code)
    # Drop the trailing line break on a copy: `chomp!` would mutate the
    # caller's string and raise FrozenError on frozen input.
    code = code.chomp
    # This will hold the generated tokens
    tokens = []
    # Number of spaces in the last indent
    current_indent = 0
    # Indent widths of the blocks that are currently open, innermost last
    indent_stack = []
    # Current character position
    i = 0
    while i < code.size
      chunk = code[i..-1]

      if (identifier = chunk[IDENTIFIER_REGEX, 1])
        if KEYWORDS.include?(identifier)
          tokens << [identifier.upcase.to_sym, identifier]
        else
          tokens << [:IDENTIFIER, identifier]
        end
        # Skip what we just parsed
        i += identifier.size

      elsif (constant = chunk[CONSTANT_REGEX, 1])
        tokens << [:CONSTANT, constant]
        i += constant.size

      elsif (number = chunk[NUMBER_REGEX, 1])
        tokens << [:NUMBER, number.to_i]
        i += number.size

      elsif (string = chunk[STRING_REGEX, 1])
        tokens << [:STRING, string]
        # Skip two more to exclude the `"`s.
        i += string.size + 2

      elsif (indent = chunk[NEW_BLOCK_REGEX, 1])
        # Case 1: ":" + line break + spaces opens a block, so the indent must
        # go up.
        if indent.size <= current_indent
          raise "Bad indent level, got #{indent.size} indents, expected > #{current_indent}"
        end
        current_indent = indent.size
        indent_stack.push(current_indent)
        tokens << [:INDENT, indent.size]
        # Skip the spaces plus the ":" and the line break
        i += indent.size + 2

      elsif (indent = chunk[INDENT_REGEX, 1])
        if indent.size == current_indent
          # Case 2: still in the same block
          tokens << [:NEWLINE, "\n"]
        elsif indent.size < current_indent
          # Case 3: close blocks until the current level is no deeper than
          # this line's indent
          while indent.size < current_indent
            indent_stack.pop
            current_indent = indent_stack.last || 0
            tokens << [:DEDENT, indent.size]
          end
          tokens << [:NEWLINE, "\n"]
        else
          # indent.size > current_indent:
          # cannot increase indent level without using ":"
          raise "Missing ':'"
        end
        # Skip the spaces plus the line break
        i += indent.size + 1

      elsif (operator = chunk[OPERATOR_REGEX, 1])
        tokens << [operator, operator]
        i += operator.size

      elsif chunk.match(SPACE_REGEX)
        # Single spaces between tokens are ignored
        i += 1

      else
        # Anything else is emitted as a one-character token
        value = chunk[0, 1]
        tokens << [value, value]
        i += 1
      end
    end

    # Close every block that is still open at end of input
    until indent_stack.empty?
      indent_stack.pop
      tokens << [:DEDENT, indent_stack.first || 0]
    end
    tokens
  end
end
	# Indentation-sensitive lexer that turns source text into a flat array of
	# [type, value] token pairs.
	#
	# A block is opened by a ":" immediately followed by a line break and at
	# least one space of indentation; it is closed when a later line starts at a
	# shallower indentation.
	class Lexer
	  # Identifiers emitted under their own upper-cased symbol (e.g. "if" => :IF)
	  # instead of :IDENTIFIER.
	  KEYWORDS = ["def", "class", "if", "true", "false", "nil"].freeze

	  # Each pattern is anchored with \A so it only matches at the start of the
	  # remaining input.
	  IDENTIFIER_REGEX = /\A([a-z]\w*)/
	  CONSTANT_REGEX = /\A([A-Z]\w*)/
	  NUMBER_REGEX = /\A([0-9]+)/
	  STRING_REGEX = /\A"([^"]*)"/
	  NEW_BLOCK_REGEX = /\A\:\n( +)/m
	  INDENT_REGEX = /\A\n( *)/m
	  # Two-character operators: || && == != <= >=
	  OPERATOR_REGEX = /\A(\|\||&&|==|!=|<=|>=)/
	  SPACE_REGEX = /\A /

	  # Tokenizes +code+.
	  #
	  # @param code [String] source text; it is not modified, so frozen strings
	  #   are accepted
	  # @return [Array<Array>] tokens as [type, value] pairs. Types are symbols
	  #   (:IDENTIFIER, :CONSTANT, :NUMBER, :STRING, :INDENT, :DEDENT, :NEWLINE or
	  #   an upper-cased keyword such as :IF); operators and any otherwise
	  #   unrecognised single character use the matched text itself as the type.
	  # @raise [RuntimeError] when a ":" block does not indent deeper than the
	  #   current level, or when a line indents deeper without a preceding ":"
	  def tokenize_old(code)
	    # Drop the trailing line break on a copy: `chomp!` would mutate the
	    # caller's string and raise FrozenError on frozen input.
	    code = code.chomp
	    # This will hold the generated tokens
	    tokens = []
	    # Number of spaces in the last indent
	    current_indent = 0
	    # Indent widths of the blocks that are currently open, innermost last
	    indent_stack = []
	    # Current character position
	    i = 0
	    while i < code.size
	      chunk = code[i..-1]

	      if (identifier = chunk[IDENTIFIER_REGEX, 1])
	        if KEYWORDS.include?(identifier)
	          tokens << [identifier.upcase.to_sym, identifier]
	        else
	          tokens << [:IDENTIFIER, identifier]
	        end
	        # Skip what we just parsed
	        i += identifier.size

	      elsif (constant = chunk[CONSTANT_REGEX, 1])
	        tokens << [:CONSTANT, constant]
	        i += constant.size

	      elsif (number = chunk[NUMBER_REGEX, 1])
	        tokens << [:NUMBER, number.to_i]
	        i += number.size

	      elsif (string = chunk[STRING_REGEX, 1])
	        tokens << [:STRING, string]
	        # Skip two more to exclude the `"`s.
	        i += string.size + 2

	      elsif (indent = chunk[NEW_BLOCK_REGEX, 1])
	        # Case 1: ":" + line break + spaces opens a block, so the indent must
	        # go up.
	        if indent.size <= current_indent
	          raise "Bad indent level, got #{indent.size} indents, expected > #{current_indent}"
	        end
	        current_indent = indent.size
	        indent_stack.push(current_indent)
	        tokens << [:INDENT, indent.size]
	        # Skip the spaces plus the ":" and the line break
	        i += indent.size + 2

	      elsif (indent = chunk[INDENT_REGEX, 1])
	        if indent.size == current_indent
	          # Case 2: still in the same block
	          tokens << [:NEWLINE, "\n"]
	        elsif indent.size < current_indent
	          # Case 3: close blocks until the current level is no deeper than
	          # this line's indent
	          while indent.size < current_indent
	            indent_stack.pop
	            current_indent = indent_stack.last || 0
	            tokens << [:DEDENT, indent.size]
	          end
	          tokens << [:NEWLINE, "\n"]
	        else
	          # indent.size > current_indent:
	          # cannot increase indent level without using ":"
	          raise "Missing ':'"
	        end
	        # Skip the spaces plus the line break
	        i += indent.size + 1

	      elsif (operator = chunk[OPERATOR_REGEX, 1])
	        tokens << [operator, operator]
	        i += operator.size

	      elsif chunk.match(SPACE_REGEX)
	        # Single spaces between tokens are ignored
	        i += 1

	      else
	        # Anything else is emitted as a one-character token
	        value = chunk[0, 1]
	        tokens << [value, value]
	        i += 1
	      end
	    end

	    # Close every block that is still open at end of input
	    until indent_stack.empty?
	      indent_stack.pop
	      tokens << [:DEDENT, indent_stack.first || 0]
	    end
	    tokens
	  end
	end