Skip to content

Instantly share code, notes, and snippets.

@quephird
Created January 10, 2017 20:02
Show Gist options
  • Save quephird/b06651c328ef6918210710c7a80297d6 to your computer and use it in GitHub Desktop.
Save quephird/b06651c328ef6918210710c7a80297d6 to your computer and use it in GitHub Desktop.
# Hand-written lexer for a small indentation-sensitive language.
#
# A block is opened by a ":" immediately followed by a newline and a deeper
# indent, and is closed when a later line returns to a shallower indent.
class Lexer
  # Words that are emitted as their own token type instead of :IDENTIFIER.
  KEYWORDS = ["def", "class", "if", "true", "false", "nil"]

  # Every pattern is anchored with \A because it is always applied to the
  # not-yet-consumed remainder of the input.
  IDENTIFIER_REGEX = /\A([a-z]\w*)/
  CONSTANT_REGEX = /\A([A-Z]\w*)/
  NUMBER_REGEX = /\A([0-9]+)/
  STRING_REGEX = /\A"([^"]*)"/
  NEW_BLOCK_REGEX = /\A\:\n( +)/m
  INDENT_REGEX = /\A\n( *)/m
  OPERATOR_REGEX = /\A(\|\||&&|==|!=|<=|>=)/
  SPACE_REGEX = /\A /

  # Converts source text into a flat list of tokens.
  #
  # Each token is a two-element array +[type, value]+:
  # * keywords      -> +[:DEF, "def"]+ (upcased keyword as a symbol)
  # * identifiers   -> +[:IDENTIFIER, name]+
  # * constants     -> +[:CONSTANT, name]+
  # * numbers       -> +[:NUMBER, Integer]+
  # * strings       -> +[:STRING, contents]+ (without the surrounding quotes)
  # * block start   -> +[:INDENT, width]+
  # * block end     -> +[:DEDENT, width]+
  # * line break    -> +[:NEWLINE, "\n"]+
  # * operators and any other single character -> +[text, text]+
  #
  # The input string is never modified, so frozen strings are accepted.
  #
  # @param code [String] source text to tokenize
  # @return [Array<Array>] the tokens, in source order
  # @raise [RuntimeError] when a ":" block does not increase the indent, or
  #   when the indent increases without a preceding ":"
  def tokenize_old(code)
    # Drop one trailing line break. Work on a copy: the caller's string must
    # not be mutated (and may be frozen).
    code = code.chomp

    tokens = []
    # Width, in spaces, of the block currently being lexed.
    current_indent = 0
    # Widths of all enclosing blocks, innermost last.
    indent_stack = []
    # Offset of the first character that has not been consumed yet.
    i = 0

    while i < code.size
      chunk = code[i..-1]

      if (identifier = chunk[IDENTIFIER_REGEX, 1])
        tokens <<
          if KEYWORDS.include?(identifier)
            [identifier.upcase.to_sym, identifier]
          else
            [:IDENTIFIER, identifier]
          end
        i += identifier.size
      elsif (constant = chunk[CONSTANT_REGEX, 1])
        tokens << [:CONSTANT, constant]
        i += constant.size
      elsif (number = chunk[NUMBER_REGEX, 1])
        tokens << [:NUMBER, number.to_i]
        i += number.size
      elsif (string = chunk[STRING_REGEX, 1])
        tokens << [:STRING, string]
        # The capture excludes the two quote characters; skip them as well.
        i += string.size + 2
      elsif (indent = chunk[NEW_BLOCK_REGEX, 1])
        # ":" + newline + spaces opens a block, which must be indented
        # deeper than the block that contains it.
        if indent.size <= current_indent
          raise "Bad indent level, got #{indent.size} indents, " +
                "expected > #{current_indent}"
        end
        current_indent = indent.size
        indent_stack.push(current_indent)
        tokens << [:INDENT, indent.size]
        # Skip the ":" and the newline in addition to the spaces.
        i += indent.size + 2
      elsif (indent = chunk[INDENT_REGEX, 1])
        if indent.size == current_indent
          # Same block: the line break is only a statement separator.
          tokens << [:NEWLINE, "\n"]
        elsif indent.size < current_indent
          # Close every block that is indented deeper than the new line.
          while indent.size < current_indent
            indent_stack.pop
            current_indent = indent_stack.last || 0
            tokens << [:DEDENT, indent.size]
          end
          tokens << [:NEWLINE, "\n"]
        else
          # The indent may only grow through a ":" block.
          raise "Missing ':'"
        end
        # Skip the newline in addition to the spaces.
        i += indent.size + 1
      elsif (operator = chunk[OPERATOR_REGEX, 1])
        tokens << [operator, operator]
        i += operator.size
      elsif chunk.match?(SPACE_REGEX)
        # Spaces between tokens carry no meaning.
        i += 1
      else
        # Anything else is emitted as a one-character token of its own.
        value = chunk[0, 1]
        tokens << [value, value]
        i += 1
      end
    end

    # Close the blocks that are still open at the end of the input.
    until indent_stack.empty?
      indent_stack.pop
      tokens << [:DEDENT, indent_stack.first || 0]
    end

    tokens
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment