Skip to content

Instantly share code, notes, and snippets.

@skryukov
Created Oct 23, 2021
Embed
What would you like to do?
class Lexer
# list of all symbols in our calculator with token names
PUNCTUATION = {
'+' => :tPLUS, '-' => :tMINUS,
'*' => :tSTAR, '/' => :tDIVIDE,
'(' => :tLPAREN, ')' => :tRPAREN
}
%%{ # fix highlighting %
machine ultimate_math_machine;
access @;
variable eof @eof;
# regexp-like rules
number = ('-'?[0-9]+('.'[0-9]+)?);
operator = "+" | "-" | "/" | "*";
paren = "(" | ")";
main := |*
# when number is passed, call emmit with token type :tNUMBER
number => { emit(:tNUMBER) };
# when an operator or a parenthesis is passed,
# call emmit_table to use PUNCTUATION to choose token
operator | paren => { emit_table(PUNCTUATION) };
# space is a predefined Ragel state machine for whitespaces
space;
*|;
}%% # fix highlighting %
def initialize
@data = nil # array of input symbols
@ts = nil # token start index
@te = nil # token end index
@eof = nil # EOF index
@tokens = [] # resulting array of tokens
end
def run(input)
@data = input.unpack("c*") if input.is_a?(String)
@eof = input.length
%%{ # fix highlighting %
write data;
write init;
write exec;
}%% # fix highlighting %
# return tokens as a result
@tokens
end
# rebuild substring from input array and current indices
def current_token
@data[@ts...@te].pack("c*")
end
# push current token to the resulting array
def emit(type, tok = current_token)
@tokens.push([type, tok])
end
# use passed hash `table` to define type of the token and call `emit`
def emit_table(table)
token = current_token
emit(table[token], token)
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment