Created
May 13, 2022 19:02
-
-
Save sampersand/2d855786cac4e1c7e9fe7b32775ef5be to your computer and use it in GitHub Desktop.
basic ruby friar parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Raised when the source text cannot be tokenized or parsed.
class ParseError < RuntimeError; end
# A single lexical unit: its value, the kind of token it is, and the location
# object it was created with.
class Token
  attr_reader :value, :kind, :location

  # value    - the token's payload (whatever the creator passes in).
  # kind     - a tag describing the token (interpolated as-is by #inspect).
  # location - where the token started; stored and exposed, never inspected here.
  def initialize(value, kind, location)
    @value = value
    @kind = kind
    @location = location
  end

  # Debug representation, e.g. `Token(34, integer)`. The location is omitted.
  def inspect = "Token(#{@value.inspect}, #{@kind})"

  alias to_s inspect
end
# A position in a source text: file name (may be nil), line, and column.
class SourceLocation
  attr_reader :file, :line, :column

  def initialize(file, line, column)
    @file = file
    @line = line
    @column = column
  end

  # Renders as "FILE at LINE:COLUMN". When no file name is known (`file` is
  # nil) only "LINE:COLUMN" is returned, instead of a string with a dangling
  # leading " at ".
  def to_s
    position = "#{line}:#{column}"
    file.nil? ? position : "#{file} at #{position}"
  end

  # Raises a ParseError whose message is prefixed with this location.
  # `caller(1)` is passed as the backtrace so it starts at the code that
  # called #error rather than inside this helper.
  def error(msg)
    raise ParseError, "#{self}: #{msg}", caller(1)
  end
end
# Converts source text into Token objects, handing out one token per call to
# #next while tracking the line and column of everything it consumes.
class Tokenizer
  # Reserved words; these are emitted as :symbol tokens rather than :identifier.
  KEYWORDS = %w[global function return if else while continue break].map(&:freeze).freeze

  # Characters that can begin a :symbol token.
  SYMBOL_START = /[-+*\/%<>=!&|,;\(\)\[\]\{\}]/

  # Symbols that absorb an immediately following `=` (`!=`, `==`, `<=`, `>=`).
  EQUALS_SUFFIXABLE = %w[! = < >].freeze

  # source - the full text to tokenize.
  # file   - optional name handed to every SourceLocation that is created.
  def initialize(source, file: nil)
    @source = source.chars
    @file = file
    @line = 1
    @column = 1
  end

  # The position of the next unread character.
  def location
    SourceLocation.new @file, @line, @column
  end

  # Returns the next Token, or nil once the input is exhausted.
  # Whitespace and `#` comments are skipped first. Raises ParseError (through
  # SourceLocation#error) for malformed input.
  def next
    skip_ignorable
    start = location

    case peek
    when nil then nil # at eof, `peek` returns `nil`
    when /\d/ then integer_token(start)
    when "'", '"' then string_token(start)
    when /\w/ then word_token(start)
    when SYMBOL_START then symbol_token(start)
    else start.error "unknown token start: #{peek.inspect}"
    end
  end

  private

  # Discards runs of whitespace and `#`-to-end-of-line comments. Written as a
  # loop so long stretches of comments do not deepen the call stack.
  def skip_ignorable
    loop do
      case peek
      when /\s/ then take_while_regex(/\s/)
      when '#' then take_while { _1 != "\n" }
      else break
      end
    end
  end

  # Number literals: a run of digits that must not be followed by a word character.
  def integer_token(start)
    int = take_while_regex(/\d/).to_i
    # `peek` is nil when the integer is the last thing in the input.
    location.error "invalid suffix after integer literal" if peek&.match?(/\w/)
    Token.new int, :integer, start
  end

  # Quoted strings. Both quote styles currently yield their literal contents.
  def string_token(start)
    quote = advance # consume the opening quote so the scan below starts inside the string
    text = take_while { _1 != quote }
    if eof?
      style = quote == "'" ? "single" : "double"
      start.error "unterminated #{style} quote encountered"
    end
    advance # consume the closing quote
    # todo interpolation (double-quoted strings)
    Token.new text, :string, start
  end

  # Literals (`true`/`false`/`null`), keywords, and identifiers.
  def word_token(start)
    word = take_while_regex(/\w/)
    case word
    when 'true' then Token.new true, :literal, start
    when 'false' then Token.new false, :literal, start
    when 'null' then Token.new nil, :literal, start
    when *KEYWORDS then Token.new word, :symbol, start
    else Token.new word, :identifier, start
    end
  end

  # One-character symbols, plus the two-character `!=`, `==`, `<=`, `>=`.
  def symbol_token(start)
    symbol = advance
    symbol << advance if EQUALS_SUFFIXABLE.include?(symbol) && peek == '='
    Token.new symbol, :symbol, start
  end

  def take_while_regex(regex)
    take_while { regex.match? _1 }
  end

  # Consumes characters while the block is truthy for the next one and returns
  # them as a string ("" when nothing matched).
  def take_while(&block)
    acc = +""
    acc << advance while peek&.then(&block) # `&.then` so eof doesn't call block.
    acc
  end

  def peek = @source.first

  def eof? = peek.nil?

  # Consumes and returns one character, updating the line/column counters.
  # Raises ParseError at end of input.
  def advance
    chr = @source.shift
    location.error "`.advance` when at EOF" if chr.nil?

    if chr == "\n"
      @line += 1
      @column = 1
    else
      @column += 1
    end
    chr
  end
end
# Demo: tokenize a small sample program and print every token.
tokenizer = Tokenizer.new(<<EOS)
function foo () {
x = 34;
}
EOS

# `next` returns nil at end of input, which ends the loop.
while (token = tokenizer.next)
  p token
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment