Skip to content

Instantly share code, notes, and snippets.

@sampersand
Created May 13, 2022 19:02
Show Gist options
  • Save sampersand/2d855786cac4e1c7e9fe7b32775ef5be to your computer and use it in GitHub Desktop.
Save sampersand/2d855786cac4e1c7e9fe7b32775ef5be to your computer and use it in GitHub Desktop.
basic ruby friar parser
# Error raised when source text cannot be tokenized. It is a RuntimeError
# subclass, so a bare `rescue` (StandardError) still catches it.
class ParseError < RuntimeError
end
# A single lexical unit: a payload (+value+), a category (+kind+) and the
# position it was read from (+location+).
class Token
  attr_reader :value, :kind, :location

  # @param value [Object] the token's payload (e.g. an Integer, String, true/false/nil)
  # @param kind [Symbol] the token's category
  # @param location [Object] where the token started in the source
  def initialize(value, kind, location)
    @value, @kind, @location = value, kind, location
  end

  # Debug representation: the inspected value followed by the kind.
  # The location is intentionally left out.
  #
  # @return [String]
  def inspect
    "Token(#{@value.inspect}, #{@kind})"
  end

  # `to_s` renders exactly like `inspect`.
  alias_method :to_s, :inspect
end
# A position in a source file, identified by file name, line and column.
class SourceLocation
  attr_reader :file, :line, :column

  # @param file [Object] name of the file (may be nil)
  # @param line [Integer] line number
  # @param column [Integer] column number
  def initialize(file, line, column)
    @file, @line, @column = file, line, column
  end

  # @return [String] the location rendered as "<file> at <line>:<column>"
  def to_s = "#{file} at #{line}:#{column}"

  # Raises a ParseError whose message is prefixed with this location.
  #
  # The backtrace is taken from the caller, so the error appears to originate
  # at the call site rather than inside this helper.
  #
  # @param msg [String] description of the problem
  # @raise [ParseError] always
  def error(msg)
    backtrace = caller(1)
    message = "#{self}: #{msg}"
    raise ParseError, message, backtrace
  end
end
# Splits source text into Tokens, one per call to #next.
#
# The tokenizer holds the remaining characters of the input together with the
# current line and column (both starting at 1), so that every token and every
# error can be tagged with the SourceLocation where it began.
class Tokenizer
  # Reserved words; emitted as `:symbol` tokens instead of `:identifier`s.
  KEYWORDS = %w[global function return if else while continue break].map(&:freeze).freeze

  # @param source [String] the text to tokenize
  # @param file [String, nil] file name used when reporting locations
  def initialize(source, file: nil)
    @source = source.chars
    @file = file
    @line = 1
    @column = 1
  end

  # @return [SourceLocation] the position of the next unread character
  def location
    SourceLocation.new @file, @line, @column
  end

  # Reads the next token, skipping any leading whitespace and `#` comments.
  #
  # @return [Token, nil] the next token, or nil once the input is exhausted
  # @raise [ParseError] on an unterminated string, an integer immediately
  #   followed by a word character, or a character that cannot start a token
  def next
    skip_whitespace_and_comments
    start = location

    case peek
    when nil then nil # at eof, `peek` returns `nil`
    when /\d/ then integer_token(start)
    when "'", '"' then string_token(start)
    when /\w/ then word_token(start)
    when /[-+*\/%<>=!&|,;\(\)\[\]\{\}]/ then symbol_token(start)
    else start.error "unknown token start: #{peek.inspect}"
    end
  end

  private

  # Discards whitespace runs and `#`-to-end-of-line comments. Done with a loop
  # (not by recursing into #next) so long comment blocks cannot exhaust the stack.
  def skip_whitespace_and_comments
    loop do
      case peek
      when /\s/ then take_while_regex(/\s/)
      when '#' then take_while { |chr| chr != "\n" }
      else break
      end
    end
  end

  # Reads a run of digits. A word character directly after the digits is an error.
  def integer_token(start)
    int = take_while_regex(/\d/).to_i
    # `&.` because the literal may be the very last thing in the input.
    location.error 'invalid suffix after integer literal' if peek&.match?(/\w/)
    Token.new int, :integer, start
  end

  # Reads a quoted string; the token value is the text between the quotes.
  # Single- and double-quoted strings are currently treated the same.
  def string_token(start)
    quote = advance # consume the opening quote
    text = take_while { |chr| chr != quote }
    if eof?
      style = quote == "'" ? 'single' : 'double'
      start.error "unterminated #{style} quote encountered"
    end
    advance # consume the closing quote
    # TODO: interpolation for double-quoted strings
    Token.new text, :string, start
  end

  # Reads a run of word characters and classifies it as a literal
  # (true/false/null), a keyword, or a plain identifier.
  def word_token(start)
    word = take_while_regex(/\w/)
    case word
    when 'true' then Token.new true, :literal, start
    when 'false' then Token.new false, :literal, start
    when 'null' then Token.new nil, :literal, start
    when *KEYWORDS then Token.new word, :symbol, start
    else Token.new word, :identifier, start
    end
  end

  # Reads an operator or punctuation character; `!`, `=`, `<` and `>` absorb
  # a directly following `=` to form a two-character operator.
  def symbol_token(start)
    symbol = advance
    symbol << advance if %w[! = < >].include?(symbol) && peek == '='
    Token.new symbol, :symbol, start
  end

  # Consumes characters for as long as they match `regex`.
  def take_while_regex(regex)
    take_while { |chr| regex.match?(chr) }
  end

  # Consumes characters for as long as the block returns a truthy value.
  # Stops at end of input without calling the block.
  #
  # @return [String] the consumed characters (empty if none were consumed)
  def take_while
    acc = +''
    acc << advance while (chr = peek) && yield(chr)
    acc
  end

  # The next unread character, or nil at end of input.
  def peek = @source.first

  def eof? = peek.nil?

  # Consumes and returns one character, updating the line/column counters.
  #
  # @raise [ParseError] if called at end of input
  def advance
    chr = @source.shift
    location.error '`.advance` when at EOF' if chr.nil?

    if chr == "\n"
      @line += 1
      @column = 1
    else
      @column += 1
    end
    chr
  end
end
# Demo: tokenize a small sample program and print each token as it is read.
tokenizer = Tokenizer.new(<<EOS)
function foo () {
x = 34;
}
EOS
while (token = tokenizer.next)
  p token
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment