-
-
Save banister/e9d4081b7d3409e30a57 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A lexical token: a numeric token type paired with the text that was matched.
class Token
  # type - Integer token type; used as an index into ListLexer::TokenNames.
  # text - String holding the matched source text.
  attr_accessor :type, :text

  def initialize(type, text)
    self.type = type
    self.text = text
  end

  # Renders the token as "<text, TYPE_NAME>", looking the type name up in
  # ListLexer::TokenNames.
  def to_s
    "<#{text}, #{ListLexer::TokenNames[type]}>"
  end
end
# Base class for character-level lexers. It keeps a cursor (+p+) into the input
# string and the character currently under that cursor (+c+).
class Lexer
  EOF = -1      # sentinel stored in +c+ once the cursor is past the last character
  EOF_TYPE = 1  # token type used for the end-of-input token

  attr_accessor :input # the text being scanned
  attr_accessor :p     # index of the current character within +input+
  attr_accessor :c     # current character, or EOF when the input is exhausted

  # input - the String to scan. An empty string starts out at EOF, so the
  #         first character is bounds-checked exactly like every later one.
  def initialize(input)
    self.input = input
    self.p = 0
    self.c = char_at_cursor
  end

  # Moves the cursor forward one character and refreshes +c+.
  def consume
    self.p += 1
    self.c = char_at_cursor
  end

  # Consumes the current character when it equals +x+.
  #
  # Raises RuntimeError if the current character is anything else.
  def match(x)
    raise "expected #{x} but found #{c}" unless c == x

    consume
  end

  private

  # Returns the character under the cursor, or EOF when the cursor has moved
  # beyond the end of the input.
  def char_at_cursor
    p < input.length ? input[p] : EOF
  end
end
# Lexer for a small list language made of names, commas and square brackets,
# e.g. "[a, [b, c], d]". Whitespace between tokens is skipped.
class ListLexer < Lexer
  # Token types. EOF_TYPE (1) is inherited from Lexer.
  NAME = 2
  COMMA = 3
  LBRACK = 4
  RBRACK = 5

  # Printable names, indexed by token type.
  TokenNames = ["n/a", "<EOF>", "NAME", "COMMA", "LBRACK", "RBRACK"].freeze

  # Characters skipped between tokens. These must be double-quoted: in single
  # quotes "\t", "\n" and "\r" would be two-character strings that never equal
  # a single input character.
  WHITESPACE = [" ", "\t", "\n", "\r"].freeze

  # Returns the printable name for token type +x+.
  def get_token_name(x)
    TokenNames[x]
  end

  # True when the current character is an ASCII letter (a-z or A-Z).
  def letter?
    ('a'..'z').include?(c) || ('A'..'Z').include?(c)
  end

  # Scans and returns the next Token, skipping any whitespace first.
  # Returns a Token of type EOF_TYPE once the input is exhausted.
  #
  # Raises RuntimeError on a character that cannot start any token.
  def next_token
    while c != EOF
      case c
      when *WHITESPACE
        WS() # nothing to return yet; loop around and look at the next character
      when ','
        consume
        return Token.new(COMMA, ",")
      when '['
        consume
        return Token.new(LBRACK, "[")
      when ']'
        consume
        return Token.new(RBRACK, "]")
      else
        # Reject unknown characters explicitly; falling through without
        # consuming anything would spin in this loop forever.
        raise "Invalid character #{c}" unless letter?

        return NAME()
      end
    end
    Token.new(EOF_TYPE, "<EOF>")
  end

  # Scans a run of letters starting at the current character and returns it as
  # a NAME token. The first character is taken unconditionally, so call this
  # only when +letter?+ is true.
  def NAME()
    name = ""
    loop do
      name << c
      consume
      break unless letter?
    end
    Token.new(NAME, name)
  end

  # Skips over consecutive whitespace characters.
  def WS()
    consume while WHITESPACE.include?(c)
  end
end
# Base class for parsers that work with a single token of lookahead pulled
# from a token source.
class Parser
  # Token source. It must respond to +next_token+ (returning an object with a
  # +type+) and to +get_token_name+ (used to build error messages).
  attr_accessor :input
  # The token currently being examined.
  attr_accessor :lookahead

  # input - the token source; its first token is fetched immediately.
  def initialize(input)
    self.input = input
    self.lookahead = input.next_token
  end

  # Advances past the lookahead token when its type equals +x+.
  #
  # Raises RuntimeError naming the expected token type and the token found.
  def match(x)
    unless lookahead.type == x
      raise "Expecting #{input.get_token_name(x)}; found #{lookahead}"
    end

    consume
  end

  # Replaces the lookahead with the next token from the source.
  def consume
    self.lookahead = input.next_token
  end
end
# Recursive-descent recognizer for nested lists of names. One method per rule:
#
#   list     : '[' elements ']'
#   elements : element (',' element)*
#   element  : NAME | list
#
# The rule methods are called for their effect of advancing the token stream;
# malformed input raises RuntimeError. Because +elements+ always requires at
# least one element, an empty list "[]" is rejected.
class ListParser < Parser
  # list : '[' elements ']'
  def list
    match(ListLexer::LBRACK)
    elements
    match(ListLexer::RBRACK)
  end

  # elements : element (',' element)*
  def elements
    element
    while lookahead.type == ListLexer::COMMA
      match(ListLexer::COMMA)
      element
    end
  end

  # element : NAME | list
  #
  # Raises RuntimeError when the lookahead starts neither a name nor a list.
  def element
    case lookahead.type
    when ListLexer::NAME
      match(ListLexer::NAME)
    when ListLexer::LBRACK
      list
    else
      raise "Expecting name or list; found #{lookahead}"
    end
  end
end
# Entry point: recognize the list expression passed as the first command-line
# argument. A syntax error surfaces as an exception from the lexer or parser.
source = ARGV[0]
# Without an argument the lexer would be handed nil and crash on nil[0];
# exit with a usage hint instead.
abort "usage: ruby #{$PROGRAM_NAME} '[a, b, [c, d]]'" if source.nil?

lexer = ListLexer.new(source)
parser = ListParser.new(lexer)
parser.list
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment