Created
February 22, 2009 23:12
-
-
Save jolts/68680 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'logger' | |
require File.dirname(__FILE__) + '/grammar' | |
# DSL for building small lexer/recursive-descent-parser combinations.
# A Parser is constructed with a block that declares lexer tokens
# (+token+) and grammar rules (+rule+ / +start+); the Grammar class
# (required from the sibling 'grammar' file) supplies rule matching.
module DSL
  class Parser
    # Current position in the token stream (index into @tokens).
    attr_accessor :pos
    # Defined grammar rules (name => Grammar) and the last string lexed.
    attr_reader :rules, :string

    # Raised when lexing or parsing fails.
    class ParseError < RuntimeError; end

    # +language_name+ is used only for #to_s.  The block is
    # instance_eval'd so it can call the private DSL declaration
    # methods (token / rule / start / match).
    def initialize(language_name, &block)
      @logger = Logger.new(STDOUT)
      @lex_tokens = []
      @rules = {}
      @start = nil
      @language_name = language_name
      instance_eval(&block)
    end

    # Split +string+ into tokens using the declared lex tokens, tried
    # in declaration order.  Tokens declared without a block (e.g.
    # whitespace) are consumed but produce no entry in the stream.
    # Returns the array of produced tokens.
    # Raises ParseError when no token pattern matches the remaining input.
    def tokenize(string)
      @tokens = []
      @string = string.clone
      until string.empty?
        # Restructured from `raise ... unless @lex_tokens.any? do ... end`:
        # binding a do/end block inside a modifier-unless condition is an
        # ambiguity trap; an intermediate variable is unambiguous.
        matched = @lex_tokens.any? do |tok|
          match = tok.pattern.match(string)
          if match
            @logger.debug("Token #{match[0]} consumed")
            # Evaluate the lexeme with the token's block, if any.
            @tokens << tok.block.call(match.to_s) if tok.block
            # Consume the match and proceed with the rest of the string.
            string = match.post_match
            true
          else
            # This token pattern did not match; try the next.
            false
          end
        end
        # BUG FIX: the original message was missing its closing quote.
        raise ParseError, "unable to lex '#{string}'" unless matched
      end
      @tokens
    end

    # Lex +string+ and parse the resulting token stream with the start
    # rule, returning the evaluated result.
    # Raises ParseError when tokens remain unconsumed after parsing.
    def parse(string)
      # First, split the string according to the "token" declarations.
      tokenize(string)
      # @pos tracks consumption; @max_pos / @expected record the
      # furthest failure point so the error message is informative.
      @pos = 0
      @max_pos = 0
      @expected = []
      # Parse (and evaluate) the tokens received.
      result = @start.parse
      # If there are unparsed extra tokens, signal an error.
      if @pos != @tokens.size
        raise ParseError, "Parse error. expected: '#{@expected.join(', ')}', found '#{@tokens[@max_pos]}'"
      end
      result
    end

    # Consume and return the next token in the queue (nil when exhausted).
    def next_token
      @pos += 1
      @tokens[@pos - 1]
    end

    # Consume the next token and return it if it matches +tok+ (compared
    # with ===, so classes, regexps and literals all work).  Otherwise
    # record +tok+ as expected at the furthest failure point and return nil.
    def expect(tok)
      t = next_token
      if @pos - 1 > @max_pos
        @max_pos = @pos - 1
        @expected = []
      end
      return t if tok === t
      @expected << tok if @max_pos == @pos - 1 && !@expected.include?(tok)
      nil
    end

    def to_s
      "Parser for #{@language_name}"
    end

    private

    # A lexer token: a regexp plus an optional evaluation block.
    LexToken = Struct.new(:pattern, :block)

    # Declare a lex token.  The pattern is anchored with \A so lexing
    # always consumes from the front of the remaining input.
    def token(pattern, &block)
      @lex_tokens << LexToken.new(Regexp.new('\A' + pattern.source), block)
    end

    # Declare the grammar's start rule.
    def start(name, &block)
      rule(name, &block)
      @start = @rules[name]
    end

    # Declare a grammar rule; the block is instance_eval'd so it can
    # call +match+ on the rule currently under construction.
    def rule(name, &block)
      @current_rule = Grammar.new(name, self)
      @rules[name] = @current_rule
      instance_eval(&block)
      @current_rule = nil
    end

    # Forward a match declaration to the rule being defined.
    def match(*pattern, &block)
      @current_rule.send(:match, *pattern, &block)
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment