Created
June 30, 2009 16:21
-
-
Save luikore/138251 to your computer and use it in GitHub Desktop.
state lexer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# state lexer
require 'strscan'
# A small regexp-driven lexer whose active rule set depends on a named state.
#
# Every state owns a "rule table": three parallel arrays holding the token
# symbols, the optional value blocks and the regexps. Index i of each array
# describes one rule. The :default state always exists; further states are
# declared up front through the :states option.
class StateLexer
  # Name of the state whose rules next_token currently consults.
  attr_reader :state

  # Raised by next_token when no rule of the current state matches at the
  # scanner position (and the input is not exhausted), or when no input has
  # been supplied yet.
  class ScanError < StandardError; end

  # opts - Hash; opts[:states] lists the extra state names to create in
  #        addition to :default. A missing or nil :states means "none".
  def initialize(opts = {:states => []})
    @state = :default
    @states = {:default => [[], [], []]}
    @syms, @blks, @regs = @states[:default]
    (opts[:states] || []).each do |name|
      # ||= keeps the table already bound to @syms/@blks/@regs intact when
      # :default (or a duplicate name) appears in the list.
      @states[name] ||= [[], [], []]
    end
  end

  # Switches the active state.
  #
  # Raises ArgumentError if sym was not declared at construction time; the
  # current state is left untouched in that case.
  def state=(sym)
    unless @states.key?(sym)
      raise ArgumentError, "unknown state: #{sym.inspect}"
    end
    # No write-back of the old table is needed: rule arrays are only ever
    # mutated in place, so @states already references the same objects.
    @syms, @blks, @regs = @states[sym]
    @state = sym
  end

  # Starts lexing string from its beginning. Returns the new StringScanner.
  def scan(string)
    @scanner = StringScanner.new(string)
  end

  # Registers one rule per (token name => Regexp) pair in hash. The optional
  # block is shared by all pairs; next_token calls it with the matched text
  # to compute the token value.
  #
  # When the current state is :default the rules are added to every state,
  # otherwise only to the current state.
  #
  # Raises RuntimeError if any value is not a Regexp. All pairs are checked
  # before anything is stored, so a failing call registers nothing.
  def rule(hash, &blk)
    pairs = hash.map { |(name, reg)| [name.to_sym, checked_regexp(reg)] }
    tables = @state == :default ? @states.values : [[@syms, @blks, @regs]]
    tables.each do |table|
      pairs.each { |name, reg| append_rule(table, name, reg, blk) }
    end
  end

  # Removes the first rule named sym from the current state's table.
  # Does nothing when there is no such rule.
  # NOTE: unlike rule, this touches only the current state, even in :default.
  def remove_rule(sym)
    idx = @syms.index(sym.to_sym)
    return unless idx

    @syms.delete_at(idx)
    @regs.delete_at(idx)
    @blks.delete_at(idx)
  end

  # Tries the current state's rules in registration order at the scanner
  # position and consumes the first match.
  #
  # Returns [token_symbol, value], where value is the rule block's result for
  # the matched text (nil when the rule has no block), or nil once the input
  # is exhausted and no rule matched.
  #
  # Raises ScanError if scan has not been called yet, or if no rule matches
  # before the end of the input.
  def next_token
    raise ScanError, 'no input: call scan before next_token' unless @scanner

    @regs.each_with_index do |reg, idx|
      text = @scanner.scan(reg)
      next unless text

      blk = @blks[idx]
      return @syms[idx], (blk ? blk[text] : nil)
    end

    return nil if @scanner.eos?

    raise ScanError, "#{@state}: Failed to recognize at #{@scanner.pos}"
  end

  private

  # Adds a single rule to the current state's table only.
  def _rule(sym, reg, &blk)
    append_rule([@syms, @blks, @regs], sym.to_sym, checked_regexp(reg), blk)
  end

  # Returns reg unchanged, or raises if it is not a Regexp.
  def checked_regexp(reg)
    raise 'second argument should be a regexp.' unless reg.is_a? Regexp

    reg
  end

  # Appends one rule to a [syms, blks, regs] table, keeping the three
  # arrays index-aligned.
  def append_rule(table, sym, reg, blk)
    syms, blks, regs = table
    syms << sym
    regs << reg
    blks << blk
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment