Created
July 8, 2011 17:57
-
-
Save reusee/1072372 to your computer and use it in GitHub Desktop.
parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'weakref' | |
# Global switch: when truthy, the parser prints a trace of every step.
$debug = false

# Characters treated as horizontal whitespace (each counts as one indent unit).
SPACE = [' ', "\t"].freeze

# Characters treated as line terminators.
NEWLINE = ["\r", "\n"].freeze

# Maps each opening quote character to the character that closes it.
# Frozen so the shared lookup tables cannot be mutated by accident.
QUOTE = {
  '"' => '"',
  "'" => "'",
  '{' => '}',
  '(' => ')',
}.freeze
# Reads the file at path +f+ and parses its contents.
#
# Uses File.read rather than IO.read: IO.read interprets a path that begins
# with "|" as a command to spawn, which is never wanted for a source path.
#
# @param f [String] path of the file to parse
# @return [Node] the root node produced by Parser for that file
def parse_file(f)
  source = File.read(f)
  Parser.new(source, f).root
end
# Raised by the parser when the source text is malformed.
class ParseError < RuntimeError; end
class String
  # Wraps the string in ANSI escape codes: bold, the requested foreground
  # colour, black background, followed by a reset sequence.
  #
  # @param color [Symbol] one of :grey, :red, :green, :yellow, :blue, :pink,
  #   :cyan, :white
  # @return [String] a new string with the escape sequences added
  # @raise [ArgumentError] if +color+ is not one of the names above
  def color(color)
    colors = [:grey, :red, :green, :yellow, :blue, :pink, :cyan, :white]
    offset = colors.index(color)
    # Without this guard an unknown name surfaced as NoMethodError on nil.
    raise ArgumentError, "unknown color: #{color.inspect}" if offset.nil?
    # The position in the list is added to 30, the first foreground code.
    "\033[1;#{offset + 30};40m#{self}\033[0m"
  end
end
# A node of the parse tree, stored as a Hash with the keys :value, :quote,
# :indent, :children, :lineno and :file (plus whatever the caller supplies).
class Node < Hash
  # @param d [Hash] entries that override the defaults below
  def initialize(d = {})
    merge!({
      :value => nil,
      :quote => nil,
      :indent => -1,
      :children => [], # a fresh array for every node
      :lineno => 0,
      :file => nil,
    })
    merge!(d)
  end

  # Renders this node and all of its descendants as coloured text, one node
  # per line, each nesting level indented four more columns than its parent.
  #
  # @param verbose [Boolean] also print quote, value length, indent and lineno
  # @param indent [Integer] number of leading spaces for this node's line
  # @return [String] the rendered subtree
  def dump(verbose = false, indent = 0)
    # :value defaults to nil; to_s keeps such nodes printable instead of
    # failing on nil.color / nil.length.
    value = self[:value].to_s
    header = "#{' ' * indent}#{value.color(:green)}"
    if verbose
      header = [header,
        " <quote: #{self[:quote].inspect.color(:cyan)}, ",
        "len: #{value.length.to_s.color(:cyan)}, ",
        "indent: #{self[:indent].to_s.color(:cyan)}, ",
        "lineno: #{self[:lineno].to_s.color(:cyan)}>"].join
    end
    lines = ["#{header}\n"]
    self[:children].each do |node|
      lines << node.dump(verbose, indent + 4)
    end
    lines.join
  end
end
# Parser state held as a Hash, with a stack of saved snapshots.
class State < Hash
  attr_reader :stack
  attr_accessor :last_node

  # @param d [Hash] initial entries
  def initialize(d = {})
    @stack = []
    @last_node = nil
    update(d)
  end

  # Saves a snapshot (clone) of the current entries on the stack.
  # Returns the stack.
  def push
    snapshot = clone
    @stack << snapshot
  end

  # Merges the most recently saved snapshot back into this state.
  # Only hash entries are restored; last_node keeps its current value.
  # Returns self.
  def pop
    saved = @stack.pop
    update(saved)
  end
end
# A String that can be walked one character at a time while keeping track of
# the current index and line number.
class SourceIterator < String
  attr_reader :lineno, :index, :last_char

  # @param s [String] the source text to iterate over
  def initialize(s)
    super(s)
    @lineno = 1
    @index = -1 # advanced to 0 by the first call to #next
    @last_char = nil
  end

  # Advances to the next character.
  #
  # lineno is bumped when the character just left behind was a line
  # terminator, so it always names the line of the character being returned.
  # A "\r\n" pair counts as a single line break: the increment is deferred
  # from the "\r" to the "\n" that follows it.
  #
  # @return [String, nil, false] the character now under the cursor; nil the
  #   first time the end is reached, false on any later call
  def next
    @index += 1
    if NEWLINE.include?(@last_char)
      crlf_pair = @last_char == "\r" && self[@index] == "\n"
      @lineno += 1 unless crlf_pair
    end
    return false if @index > length
    @last_char = self[@index]
  end
end
# Indentation-sensitive parser that turns source text into a tree of Node
# objects hanging off #root.
#
# The parser is a character-driven state machine; @state[:context] selects
# the branch taken in #parse. The :TAIL and :TAIL_END contexts have no
# implementation yet, so any input that reaches them is rejected with a
# ParseError (previously the main loop spun forever on such input).
class Parser
  attr_accessor :root

  # Parses +source+ immediately; the result is available from #root.
  #
  # @param source [String] text to parse
  # @param file [String] name recorded on every node
  # @raise [ParseError] on malformed or not-yet-supported input
  def initialize(source, file = '<string>')
    @tabstop = 2
    @file = file
    @root = Node.new({:file => file, :value => 'ROOT'})
    @state = State.new({
      :context => :LINESTART,
      :indent => -1,
      :port => WeakRef.new(@root), # node that receives the next child
      :in_list => false,
      :quote => nil,
      :in_nest => false,
    })
    @s = SourceIterator.new(source)
    parse
    strip(@root)
  end

  # Recursively removes the :parent back-reference from +node+ and all of
  # its descendants.
  def strip(node)
    node.delete(:parent)
    node[:children].each do |child|
      strip(child)
    end
  end

  # Prints the current state followed by the saved states, most recently
  # pushed first.
  def dump_state
    puts describe_state(@state)
    @state.stack.reverse.each do |state|
      puts describe_state(state)
    end
  end

  # Creates a node holding +value+ and attaches it to the tree.
  #
  # The attachment point (@state[:port]) is first moved up through :parent
  # links until its indent is smaller than the current indent. The new node
  # is recorded as @state.last_node.
  #
  # @param value [String] token text
  # @return [Node] the node that was added
  def add_node(value)
    indent = @state[:indent]
    while indent <= @state[:port][:indent]
      @state[:port] = @state[:port][:parent]
    end
    node = Node.new({
      :value => value,
      :indent => @state[:indent],
      :quote => @state[:quote],
      :parent => @state[:port],
      :lineno => @s.lineno,
      :file => @file,
    })
    @state[:port][:children] << node
    @state.last_node = node
    if $debug
      puts 'Add node '.color(:blue) + node[:value]
    end
    node
  end

  # Runs the state machine over the whole source.
  #
  # @raise [ParseError] for an unterminated quote, a misplaced comma, an
  #   unbalanced ']' or input that needs the unimplemented tail contexts
  def parse
    no = 1
    c = @s.next
    while c
      context = @state[:context]
      if $debug
        puts '=' * 15 + ' state '.color(:red) + no.to_s.color(:yellow) + ' ' + '=' * 15
        no += 1
        dump_state
        puts '-' * 15 + "#{c.inspect.color(:green)} - line: #{@s.lineno.to_s.color(:yellow)}" + '-' * 15
        puts @s[@s.index, 30].inspect
        puts '-' * 30
        puts @root.dump(true)
        puts
      end
      case context
      when :LINESTART
        if @s[@s.index, 2] == '//' # comment
          @state[:context] = :COMMENT
          next
        elsif NEWLINE.include?(c)
          c = @s.next
          next
        else
          # Measure the indentation: one unit per leading space or tab.
          @state[:indent] = 0
          while c && SPACE.include?(c)
            c = @s.next
            @state[:indent] += 1
          end
          break unless c
          # Whitespace-only line: stay in LINESTART.
          next if NEWLINE.include?(c)
          @state[:context] = :HEAD
          next
        end
      when :COMMENT
        # Consume up to and including the line terminator.
        while c
          c = @s.next
          break if NEWLINE.include?(@s[@s.index - 1])
        end
        @state[:context] = :LINESTART
        next
      when :HEAD
        @state[:in_list] = false
        if @s[@s.index, 2] == '//' # comment
          @state[:context] = :COMMENT
          next
        elsif QUOTE.include?(c)
          # Save a state that resumes at HEAD_END, then read the quoted text.
          @state[:context] = :HEAD_END
          @state.push
          @state[:context] = :QUOTE
          @state[:quote] = c
          c = @s.next
          next
        elsif c == '['
          # Save a state that resumes at TAIL one tabstop deeper, then parse
          # the bracket contents as a nested HEAD at the current indent.
          @state[:indent] += @tabstop
          @state[:context] = :TAIL
          @state.push
          @state[:in_nest] = true
          @state[:indent] -= @tabstop
          @state[:context] = :HEAD
          c = @s.next
          next
        else
          # Bare token: runs to the next whitespace, or to ']' inside a nest.
          tok_start = @s.index
          while c && c !~ /\s/ && !(@state[:in_nest] && c == ']')
            c = @s.next
          end
          add_node(@s[tok_start, @s.index - tok_start])
          break unless c
        end
        @state[:context] = :HEAD_END
        next
      when :HEAD_END
        @state[:port] = WeakRef.new(@state.last_node)
        while SPACE.include?(c)
          c = @s.next
        end
        if NEWLINE.include?(c)
          # The iterator advances its own line counter on this call; the
          # former manual "@s.lineno += 1" had no writer to call and would
          # have double-counted the line.
          c = @s.next
          if @state[:in_nest]
            @state[:indent] += @tabstop
            @state[:context] = :TAIL
          else
            @state[:context] = :LINESTART
          end
          next
        elsif c == ']'
          if @state.stack.empty?
            raise ParseError, "unbalanced ']' (#{@file}:#{@s.lineno})"
          end
          @state.pop
          @state[:port] = WeakRef.new(@state.last_node)
          @state[:context] = :TAIL
          c = @s.next
          if !@state[:in_list]
            if c == ','
              # raise accepts (class, message, backtrace); the location is
              # therefore folded into the message.
              raise ParseError, "comma syntax not supported here (#{@file}:#{@s.lineno})"
            end
          elsif c != ','
            @state[:port] = @state[:port][:parent]
            @state[:indent] -= @tabstop
            @state[:in_list] = false
          end
          next
        else
          @state[:indent] += @tabstop
          @state[:context] = :TAIL
          next
        end
      when :QUOTE
        quote_balance = 1
        tok_start = @s.index
        while quote_balance != 0
          raise ParseError, 'uncorrect quote' unless c
          # A quote character preceded by a backslash does not count.
          escaped = @s[@s.index - 1] == "\\"
          if c == QUOTE[@state[:quote]] && !escaped
            quote_balance -= 1
          elsif c == @state[:quote] && !escaped
            quote_balance += 1
          end
          c = @s.next
        end
        # The cursor is one past the closing quote; exclude the quote itself.
        node = add_node(@s[tok_start, @s.index - 1 - tok_start])
        @state.pop
        @state.last_node = node
        next
      when :TAIL, :TAIL_END
        # No implementation consumes input in these contexts; fail loudly
        # instead of looping forever on the same character.
        raise ParseError, "#{context} parsing is not implemented (#{@file}:#{@s.lineno})"
      end
    end
  end

  private

  # Formats one state (current or saved) as a single coloured line.
  def describe_state(state)
    [
      state[:context].to_s.color(:green),
      'port:', state[:port][:value].color(:cyan),
      'indent:', state[:port][:indent].to_s.color(:cyan),
      'quote:', state[:quote].inspect.color(:cyan),
      'in_list:', state[:in_list].to_s.color(:cyan),
      'in_nest:', state[:in_nest].to_s.color(:cyan),
    ].join(' ')
  end
end
# Ad-hoc smoke run: parses the file named 't' in the current directory and
# returns the resulting root node.
def test
  parse_file('t')
end

# Executed as soon as the file is loaded.
test
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment