Skip to content

Instantly share code, notes, and snippets.

@reusee
Created July 8, 2011 17:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save reusee/1072372 to your computer and use it in GitHub Desktop.
Save reusee/1072372 to your computer and use it in GitHub Desktop.
parser
require 'weakref'
# Global switch: when true, the parser prints a trace of every state it
# visits and every node it adds.
$debug = false

# Characters treated as indentation / intra-line whitespace.
SPACE = [' ', "\t"].freeze

# Characters that terminate a line.
NEWLINE = ["\r", "\n"].freeze

# Opening quote character => the character that closes it.
# Frozen (like SPACE and NEWLINE) so the shared tables cannot be mutated
# by accident at runtime.
QUOTE = {
  '"' => '"',
  "'" => "'",
  '{' => '}',
  '(' => ')',
}.freeze
# Reads the file at path +f+, parses it and returns the root of the tree.
#
# @param f [String] path of the source file; also recorded as the file name
#   passed to the parser
# @return [Node] whatever Parser#root holds once parsing has finished
def parse_file(f)
  # File.read always treats +f+ as a plain path; IO.read would spawn a
  # subprocess for a path beginning with "|" on Rubies that still allow it.
  source = File.read(f)
  Parser.new(source, f).root
end
# Error raised by the parser when the source text cannot be parsed.
class ParseError < RuntimeError; end
class String
  # Returns a copy of the string wrapped in ANSI escape sequences: attribute
  # 1, foreground 30 + the colour's position in the list below, background
  # 40, followed by a reset.
  #
  # @param color [Symbol] one of :grey, :red, :green, :yellow, :blue, :pink,
  #   :cyan, :white
  # @return [String] a new string; the receiver is left untouched
  # @raise [ArgumentError] if +color+ is not one of the names above
  def color(color)
    colors = [:grey, :red, :green, :yellow, :blue, :pink, :cyan, :white]
    offset = colors.index(color)
    # Without this guard an unknown name surfaced as NoMethodError on nil + 30.
    raise ArgumentError, "unknown color: #{color.inspect}" unless offset

    "\033[1;#{offset + 30};40m#{self}\033[0m"
  end
end
# A node of the parse tree, stored as a Hash with a fixed set of default
# keys (:value, :quote, :indent, :children, :lineno, :file).
class Node < Hash
  # Builds a node from the defaults below, overridden by the entries of +d+.
  #
  # @param d [Hash] fields to set on top of the defaults
  def initialize(d = {})
    # The defaults hash is built per call, so each node gets its own
    # :children array.
    merge!({
      :value => nil,
      :quote => nil,
      :indent => -1,
      :children => [],
      :lineno => 0,
      :file => nil,
    })
    merge!(d)
  end

  # Renders this node and all of its descendants, one node per line, using
  # String#color for highlighting.
  #
  # @param verbose [Boolean] also print quote, value length, indent and lineno
  # @param indent [Integer] number of spaces to put before this node; each
  #   level of children is indented by 4 more
  # @return [String] the rendered subtree
  def dump(verbose = false, indent = 0)
    # :value defaults to nil; to_s keeps a default-constructed node dumpable
    # instead of failing on nil.color / nil.length.
    value = self[:value].to_s
    ret = "#{' ' * indent}#{value.color(:green)}"
    if verbose
      ret << " <quote: #{self[:quote].inspect.color(:cyan)}, "
      ret << "len: #{value.length.to_s.color(:cyan)}, "
      ret << "indent: #{self[:indent].to_s.color(:cyan)}, "
      ret << "lineno: #{self[:lineno].to_s.color(:cyan)}>"
    end
    ret << "\n"
    self[:children].each do |node|
      ret << node.dump(verbose, indent + 4)
    end
    ret
  end
end
# Parser state: a Hash of named settings plus a stack of saved snapshots and
# a reference to the most recently created node.
class State < Hash
  attr_accessor :last_node
  attr_reader :stack

  # @param d [Hash] initial entries
  def initialize(d = {})
    @last_node = nil
    @stack = []
    update(d)
  end

  # Saves a shallow copy of the current state on the stack.
  #
  # @return [Array] the stack
  def push
    @stack << clone
  end

  # Takes the newest snapshot off the stack and merges its entries back into
  # this state. Keys added since the matching push are kept, and last_node
  # is left as it is.
  #
  # @return [State] self
  def pop
    snapshot = @stack.pop
    update(snapshot)
  end
end
# A String that can be walked one character at a time while tracking the
# current index and a line number that starts at 1.
class SourceIterator < String
  attr_reader :lineno, :index, :last_char

  # @param s [String] the text to iterate over
  def initialize(s)
    super(s)
    @lineno = 1
    @index = -1
    @last_char = nil
  end

  # Advances to the next character.
  #
  # The line number is bumped on the call *after* a line terminator has been
  # returned, so the terminator itself still reports the line it ends. A
  # "\r\n" pair counts as a single line break.
  #
  # @return [String, nil, false] the character at the new index; nil when
  #   the index is exactly one past the last character; false beyond that
  def next
    @index += 1
    @lineno += 1 if line_ended?
    return false if @index > length

    @last_char = self[@index]
  end

  private

  # True when the previously returned character finished a line. A "\r"
  # directly followed by "\n" does not count; the "\n" will.
  def line_ended?
    return true if @last_char == "\n"

    @last_char == "\r" && self[@index] != "\n"
  end
end
# Indentation-sensitive parser that builds a tree of Node objects from source
# text. All parsing is done by the constructor; the finished tree is
# available through #root.
#
# The parser is a character-driven state machine whose current mode lives in
# @state[:context] (:LINESTART, :COMMENT, :HEAD, :HEAD_END, :QUOTE).
#
# NOTE(review): the :TAIL and :TAIL_END contexts are entered by the code but
# have no implementation in this file. Reaching one raises ParseError
# instead of looping forever without consuming input.
class Parser
  attr_accessor :root

  # Parses +source+ immediately.
  #
  # @param source [String] text to parse
  # @param file [String] name stored on every node and used in error locations
  # @raise [ParseError] if the source cannot be parsed
  def initialize(source, file = '<string>')
    @tabstop = 2
    @file = file
    @root = Node.new({:file => file, :value => 'ROOT'})
    @state = State.new({
      :context => :LINESTART,
      :indent => -1,
      :port => WeakRef.new(@root),
      :in_list => false,
      :quote => nil,
      :in_nest => false,
    })
    @s = SourceIterator.new(source)
    parse
    strip(@root)
  end

  # Removes the :parent back-reference from +node+ and all its descendants.
  def strip(node)
    node.delete(:parent)
    node[:children].each do |child|
      strip(child)
    end
  end

  # Prints the current state followed by the saved states, newest first.
  def dump_state
    # A local lambda instead of a nested `def`, which would (re)define a
    # public Parser#d every time dump_state ran.
    describe = lambda do |state|
      puts [
        state[:context].to_s.color(:green),
        'port:', state[:port][:value].color(:cyan),
        'indent:', state[:port][:indent].to_s.color(:cyan),
        'quote:', state[:quote].inspect.color(:cyan),
        'in_list:', state[:in_list].to_s.color(:cyan),
        'in_nest:', state[:in_nest].to_s.color(:cyan),
      ].join(' ')
    end
    describe.call(@state)
    @state.stack.reverse.each do |state|
      describe.call(state)
    end
  end

  # Creates a node holding +value+ and attaches it to the tree.
  #
  # The attachment point (@state[:port]) is first moved up through :parent
  # links until it is indented less than the current indent.
  #
  # @param value [String] the token text
  # @return [Node] the new node (also stored in @state.last_node)
  def add_node(value)
    indent = @state[:indent]
    while indent <= @state[:port][:indent]
      @state[:port] = @state[:port][:parent]
    end
    node = Node.new({
      :value => value,
      :indent => @state[:indent],
      :quote => @state[:quote],
      :parent => @state[:port],
      :lineno => @s.lineno,
      :file => @file,
    })
    @state[:port][:children] << node
    @state.last_node = node
    if $debug
      puts 'Add node '.color(:blue) + node[:value]
    end
    node
  end

  # Runs the state machine over the whole source, adding nodes under @root.
  #
  # @raise [ParseError] on an unterminated quote, an unmatched ']', a comma
  #   after ']' outside a list, or when an unimplemented context is reached
  def parse
    no = 1
    c = @s.next
    while c
      context = @state[:context]
      if $debug
        puts '=' * 15 + ' state '.color(:red) + no.to_s.color(:yellow) + ' ' + '=' * 15
        no += 1
        dump_state
        puts '-' * 15 + "#{c.inspect.color(:green)} - line: #{@s.lineno.to_s.color(:yellow)}" + '-' * 15
        puts @s[@s.index, 30].inspect
        puts '-' * 30
        puts @root.dump(true)
        puts
      end
      case context
      when :LINESTART
        if @s[@s.index, 2] == '//' # comment
          @state[:context] = :COMMENT
          next
        elsif NEWLINE.include?(c)
          c = @s.next
          next
        else
          # Count leading whitespace characters as the line's indent.
          @state[:indent] = 0
          while c && SPACE.include?(c)
            c = @s.next
            @state[:indent] += 1
          end
          break unless c
          # Whitespace-only line: stay in LINESTART and let it eat the newline.
          next if NEWLINE.include?(c)

          @state[:context] = :HEAD
          next
        end
      when :COMMENT
        # Skip forward until the character just consumed was a line terminator.
        while c
          c = @s.next
          break if NEWLINE.include?(@s[@s.index - 1])
        end
        @state[:context] = :LINESTART
        next
      when :HEAD
        @state[:in_list] = false
        if @s[@s.index, 2] == '//' # comment
          @state[:context] = :COMMENT
          next
        elsif QUOTE.include?(c)
          # Save a state that resumes at HEAD_END once the quote is closed.
          @state[:context] = :HEAD_END
          @state.push
          @state[:context] = :QUOTE
          @state[:quote] = c
          c = @s.next
          next
        elsif c == '['
          # Save a state that resumes at TAIL, one tabstop deeper, for after
          # the matching ']'; then parse the bracket contents as a HEAD.
          @state[:indent] += @tabstop
          @state[:context] = :TAIL
          @state.push
          @state[:in_nest] = true
          @state[:indent] -= @tabstop
          @state[:context] = :HEAD
          c = @s.next
          next
        else
          tok_start = @s.index
          # A bare token runs up to whitespace; inside [...] it also stops
          # at ']'.
          while c && c !~ /\s/ && !(@state[:in_nest] && c == ']')
            c = @s.next
          end
          add_node(@s[tok_start, @s.index - tok_start])
          break unless c
        end
        @state[:context] = :HEAD_END
        next
      when :HEAD_END
        # Later nodes attach beneath the node that was just created.
        @state[:port] = WeakRef.new(@state.last_node)
        while SPACE.include?(c)
          c = @s.next
        end
        if NEWLINE.include?(c)
          c = @s.next
          if @state[:in_nest]
            @state[:indent] += @tabstop
            # No manual line-number bump here: SourceIterator#next already
            # counts the newline, and lineno has no writer.
            @state[:context] = :TAIL
          else
            @state[:context] = :LINESTART
          end
          next
        elsif c == ']'
          # Without a saved state there is no '[' to close.
          parse_error("unmatched ']'") if @state.stack.empty?
          @state.pop
          @state[:port] = WeakRef.new(@state.last_node)
          @state[:context] = :TAIL
          c = @s.next
          if !@state[:in_list]
            if c == ','
              # Kernel#raise takes at most (class, message, backtrace); the
              # source location is supplied as the first backtrace entry.
              parse_error('comma syntax not supported here')
            end
          elsif c != ','
            @state[:port] = @state[:port][:parent]
            @state[:indent] -= @tabstop
            @state[:in_list] = false
          end
          next
        else
          @state[:indent] += @tabstop
          @state[:context] = :TAIL
          next
        end
      when :QUOTE
        quote_balance = 1
        tok_start = @s.index
        while quote_balance != 0
          raise ParseError, 'uncorrect quote' unless c

          # Both tests look at the character *before* c. (last_char holds c
          # itself, so it cannot be used to detect a preceding backslash.)
          escaped = @s[@s.index - 1] == "\\"
          if c == QUOTE[@state[:quote]] && !escaped
            quote_balance -= 1
          elsif c == @state[:quote] && !escaped
            quote_balance += 1
          end
          c = @s.next
        end
        # c is now one past the closing character; drop that closer.
        node = add_node(@s[tok_start, @s.index - 1 - tok_start])
        @state.pop
        @state.last_node = node
        next
      else
        # :TAIL / :TAIL_END (or anything unknown): nothing here would ever
        # advance the input, so fail loudly rather than spin.
        parse_error("parser context #{context} is not implemented")
      end
    end
  end

  private

  # Raises ParseError with +message+, putting the current source location
  # ("file:line") at the top of the backtrace.
  def parse_error(message)
    raise ParseError, message, ["#{@file}:#{@s.lineno}"] + caller
  end
end
# Smoke test: parses the file named 't' and returns the resulting root node.
def test
  parse_file('t')
end
# Run the smoke test as soon as this file is loaded.
test
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment