Skip to content

Instantly share code, notes, and snippets.

@rutan
Last active August 29, 2015 14:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rutan/b541a3f7eebedffcc9c2 to your computer and use it in GitHub Desktop.
Save rutan/b541a3f7eebedffcc9c2 to your computer and use it in GitHub Desktop.
# encoding: utf-8
module Torigoya
module PoorXml
class Parser
  # Matches one attribute: a key, optional whitespace around "=", and a value
  # wrapped in double or single quotes. Inside the value a backslash escapes
  # the following character, so an escaped quote does not end the value.
  ATTRIBUTE_PATTERN = /([^\s=]+)\s*=\s*(?:"((?:\\.|[^"\\])*)"|'((?:\\.|[^'\\])*)')/m

  # Creates a parser for one document.
  # @param [String] raw_doc XML text
  def initialize(raw_doc)
    @raw_doc = raw_doc
  end

  # Parses the document into a tree of nodes.
  #
  # Whitespace in front of every token is discarded, so text nodes never start
  # with whitespace (trailing whitespace inside a text node is kept).
  # Closing-tag names are not checked against the element they close.
  #
  # @return [Node] a synthetic node named 'root' holding the top-level nodes
  # @raise [RuntimeError] 'invalid xml' when the remaining input matches no
  #   known token, or when a closing tag has no open element to close
  def parse
    root = Node.new('root')
    stack = [root]
    rest = @raw_doc.strip
    until rest.empty?
      # Every pattern is anchored with \A so it can only match at the very
      # start of the remaining input, never at the start of a later line.
      case rest
      when /\A<\?xml.+?\?>/m
        # XML declaration: ignored, the input is read as-is.
      when %r{\A<([^>]+)/>} # self-closing tag
        stack.last.children.push(line_to_node(Regexp.last_match(1)))
      when %r{\A</[^>]+>} # closing tag
        # The synthetic root must never be popped.
        raise 'invalid xml' if stack.size <= 1

        stack.pop
      when /\A<([^>]+)>/ # opening tag
        child = line_to_node(Regexp.last_match(1))
        stack.last.children.push(child)
        stack.push(child)
      when /\A([^<]+)(?=<)/ # text, which must be followed by another tag
        child = TextNode.new('text')
        child.text = Regexp.last_match(1)
        stack.last.children.push(child)
      else
        raise 'invalid xml'
      end
      # The last match is still the `when` pattern above: match data is local
      # to this method, so the helper calls do not overwrite it.
      rest = Regexp.last_match.post_match.strip
    end
    root
  end

  private

  # Builds a node from the text found between "<" and ">" (or "/>").
  # @param [String] line tag name optionally followed by attributes
  # @return [Node] node carrying the tag name and its parsed attributes
  def line_to_node(line)
    name, attr_str = line.split(/\s+/, 2)
    node = Node.new(name)
    return node unless attr_str

    attr_str.scan(ATTRIBUTE_PATTERN) do |key, double_quoted, single_quoted|
      raw_value = double_quoted || single_quoted
      # Drop each escaping backslash but keep the character it escapes.
      node.attributes[key] = raw_value.gsub(/\\(.)/m, '\1')
    end
    node
  end
end
class Node
  attr_reader :name, :attributes, :children

  # Creates a node with no attributes and no children.
  # @param [String] name node name
  def initialize(name)
    @name = name
    @attributes = {}
    @children = []
  end

  # Looks up direct children by name.
  # @param [String] name node name to search for
  # @return [Array] the direct children whose name equals the given one
  def [](name)
    children.select { |node| node.name == name }
  end

  # Reads one attribute.
  # @param [String] name attribute name
  # @return [Object] value stored under that attribute name
  def attr(name)
    attributes[name]
  end

  # Returns the text contained in this node.
  # @return [String] the text of every child, concatenated in order
  def text
    pieces = children.map { |node| node.text }
    pieces.join('')
  end

  # Converts the node to a Hash.
  # @return [Hash] the name, the attributes and the Hash form of each child
  def to_h
    child_hashes = @children.map { |node| node.to_h }
    { name: @name, attributes: @attributes, children: child_hashes }
  end
end
class TextNode < Node
  attr_reader :text
  attr_writer :text

  # Creates a text node whose text starts out empty.
  # @param [String] name node name
  def initialize(name)
    super(name)
    @text = ''
  end

  # Converts the node to a Hash.
  # @return [Hash] the Hash built by the superclass plus the text under the 'text' key
  def to_h
    hash = super
    hash['text'] = @text
    hash
  end
end
end
end
# Reads ./input.xml, parses it, and prints the vpos attribute and the text of
# every <chat> element inside the first <packet> element.
# File.read closes the file itself, so no handle is left open.
xml_text = File.read('./input.xml', encoding: 'utf-8')
parser = Torigoya::PoorXml::Parser.new(xml_text)
doc = parser.parse
doc['packet'].first['chat'].each do |node|
  puts node.attr('vpos')
  puts node.text
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment