Last active
August 29, 2015 14:10
-
-
Save rutan/b541a3f7eebedffcc9c2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding: utf-8
module Torigoya
  module PoorXml
    # A deliberately small XML reader. It understands the XML declaration,
    # opening / closing / self-closing tags, attributes and text, and builds a
    # tree of Node / TextNode objects. Comments, CDATA sections and entity
    # decoding are not handled.
    class Parser
      # Matches one `key="value"` or `key='value'` pair. A backslash inside the
      # value escapes the following character (so `\"` does not end the value).
      ATTRIBUTE = /([^\s=]+)\s*=\s*(?:"((?:\\.|[^"\\])*)"|'((?:\\.|[^'\\])*)')/m.freeze

      # Initialization.
      # @param [String] raw_doc XML text
      def initialize(raw_doc)
        @raw_doc = raw_doc
      end

      # Parses the document given to the constructor.
      #
      # Closing-tag names are not compared against the element they close, and
      # elements still open at the end of input are accepted as-is.
      #
      # @return [Node] a synthetic node named 'root' whose children are the
      #   top-level nodes of the document
      # @raise [RuntimeError] 'invalid xml' when the remaining input matches no
      #   known token, or when a closing tag appears with no open element
      def parse
        rest = @raw_doc.strip
        root = Node.new('root')
        stack = [root]

        until rest.empty?
          # Every pattern is anchored with \A (start of string). `^` would also
          # match at the start of a later line and let a tag overtake the text
          # that precedes it.
          case rest
          when /\A<\?xml.+?\?>/ # XML declaration (only UTF-8 is read, so it is ignored)
            # nothing to record
          when %r{\A<([^>]+)/>} # self-closing tag
            stack.last.children.push line_to_node($1)
          when %r{\A</[^>]+>} # closing tag
            # Never pop the synthetic root: that would leave nothing to attach
            # later nodes to.
            raise 'invalid xml' if stack.size <= 1
            stack.pop
          when /\A<([^>]+)>/ # opening tag
            child = line_to_node($1)
            stack.last.children.push child
            stack.push child
          when /\A([^<]+)(?=<)/ # text up to (not including) the next tag
            child = TextNode.new('text')
            child.text = $1
            stack.last.children.push child
          else
            raise 'invalid xml'
          end

          # Drop the token that was just matched plus any whitespace before the
          # next one.
          rest = Regexp.last_match.post_match.lstrip
        end

        root
      end

      private

      # Builds a Node from the inside of a tag (the text between `<` and `>`
      # or `/>`).
      # @param [String] line tag name optionally followed by attributes
      # @return [Node] node carrying the tag name and its attributes
      def line_to_node(line)
        name, attr_str = line.strip.split(/\s+/, 2)
        node = Node.new(name)
        return node unless attr_str

        attr_str.scan(ATTRIBUTE) do |key, double_quoted, single_quoted|
          raw_value = double_quoted || single_quoted
          # Unescape: keep the character following each backslash.
          node.attributes[key] = raw_value.gsub(/\\(.)/m, '\1')
        end
        node
      end
    end

    # An element of the parsed tree: a name, an attribute Hash and child nodes.
    class Node
      attr_reader :name
      attr_reader :attributes
      attr_reader :children

      # Initialization.
      # @param [String] name node name
      def initialize(name)
        @name = name
        @attributes = {}
        @children = []
      end

      # Picks the direct children that have the given name.
      # @param [String] name name of the nodes to look for
      # @return [Array] matching child nodes (empty when there are none)
      def [](name)
        children.select { |child| child.name == name }
      end

      # Reads an attribute.
      # @param [String] name attribute name
      # @return [Object] value of the attribute, or nil when it is not set
      def attr(name)
        attributes[name]
      end

      # Returns the text contained in this node.
      # @return [String] the text of every child node, concatenated
      def text
        children.map(&:text).join('')
      end

      # Converts the node (and, recursively, its children) to a Hash.
      # @return [Hash] Hash with the keys :name, :attributes and :children
      def to_h
        {
          name: @name,
          attributes: @attributes,
          children: @children.map(&:to_h),
        }
      end
    end

    # A leaf node holding a run of text.
    class TextNode < Node
      attr_accessor :text

      # Initialization.
      # @param [String] name node name
      def initialize(name)
        super
        @text = ''
      end

      # Converts the node to a Hash.
      # NOTE(review): the text is stored under the String key 'text' while the
      # other keys are Symbols; kept as-is so existing callers keep working.
      # @return [Hash] the Hash built by Node#to_h plus a 'text' entry
      def to_h
        super.tap { |o| o['text'] = @text }
      end
    end
  end
end
# Example driver: reads ./input.xml and, for every <chat> element inside the
# first <packet>, prints its vpos attribute followed by its text.
# File.read opens, reads and closes the file in one call, so no handle is leaked.
xml = File.read('./input.xml', encoding: 'utf-8')
doc = Torigoya::PoorXml::Parser.new(xml).parse

# A document without any <packet> simply prints nothing.
packet = doc['packet'].first
chats = packet ? packet['chat'] : []
chats.each do |node|
  puts node.attr('vpos')
  puts node.text
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment