secret
Last active

  • Download Gist
2_result.xml
XML
1 2 3 4 5 6 7 8 9 10 11 12 13
<?xml version="1.0"?>
<playlist>
<tracklist>
<track>
<name>First Track!</name>
<author>Who is it?</author>
</track>
<track>
<name>My contribution</name>
<author>Me</author>
</track>
</tracklist>
</playlist>
2_structure+aliases.xml
XML
1 2 3 4 5 6 7 8 9 10
<?xml version="1.0"?>
<pl>
<tl>
<n_t>First Track!</n_t>
<a_t>Who is it?</a_t>
<n_t>My contribution</n_t>
<a_t>Me</a_t>
</tl>
</pl>
main.rb
Ruby
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
# RPCFN 8: XML Transformer
# Benoit Daloze
# Ruby 1.9.2 >= r25032 (Enumerable#slice_before)
#
# This solution use my personnal TreeNode class
# My TreeNode class acts as an "inside" Tree, being a module extending the object that become a TreeNode
# Sorry if that class is long, it's not the main work for this challenge, and the API is easy, so just keep in mind:
# obj.extend TreeNode # make obj a TreeNode
# obj << other # add other as a child
# obj >> other # remove other from his parent obj
#
# The code is kind of speaking by itself, so that's why I did not comment it much
#
# Enjoy!
#
# Update:
# Add another test to show the flexibility (2_structure+aliases.xml)
# Structure is deduced from result, but only the common part
# The rest has to be explicitely given as aliases because of its ambiguity
 
require "nokogiri"
require_relative "tree"
 
DIR = "xmlfiles"
TARGET = "result"
 
class Array
def only
raise IndexError, "Array#only called on non-single-element array (size=#{size}): #{self}" unless size == 1
first
end
end
 
class XNode < String
def to_s
"<#{super}>"
end
end
class XText < String
end
 
class Nokogiri::XML::Node
def to_a
children.each_with_object([]) { |child, a|
case child
when Nokogiri::XML::Text
a << XText.new(child.text) unless child.to_s.match(/\A\s+\z/)
when Nokogiri::XML::Node
a << XNode.new(child.name) << child.to_a
end
}
end
end
 
module TreeNode
def to_nokogiri_node(doc)
node = Nokogiri::XML::Node.new(self, doc)
children.each { |c|
case c
when XNode
node << c.to_nokogiri_node(doc)
when XText
node << Nokogiri::XML::Text.new(c, doc)
end
}
node
end
def to_nokogiri_doc
Nokogiri::XML::Document.new.tap { |doc| doc << self.root.to_nokogiri_node(doc) }
end
end
 
class XMLTransformer
ALIASES = { "surname" => "last_name" }
attr_reader :tree, :result_tree
 
def initialize(file, result = "#{DIR}/#{TARGET}.xml", aliases = {})
@result_tree = TreeNode.from_ary Nokogiri::XML(IO.read(result)).to_a
@structure = result_tree.linearize.reduce(:&).map(&:dup)
@tree = TreeNode.from_ary Nokogiri::XML(IO.read(file)).to_a
@aliases = ALIASES.merge(aliases)
end
 
def to_s
@tree.show_simple
end
 
def aliases
tree.all.each { |n|
@aliases.keys.each { |to_replace|
n.replace(@aliases[to_replace].to_s) if n == to_replace.to_s
}
}
end
 
def nesting
# last_name => name > last
node_nested = @tree.all.select { |node| node.include?('_') }
 
groups = node_nested.group_by { |n| n.parent.object_id }
 
groups.each_value { |nodes|
if nodes.size >= 2
parent = nodes[0].parent
 
nodes.each { |n|
last_par = n.split('_').reverse.inject(parent) { |par, child|
par << child
child
}
 
n.children.each { |c|
c.parent >> c
last_par << c
}
parent >> n
}
end
}
# group same nodes
# criteria : no same subnodes under them
@tree.all.group_by.select { |n| !n.leaf? and !n.children.any? { |c| XText === c } and tree.all.count(n) > 1 }.uniq.each { |multi_node|
@tree.all.select { |n| n == multi_node }.group_by(&:depth).each_value { |group|
group.slice_before([]) { |g, children|
r = !(g.children & children).empty?
children.clear if r
children.concat g.children
r
}.each do |merging|
children = merging.inject([]) { |cc, n| cc + n.children }
if merging.size > 1
first = merging.shift
children.each { |child|
unless first.children.any? { |c| c.equal? child }
child.parent >> child
first << child
end
}
merging.each { |n| n.parent >> n }
@tree.all.select { |n| n.leaf? and XNode === n }.each { |n| n.parent >> n }
end
end
}
}
end
 
def structure
@structure.size.times { |i|
@tree.all.select { |n| n.depth == i and n != @structure[i] }.each { |n|
n.replace(@structure[i])
}
}
end
 
def transform
aliases
nesting
aliases
structure
@tree
end
end
 
f = File.join(DIR, "nesting.xml")
#f = File.join(DIR, "source1.xml")
xml = XMLTransformer.new(f)
 
puts xml.tree.show_simple
puts
 
xml.transform
puts xml.tree.show_simple
 
puts
puts xml.result_tree.show_simple
 
puts
puts xml.tree.to_nokogiri_doc
 
if __FILE__ == $0
require "test/unit"
 
class TestXMLTransformer < Test::Unit::TestCase
%w[source1 source2 source3 nesting].each { |f|
define_method("test_#{f}") {
xml = XMLTransformer.new("#{DIR}/#{f}.xml")
assert_equal IO.read("#{DIR}/#{TARGET}.xml"), xml.transform.to_nokogiri_doc.to_s.strip
}
}
%w[2_structure+aliases].each { |f|
define_method("test_#{f}") {
xml = XMLTransformer.new("#{DIR}/#{f}.xml", "#{DIR}/2_result.xml", n: "name", a: "author")
assert_equal IO.read("#{DIR}/2_result.xml"), xml.transform.to_nokogiri_doc.to_s.strip
}
}
end
end
nesting.xml
XML
1 2 3 4 5 6 7
<?xml version="1.0"?>
<people>
<first_name_person>Winnie</first_name_person>
<last_name_person>the Pooh</last_name_person>
<first_name_person>Jamie</first_name_person>
<last_name_person>the Weeh</last_name_person>
</people>
tree.rb
Ruby
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368
=begin
Tree module, represent a generic Tree structure in Ruby
 
There are 2 ways to work:
Creating Node Objects as containers
or
extending the object to a TreeNode
 
Only the second way is kept now.
 
There are 2 ways to react when trying to add an object who's already in the Tree(same object_id):
raise an error
or
duplicate the object
 
The second is maybe more handy,
but a bit slower because it needs to dup every child added.
It can also be a little weird because you create every time a second object
The first handle that with not allowing a change of the parent if it is already setted
 
The second is implemented in tree/dup.rb
 
A third implementation is tree/fast.rb, who doesn't raise anything,
and then is very dangerous to use if you add children that already are in a Tree.
You then have to use .dup to every child you try to add, and you are not sure it is in a Tree
 
Here is a benchmark on some big Trees:
0.131 normal
0.118 dup (removing the #dup useless)
0.116 fast
=end
 
module TreeNode
INDENT = 2
 
# Build a Tree from an Array with the format: [root, [n1, [*n1_children], n2, [*n2_children], ...]]
# Here is a simple example:
# ['1', [ '11', ['111', '112'], '12' ]]
# 1
# 11
# 111
# 112
# 12
def self.from_ary(ary)
if ary.size == 2 and !(Array === ary[0]) and Array === ary[1]
ary[0].extend TreeNode
add_children_to_parent(*ary)
else
raise ArgumentError, "wrong format, must be [root, [child1, [children1], child2, [children2], ...]]"
end
end
private
def self.add_children_to_parent(parent, children)
children.each_slice(2) { |child, sub_children|
sub_children = [] if sub_children.nil?
parent << child
# if sub_children ~ [c1, [cc1], c2, [cc2], ...]
if sub_children.each_index.all? { |i| i.even? != (Array === sub_children[i]) }
add_children_to_parent(child, sub_children)
else
sub_children.each { |sc| child << sc }
end
}
parent
end
 
# Accessors
protected
def parent= p
unless @_node_parent.nil? or @_node_parent.equal?(p) or p.nil?
raise ArgumentError, "Not allowed to set parent if already setted"
end
@_node_parent = p
end
public
def children= children
children.grep(TreeNode) { |n|
n.parent >> n unless equal?(n.parent)
}
children.each { |c| self << c }
end
 
def parent
@_node_parent
end
def children
@_node_children
end
 
def node_init # ~ initialize
@_node_parent ||= nil
@_node_children ||= []
self
end
 
def self.extended(obj) # called when extended, equivalent of ::new for a module extended
obj.node_init
end
# If you want to include this module, you'll need to call node_init
 
# Call real object's method instead of TreeNode's methods
def call_object_method(method, *args, &b)
self.class.instance_method(method).bind(self).call(*args, &b)
end
alias :old :call_object_method
 
# Add child or children(Array)
def << child
case child
when TreeNode
raise ArgumentError, "Cannot duplicate the root" if root.equal?(child)
child.parent = self
@_node_children << child
child
when Array
child.each { |c| self << c }
nil
else
self << child.extend(TreeNode)
end
end
 
# Remove child or children(Array)
def >> node
case node
when TreeNode
parents = all.select { |n| n.children.include? node }
raise ArgumentError, "parent of #{node} not found" if parents.empty?
parents.each { |par|
node.parent = nil
par.children.delete_if { |n| n.equal?(node) }
}
when Array
node.each { |n| self >> n }
end
end
 
def [](o)
case o
when Integer
@_node_children[o]
else
raise ArgumentError
end
end
 
def inspect
"#<Node:#{super}>"
end
 
def root
root? ? self : @_node_parent.root
end
 
def length # Returns the total number of nodes in this (sub)tree
@_node_children.inject(1) { |sum, n| sum + n.length }
end
alias :size :length
 
def depth # depth in the tree (depth of root = 0)
# ascendants.length
n, i = self, 0
i += 1 until (n = n.parent).nil?
i
end
 
# Boolean
def root?
@_node_parent.nil?
end
def parent?(p)
# ascendants.include?(p)
n = self
(return true if n.equal?(p)) until (n = n.parent).nil?
# false
end
def leaf?
@_node_children.empty?
end
def first?
self == first_sibling
end
def last?
self == last_sibling
end
 
# Special nodes
# Returns the first sibling for this node. If this is the root node, returns itself.
def first_sibling
root? ? self : @_node_parent.children.first
end
# Returns the last sibling for this node. If this node is the root, returns itself.
def last_sibling
root? ? self : @_node_parent.children.last
end
 
# Enumerable
def all
root.descendants_with_self
end
def ascendants # [parent, ..., root] array of ancestors in reverse order
# root? ? [] : [@_node_parent] + @_node_parent.ascendants
n, a = self, []
a << n until (n = n.parent).nil?
a
end
def ascendants_with_self
root? ? [self] : [self] + @_node_parent.ascendants_with_self
end
def descendants # [child1, ..., leaf]
@_node_children.inject([]) { |desc, c| desc + [c] + c.descendants }
end
def descendants_with_self
@_node_children.inject([self]) { |desc, c| desc + c.descendants_with_self }
end
 
def leafs
descendants_with_self.select { |n| n.leaf? }
end
 
# Root methods
def show_tree
if root?
s = "*"
else
s = ""
#s << " " unless parent.last?
s << ' ' * ((depth - 1) * INDENT)# - (parent.last? ? 0 : 1) )
s << (last? ? "+" : "|")
s << "-" * (INDENT-1)
s << (leaf? ? ">" : "+")
end
s << " #{self}\n"
 
@_node_children.each { |c| s << c.send(__method__) }
s
end
 
def show_simple
@_node_children.inject(
" " * (INDENT * depth) + "#{self.to_s}\n" # to_s is added to ensure it's called even when it's a subclass of String
) { |s,c| s << c.send(__method__) }
end
 
def show
show_simple
end
 
# "Linearize" a tree: return an Array representation fo the tree
# with each leaf being the last element of each sub-Array, and the ascending nodes before
# t = "root".extend(TreeNode)
# t << "c1" << ["c11", "c12"]
# t << "c2"
# t.linearize #=>
# [
# [#<Node:"root">, #<Node:"c1">, #<Node:"c11">],
# [#<Node:"root">, #<Node:"c1">, #<Node:"c12">],
# [#<Node:"root">, #<Node:"c2">]
# ]
#
def linearize
descendants_with_self.select(&:leaf?).map { |n| n.ascendants_with_self.reverse }
end
end
 
if __FILE__ == $0
require "test/unit"
class TestTree < Test::Unit::TestCase
def setup
@c1, @c11, @c111, @c1111, @c1112, @c12, @c2, @c21 =
%w[c1 c11 c111 c1111 c1112 c12 c2 c21]
@root = @t = "root".extend(TreeNode)
@t << @c1 << @c11
@t << @c2 << @c21
@c1 << @c12
@t[0][0] << @c111 << [@c1111, @c1112]
#puts @t.show_simple
#puts @t.show
end
 
def test_basic
assert_equal @root, @t.root
 
assert_equal [@c11, @c111, @c1111, @c1112, @c12], @c1.descendants
assert_equal [@c1, @c11, @c111, @c1111, @c1112, @c12], @c1.descendants_with_self
 
assert_equal [@c11, @c1, @root], @c111.ascendants
assert_equal [@c111, @c11, @c1, @root], @c111.ascendants_with_self
 
assert_equal [@c1111, @c1112, @c12, @c21], @t.leafs
 
assert_equal 9, @t.length
 
assert_equal [
[@root, @c1, @c11, @c111, @c1111],
[@root, @c1, @c11, @c111, @c1112],
[@root, @c1, @c12],
[@root, @c2, @c21]
], @t.linearize
end
 
def test_depth
assert_equal 4, @c1112.depth
assert_equal 0, @root.depth
end
 
def test_equality
assert_equal @c1, @c1
assert_equal @c11, @c1 + "1"
end
 
def test_no_same_objects
e = assert_raise(ArgumentError) { @c2 << @c1 }
assert_equal "Not allowed to set parent if already setted", e.message
assert_equal 1, @t.all.select { |n| n == @c1 }.length
 
e = assert_raise(ArgumentError) { @c1 << @c21 }
assert_equal "Not allowed to set parent if already setted", e.message
assert_equal 1, @t.all.select { |n| n == @c21 }.length
 
new_root = "new_root"
new_root.extend(TreeNode) << @root # @root.parent = new_root.extend(TreeNode)
assert !@root.root?
assert new_root.root?
 
e = assert_raise(ArgumentError) { @c1 << new_root }
assert_equal "Cannot duplicate the root", e.message
assert new_root.root?
 
e = assert_raise(NoMethodError) { @c21.parent = @root }
assert_equal "protected method `parent=' called for #<Node:\"c21\">", e.message
assert_equal @c2, @c21.parent
end
def test_from_ary
a = ['1', [ '11', ['111', '112'], '12' ]]
t = TreeNode.from_ary(a)
e = <<TREE
1
11
111
112
12
TREE
assert_equal e, t.show_simple
end
def test_call_object_method
str = "Strin".extend(TreeNode)
str << "g"
assert_equal "Strin", str
assert_equal ["g"], str.children
str.old(:<<, "g")
assert_equal "String", str
assert_equal ["g"], str.children
end
def test_remove
@t >> @c2
assert_equal [@c1], @t.children
assert_equal nil, @c2.parent
end
end
end

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.