Skip to content

@eregon /2_result.xml secret
Created

Embed URL

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
<?xml version="1.0"?>
<playlist>
<tracklist>
<track>
<name>First Track!</name>
<author>Who is it?</author>
</track>
<track>
<name>My contribution</name>
<author>Me</author>
</track>
</tracklist>
</playlist>
<?xml version="1.0"?>
<pl>
<tl>
<n_t>First Track!</n_t>
<a_t>Who is it?</a_t>
<n_t>My contribution</n_t>
<a_t>Me</a_t>
</tl>
</pl>
# RPCFN 8: XML Transformer
# Benoit Daloze
# Ruby 1.9.2 >= r25032 (Enumerable#slice_before)
#
# This solution use my personnal TreeNode class
# My TreeNode class acts as an "inside" Tree, being a module extending the object that become a TreeNode
# Sorry if that class is long, it's not the main work for this challenge, and the API is easy, so just keep in mind:
# obj.extend TreeNode # make obj a TreeNode
# obj << other # add other as a child
# obj >> other # remove other from his parent obj
#
# The code is kind of speaking by itself, so that's why I did not comment it much
#
# Enjoy!
#
# Update:
# Add another test to show the flexibility (2_structure+aliases.xml)
# Structure is deduced from result, but only the common part
# The rest has to be explicitely given as aliases because of its ambiguity
require "nokogiri"
require_relative "tree"
DIR = "xmlfiles"
TARGET = "result"
class Array
def only
raise IndexError, "Array#only called on non-single-element array (size=#{size}): #{self}" unless size == 1
first
end
end
class XNode < String
def to_s
"<#{super}>"
end
end
class XText < String
end
class Nokogiri::XML::Node
def to_a
children.each_with_object([]) { |child, a|
case child
when Nokogiri::XML::Text
a << XText.new(child.text) unless child.to_s.match(/\A\s+\z/)
when Nokogiri::XML::Node
a << XNode.new(child.name) << child.to_a
end
}
end
end
module TreeNode
def to_nokogiri_node(doc)
node = Nokogiri::XML::Node.new(self, doc)
children.each { |c|
case c
when XNode
node << c.to_nokogiri_node(doc)
when XText
node << Nokogiri::XML::Text.new(c, doc)
end
}
node
end
def to_nokogiri_doc
Nokogiri::XML::Document.new.tap { |doc| doc << self.root.to_nokogiri_node(doc) }
end
end
class XMLTransformer
ALIASES = { "surname" => "last_name" }
attr_reader :tree, :result_tree
def initialize(file, result = "#{DIR}/#{TARGET}.xml", aliases = {})
@result_tree = TreeNode.from_ary Nokogiri::XML(IO.read(result)).to_a
@structure = result_tree.linearize.reduce(:&).map(&:dup)
@tree = TreeNode.from_ary Nokogiri::XML(IO.read(file)).to_a
@aliases = ALIASES.merge(aliases)
end
def to_s
@tree.show_simple
end
def aliases
tree.all.each { |n|
@aliases.keys.each { |to_replace|
n.replace(@aliases[to_replace].to_s) if n == to_replace.to_s
}
}
end
def nesting
# last_name => name > last
node_nested = @tree.all.select { |node| node.include?('_') }
groups = node_nested.group_by { |n| n.parent.object_id }
groups.each_value { |nodes|
if nodes.size >= 2
parent = nodes[0].parent
nodes.each { |n|
last_par = n.split('_').reverse.inject(parent) { |par, child|
par << child
child
}
n.children.each { |c|
c.parent >> c
last_par << c
}
parent >> n
}
end
}
# group same nodes
# criteria : no same subnodes under them
@tree.all.group_by.select { |n| !n.leaf? and !n.children.any? { |c| XText === c } and tree.all.count(n) > 1 }.uniq.each { |multi_node|
@tree.all.select { |n| n == multi_node }.group_by(&:depth).each_value { |group|
group.slice_before([]) { |g, children|
r = !(g.children & children).empty?
children.clear if r
children.concat g.children
r
}.each do |merging|
children = merging.inject([]) { |cc, n| cc + n.children }
if merging.size > 1
first = merging.shift
children.each { |child|
unless first.children.any? { |c| c.equal? child }
child.parent >> child
first << child
end
}
merging.each { |n| n.parent >> n }
@tree.all.select { |n| n.leaf? and XNode === n }.each { |n| n.parent >> n }
end
end
}
}
end
def structure
@structure.size.times { |i|
@tree.all.select { |n| n.depth == i and n != @structure[i] }.each { |n|
n.replace(@structure[i])
}
}
end
def transform
aliases
nesting
aliases
structure
@tree
end
end
f = File.join(DIR, "nesting.xml")
#f = File.join(DIR, "source1.xml")
xml = XMLTransformer.new(f)
puts xml.tree.show_simple
puts
xml.transform
puts xml.tree.show_simple
puts
puts xml.result_tree.show_simple
puts
puts xml.tree.to_nokogiri_doc
if __FILE__ == $0
require "test/unit"
class TestXMLTransformer < Test::Unit::TestCase
%w[source1 source2 source3 nesting].each { |f|
define_method("test_#{f}") {
xml = XMLTransformer.new("#{DIR}/#{f}.xml")
assert_equal IO.read("#{DIR}/#{TARGET}.xml"), xml.transform.to_nokogiri_doc.to_s.strip
}
}
%w[2_structure+aliases].each { |f|
define_method("test_#{f}") {
xml = XMLTransformer.new("#{DIR}/#{f}.xml", "#{DIR}/2_result.xml", n: "name", a: "author")
assert_equal IO.read("#{DIR}/2_result.xml"), xml.transform.to_nokogiri_doc.to_s.strip
}
}
end
end
<?xml version="1.0"?>
<people>
<first_name_person>Winnie</first_name_person>
<last_name_person>the Pooh</last_name_person>
<first_name_person>Jamie</first_name_person>
<last_name_person>the Weeh</last_name_person>
</people>
=begin
Tree module, represent a generic Tree structure in Ruby
There are 2 ways to work:
Creating Node Objects as containers
or
extending the object to a TreeNode
Only the second way is kept now.
There are 2 ways to react when trying to add an object who's already in the Tree(same object_id):
raise an error
or
duplicate the object
The second is maybe more handy,
but a bit slower because it needs to dup every child added.
It can also be a little weird because you create every time a second object
The first handle that with not allowing a change of the parent if it is already setted
The second is implemented in tree/dup.rb
A third implementation is tree/fast.rb, who doesn't raise anything,
and then is very dangerous to use if you add children that already are in a Tree.
You then have to use .dup to every child you try to add, and you are not sure it is in a Tree
Here is a benchmark on some big Trees:
0.131 normal
0.118 dup (removing the #dup useless)
0.116 fast
=end
module TreeNode
INDENT = 2
# Build a Tree from an Array with the format: [root, [n1, [*n1_children], n2, [*n2_children], ...]]
# Here is a simple example:
# ['1', [ '11', ['111', '112'], '12' ]]
# 1
# 11
# 111
# 112
# 12
def self.from_ary(ary)
if ary.size == 2 and !(Array === ary[0]) and Array === ary[1]
ary[0].extend TreeNode
add_children_to_parent(*ary)
else
raise ArgumentError, "wrong format, must be [root, [child1, [children1], child2, [children2], ...]]"
end
end
private
def self.add_children_to_parent(parent, children)
children.each_slice(2) { |child, sub_children|
sub_children = [] if sub_children.nil?
parent << child
# if sub_children ~ [c1, [cc1], c2, [cc2], ...]
if sub_children.each_index.all? { |i| i.even? != (Array === sub_children[i]) }
add_children_to_parent(child, sub_children)
else
sub_children.each { |sc| child << sc }
end
}
parent
end
# Accessors
protected
def parent= p
unless @_node_parent.nil? or @_node_parent.equal?(p) or p.nil?
raise ArgumentError, "Not allowed to set parent if already setted"
end
@_node_parent = p
end
public
def children= children
children.grep(TreeNode) { |n|
n.parent >> n unless equal?(n.parent)
}
children.each { |c| self << c }
end
def parent
@_node_parent
end
def children
@_node_children
end
def node_init # ~ initialize
@_node_parent ||= nil
@_node_children ||= []
self
end
def self.extended(obj) # called when extended, equivalent of ::new for a module extended
obj.node_init
end
# If you want to include this module, you'll need to call node_init
# Call real object's method instead of TreeNode's methods
def call_object_method(method, *args, &b)
self.class.instance_method(method).bind(self).call(*args, &b)
end
alias :old :call_object_method
# Add child or children(Array)
def << child
case child
when TreeNode
raise ArgumentError, "Cannot duplicate the root" if root.equal?(child)
child.parent = self
@_node_children << child
child
when Array
child.each { |c| self << c }
nil
else
self << child.extend(TreeNode)
end
end
# Remove child or children(Array)
def >> node
case node
when TreeNode
parents = all.select { |n| n.children.include? node }
raise ArgumentError, "parent of #{node} not found" if parents.empty?
parents.each { |par|
node.parent = nil
par.children.delete_if { |n| n.equal?(node) }
}
when Array
node.each { |n| self >> n }
end
end
def [](o)
case o
when Integer
@_node_children[o]
else
raise ArgumentError
end
end
def inspect
"#<Node:#{super}>"
end
def root
root? ? self : @_node_parent.root
end
def length # Returns the total number of nodes in this (sub)tree
@_node_children.inject(1) { |sum, n| sum + n.length }
end
alias :size :length
def depth # depth in the tree (depth of root = 0)
# ascendants.length
n, i = self, 0
i += 1 until (n = n.parent).nil?
i
end
# Boolean
def root?
@_node_parent.nil?
end
def parent?(p)
# ascendants.include?(p)
n = self
(return true if n.equal?(p)) until (n = n.parent).nil?
# false
end
def leaf?
@_node_children.empty?
end
def first?
self == first_sibling
end
def last?
self == last_sibling
end
# Special nodes
# Returns the first sibling for this node. If this is the root node, returns itself.
def first_sibling
root? ? self : @_node_parent.children.first
end
# Returns the last sibling for this node. If this node is the root, returns itself.
def last_sibling
root? ? self : @_node_parent.children.last
end
# Enumerable
def all
root.descendants_with_self
end
def ascendants # [parent, ..., root] array of ancestors in reverse order
# root? ? [] : [@_node_parent] + @_node_parent.ascendants
n, a = self, []
a << n until (n = n.parent).nil?
a
end
def ascendants_with_self
root? ? [self] : [self] + @_node_parent.ascendants_with_self
end
def descendants # [child1, ..., leaf]
@_node_children.inject([]) { |desc, c| desc + [c] + c.descendants }
end
def descendants_with_self
@_node_children.inject([self]) { |desc, c| desc + c.descendants_with_self }
end
def leafs
descendants_with_self.select { |n| n.leaf? }
end
# Root methods
def show_tree
if root?
s = "*"
else
s = ""
#s << " " unless parent.last?
s << ' ' * ((depth - 1) * INDENT)# - (parent.last? ? 0 : 1) )
s << (last? ? "+" : "|")
s << "-" * (INDENT-1)
s << (leaf? ? ">" : "+")
end
s << " #{self}\n"
@_node_children.each { |c| s << c.send(__method__) }
s
end
def show_simple
@_node_children.inject(
" " * (INDENT * depth) + "#{self.to_s}\n" # to_s is added to ensure it's called even when it's a subclass of String
) { |s,c| s << c.send(__method__) }
end
def show
show_simple
end
# "Linearize" a tree: return an Array representation fo the tree
# with each leaf being the last element of each sub-Array, and the ascending nodes before
# t = "root".extend(TreeNode)
# t << "c1" << ["c11", "c12"]
# t << "c2"
# t.linearize #=>
# [
# [#<Node:"root">, #<Node:"c1">, #<Node:"c11">],
# [#<Node:"root">, #<Node:"c1">, #<Node:"c12">],
# [#<Node:"root">, #<Node:"c2">]
# ]
#
def linearize
descendants_with_self.select(&:leaf?).map { |n| n.ascendants_with_self.reverse }
end
end
if __FILE__ == $0
require "test/unit"
class TestTree < Test::Unit::TestCase
def setup
@c1, @c11, @c111, @c1111, @c1112, @c12, @c2, @c21 =
%w[c1 c11 c111 c1111 c1112 c12 c2 c21]
@root = @t = "root".extend(TreeNode)
@t << @c1 << @c11
@t << @c2 << @c21
@c1 << @c12
@t[0][0] << @c111 << [@c1111, @c1112]
#puts @t.show_simple
#puts @t.show
end
def test_basic
assert_equal @root, @t.root
assert_equal [@c11, @c111, @c1111, @c1112, @c12], @c1.descendants
assert_equal [@c1, @c11, @c111, @c1111, @c1112, @c12], @c1.descendants_with_self
assert_equal [@c11, @c1, @root], @c111.ascendants
assert_equal [@c111, @c11, @c1, @root], @c111.ascendants_with_self
assert_equal [@c1111, @c1112, @c12, @c21], @t.leafs
assert_equal 9, @t.length
assert_equal [
[@root, @c1, @c11, @c111, @c1111],
[@root, @c1, @c11, @c111, @c1112],
[@root, @c1, @c12],
[@root, @c2, @c21]
], @t.linearize
end
def test_depth
assert_equal 4, @c1112.depth
assert_equal 0, @root.depth
end
def test_equality
assert_equal @c1, @c1
assert_equal @c11, @c1 + "1"
end
def test_no_same_objects
e = assert_raise(ArgumentError) { @c2 << @c1 }
assert_equal "Not allowed to set parent if already setted", e.message
assert_equal 1, @t.all.select { |n| n == @c1 }.length
e = assert_raise(ArgumentError) { @c1 << @c21 }
assert_equal "Not allowed to set parent if already setted", e.message
assert_equal 1, @t.all.select { |n| n == @c21 }.length
new_root = "new_root"
new_root.extend(TreeNode) << @root # @root.parent = new_root.extend(TreeNode)
assert !@root.root?
assert new_root.root?
e = assert_raise(ArgumentError) { @c1 << new_root }
assert_equal "Cannot duplicate the root", e.message
assert new_root.root?
e = assert_raise(NoMethodError) { @c21.parent = @root }
assert_equal "protected method `parent=' called for #<Node:\"c21\">", e.message
assert_equal @c2, @c21.parent
end
def test_from_ary
a = ['1', [ '11', ['111', '112'], '12' ]]
t = TreeNode.from_ary(a)
e = <<TREE
1
11
111
112
12
TREE
assert_equal e, t.show_simple
end
def test_call_object_method
str = "Strin".extend(TreeNode)
str << "g"
assert_equal "Strin", str
assert_equal ["g"], str.children
str.old(:<<, "g")
assert_equal "String", str
assert_equal ["g"], str.children
end
def test_remove
@t >> @c2
assert_equal [@c1], @t.children
assert_equal nil, @c2.parent
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.