Skip to content

Instantly share code, notes, and snippets.

@eregon
Created April 26, 2010 17:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eregon/818225a4c22a01f26a4a to your computer and use it in GitHub Desktop.
Save eregon/818225a4c22a01f26a4a to your computer and use it in GitHub Desktop.
<?xml version="1.0"?>
<playlist>
<tracklist>
<track>
<name>First Track!</name>
<author>Who is it?</author>
</track>
<track>
<name>My contribution</name>
<author>Me</author>
</track>
</tracklist>
</playlist>
<?xml version="1.0"?>
<pl>
<tl>
<n_t>First Track!</n_t>
<a_t>Who is it?</a_t>
<n_t>My contribution</n_t>
<a_t>Me</a_t>
</tl>
</pl>
# RPCFN 8: XML Transformer
# Benoit Daloze
# Ruby 1.9.2 >= r25032 (Enumerable#slice_before)
#
# This solution use my personnal TreeNode class
# My TreeNode class acts as an "inside" Tree, being a module extending the object that become a TreeNode
# Sorry if that class is long, it's not the main work for this challenge, and the API is easy, so just keep in mind:
# obj.extend TreeNode # make obj a TreeNode
# obj << other # add other as a child
# obj >> other # remove other from his parent obj
#
# The code is kind of speaking by itself, so that's why I did not comment it much
#
# Enjoy!
#
# Update:
# Add another test to show the flexibility (2_structure+aliases.xml)
# Structure is deduced from result, but only the common part
# The rest has to be explicitely given as aliases because of its ambiguity
require "nokogiri"
require_relative "tree"
DIR = "xmlfiles"
TARGET = "result"
class Array
def only
raise IndexError, "Array#only called on non-single-element array (size=#{size}): #{self}" unless size == 1
first
end
end
class XNode < String
def to_s
"<#{super}>"
end
end
class XText < String
end
class Nokogiri::XML::Node
def to_a
children.each_with_object([]) { |child, a|
case child
when Nokogiri::XML::Text
a << XText.new(child.text) unless child.to_s.match(/\A\s+\z/)
when Nokogiri::XML::Node
a << XNode.new(child.name) << child.to_a
end
}
end
end
module TreeNode
def to_nokogiri_node(doc)
node = Nokogiri::XML::Node.new(self, doc)
children.each { |c|
case c
when XNode
node << c.to_nokogiri_node(doc)
when XText
node << Nokogiri::XML::Text.new(c, doc)
end
}
node
end
def to_nokogiri_doc
Nokogiri::XML::Document.new.tap { |doc| doc << self.root.to_nokogiri_node(doc) }
end
end
class XMLTransformer
ALIASES = { "surname" => "last_name" }
attr_reader :tree, :result_tree
def initialize(file, result = "#{DIR}/#{TARGET}.xml", aliases = {})
@result_tree = TreeNode.from_ary Nokogiri::XML(IO.read(result)).to_a
@structure = result_tree.linearize.reduce(:&).map(&:dup)
@tree = TreeNode.from_ary Nokogiri::XML(IO.read(file)).to_a
@aliases = ALIASES.merge(aliases)
end
def to_s
@tree.show_simple
end
def aliases
tree.all.each { |n|
@aliases.keys.each { |to_replace|
n.replace(@aliases[to_replace].to_s) if n == to_replace.to_s
}
}
end
def nesting
# last_name => name > last
node_nested = @tree.all.select { |node| node.include?('_') }
groups = node_nested.group_by { |n| n.parent.object_id }
groups.each_value { |nodes|
if nodes.size >= 2
parent = nodes[0].parent
nodes.each { |n|
last_par = n.split('_').reverse.inject(parent) { |par, child|
par << child
child
}
n.children.each { |c|
c.parent >> c
last_par << c
}
parent >> n
}
end
}
# group same nodes
# criteria : no same subnodes under them
@tree.all.group_by.select { |n| !n.leaf? and !n.children.any? { |c| XText === c } and tree.all.count(n) > 1 }.uniq.each { |multi_node|
@tree.all.select { |n| n == multi_node }.group_by(&:depth).each_value { |group|
group.slice_before([]) { |g, children|
r = !(g.children & children).empty?
children.clear if r
children.concat g.children
r
}.each do |merging|
children = merging.inject([]) { |cc, n| cc + n.children }
if merging.size > 1
first = merging.shift
children.each { |child|
unless first.children.any? { |c| c.equal? child }
child.parent >> child
first << child
end
}
merging.each { |n| n.parent >> n }
@tree.all.select { |n| n.leaf? and XNode === n }.each { |n| n.parent >> n }
end
end
}
}
end
def structure
@structure.size.times { |i|
@tree.all.select { |n| n.depth == i and n != @structure[i] }.each { |n|
n.replace(@structure[i])
}
}
end
def transform
aliases
nesting
aliases
structure
@tree
end
end
f = File.join(DIR, "nesting.xml")
#f = File.join(DIR, "source1.xml")
xml = XMLTransformer.new(f)
puts xml.tree.show_simple
puts
xml.transform
puts xml.tree.show_simple
puts
puts xml.result_tree.show_simple
puts
puts xml.tree.to_nokogiri_doc
if __FILE__ == $0
require "test/unit"
class TestXMLTransformer < Test::Unit::TestCase
%w[source1 source2 source3 nesting].each { |f|
define_method("test_#{f}") {
xml = XMLTransformer.new("#{DIR}/#{f}.xml")
assert_equal IO.read("#{DIR}/#{TARGET}.xml"), xml.transform.to_nokogiri_doc.to_s.strip
}
}
%w[2_structure+aliases].each { |f|
define_method("test_#{f}") {
xml = XMLTransformer.new("#{DIR}/#{f}.xml", "#{DIR}/2_result.xml", n: "name", a: "author")
assert_equal IO.read("#{DIR}/2_result.xml"), xml.transform.to_nokogiri_doc.to_s.strip
}
}
end
end
<?xml version="1.0"?>
<people>
<first_name_person>Winnie</first_name_person>
<last_name_person>the Pooh</last_name_person>
<first_name_person>Jamie</first_name_person>
<last_name_person>the Weeh</last_name_person>
</people>
=begin
Tree module, represent a generic Tree structure in Ruby
There are 2 ways to work:
Creating Node Objects as containers
or
extending the object to a TreeNode
Only the second way is kept now.
There are 2 ways to react when trying to add an object who's already in the Tree(same object_id):
raise an error
or
duplicate the object
The second is maybe more handy,
but a bit slower because it needs to dup every child added.
It can also be a little weird because you create every time a second object
The first handle that with not allowing a change of the parent if it is already setted
The second is implemented in tree/dup.rb
A third implementation is tree/fast.rb, who doesn't raise anything,
and then is very dangerous to use if you add children that already are in a Tree.
You then have to use .dup to every child you try to add, and you are not sure it is in a Tree
Here is a benchmark on some big Trees:
0.131 normal
0.118 dup (removing the #dup useless)
0.116 fast
=end
module TreeNode
INDENT = 2
# Build a Tree from an Array with the format: [root, [n1, [*n1_children], n2, [*n2_children], ...]]
# Here is a simple example:
# ['1', [ '11', ['111', '112'], '12' ]]
# 1
# 11
# 111
# 112
# 12
def self.from_ary(ary)
if ary.size == 2 and !(Array === ary[0]) and Array === ary[1]
ary[0].extend TreeNode
add_children_to_parent(*ary)
else
raise ArgumentError, "wrong format, must be [root, [child1, [children1], child2, [children2], ...]]"
end
end
private
def self.add_children_to_parent(parent, children)
children.each_slice(2) { |child, sub_children|
sub_children = [] if sub_children.nil?
parent << child
# if sub_children ~ [c1, [cc1], c2, [cc2], ...]
if sub_children.each_index.all? { |i| i.even? != (Array === sub_children[i]) }
add_children_to_parent(child, sub_children)
else
sub_children.each { |sc| child << sc }
end
}
parent
end
# Accessors
protected
def parent= p
unless @_node_parent.nil? or @_node_parent.equal?(p) or p.nil?
raise ArgumentError, "Not allowed to set parent if already setted"
end
@_node_parent = p
end
public
def children= children
children.grep(TreeNode) { |n|
n.parent >> n unless equal?(n.parent)
}
children.each { |c| self << c }
end
def parent
@_node_parent
end
def children
@_node_children
end
def node_init # ~ initialize
@_node_parent ||= nil
@_node_children ||= []
self
end
def self.extended(obj) # called when extended, equivalent of ::new for a module extended
obj.node_init
end
# If you want to include this module, you'll need to call node_init
# Call real object's method instead of TreeNode's methods
def call_object_method(method, *args, &b)
self.class.instance_method(method).bind(self).call(*args, &b)
end
alias :old :call_object_method
# Add child or children(Array)
def << child
case child
when TreeNode
raise ArgumentError, "Cannot duplicate the root" if root.equal?(child)
child.parent = self
@_node_children << child
child
when Array
child.each { |c| self << c }
nil
else
self << child.extend(TreeNode)
end
end
# Remove child or children(Array)
def >> node
case node
when TreeNode
parents = all.select { |n| n.children.include? node }
raise ArgumentError, "parent of #{node} not found" if parents.empty?
parents.each { |par|
node.parent = nil
par.children.delete_if { |n| n.equal?(node) }
}
when Array
node.each { |n| self >> n }
end
end
def [](o)
case o
when Integer
@_node_children[o]
else
raise ArgumentError
end
end
def inspect
"#<Node:#{super}>"
end
def root
root? ? self : @_node_parent.root
end
def length # Returns the total number of nodes in this (sub)tree
@_node_children.inject(1) { |sum, n| sum + n.length }
end
alias :size :length
def depth # depth in the tree (depth of root = 0)
# ascendants.length
n, i = self, 0
i += 1 until (n = n.parent).nil?
i
end
# Boolean
def root?
@_node_parent.nil?
end
def parent?(p)
# ascendants.include?(p)
n = self
(return true if n.equal?(p)) until (n = n.parent).nil?
# false
end
def leaf?
@_node_children.empty?
end
def first?
self == first_sibling
end
def last?
self == last_sibling
end
# Special nodes
# Returns the first sibling for this node. If this is the root node, returns itself.
def first_sibling
root? ? self : @_node_parent.children.first
end
# Returns the last sibling for this node. If this node is the root, returns itself.
def last_sibling
root? ? self : @_node_parent.children.last
end
# Enumerable
def all
root.descendants_with_self
end
def ascendants # [parent, ..., root] array of ancestors in reverse order
# root? ? [] : [@_node_parent] + @_node_parent.ascendants
n, a = self, []
a << n until (n = n.parent).nil?
a
end
def ascendants_with_self
root? ? [self] : [self] + @_node_parent.ascendants_with_self
end
def descendants # [child1, ..., leaf]
@_node_children.inject([]) { |desc, c| desc + [c] + c.descendants }
end
def descendants_with_self
@_node_children.inject([self]) { |desc, c| desc + c.descendants_with_self }
end
def leafs
descendants_with_self.select { |n| n.leaf? }
end
# Root methods
def show_tree
if root?
s = "*"
else
s = ""
#s << " " unless parent.last?
s << ' ' * ((depth - 1) * INDENT)# - (parent.last? ? 0 : 1) )
s << (last? ? "+" : "|")
s << "-" * (INDENT-1)
s << (leaf? ? ">" : "+")
end
s << " #{self}\n"
@_node_children.each { |c| s << c.send(__method__) }
s
end
def show_simple
@_node_children.inject(
" " * (INDENT * depth) + "#{self.to_s}\n" # to_s is added to ensure it's called even when it's a subclass of String
) { |s,c| s << c.send(__method__) }
end
def show
show_simple
end
# "Linearize" a tree: return an Array representation fo the tree
# with each leaf being the last element of each sub-Array, and the ascending nodes before
# t = "root".extend(TreeNode)
# t << "c1" << ["c11", "c12"]
# t << "c2"
# t.linearize #=>
# [
# [#<Node:"root">, #<Node:"c1">, #<Node:"c11">],
# [#<Node:"root">, #<Node:"c1">, #<Node:"c12">],
# [#<Node:"root">, #<Node:"c2">]
# ]
#
def linearize
descendants_with_self.select(&:leaf?).map { |n| n.ascendants_with_self.reverse }
end
end
if __FILE__ == $0
require "test/unit"
class TestTree < Test::Unit::TestCase
def setup
@c1, @c11, @c111, @c1111, @c1112, @c12, @c2, @c21 =
%w[c1 c11 c111 c1111 c1112 c12 c2 c21]
@root = @t = "root".extend(TreeNode)
@t << @c1 << @c11
@t << @c2 << @c21
@c1 << @c12
@t[0][0] << @c111 << [@c1111, @c1112]
#puts @t.show_simple
#puts @t.show
end
def test_basic
assert_equal @root, @t.root
assert_equal [@c11, @c111, @c1111, @c1112, @c12], @c1.descendants
assert_equal [@c1, @c11, @c111, @c1111, @c1112, @c12], @c1.descendants_with_self
assert_equal [@c11, @c1, @root], @c111.ascendants
assert_equal [@c111, @c11, @c1, @root], @c111.ascendants_with_self
assert_equal [@c1111, @c1112, @c12, @c21], @t.leafs
assert_equal 9, @t.length
assert_equal [
[@root, @c1, @c11, @c111, @c1111],
[@root, @c1, @c11, @c111, @c1112],
[@root, @c1, @c12],
[@root, @c2, @c21]
], @t.linearize
end
def test_depth
assert_equal 4, @c1112.depth
assert_equal 0, @root.depth
end
def test_equality
assert_equal @c1, @c1
assert_equal @c11, @c1 + "1"
end
def test_no_same_objects
e = assert_raise(ArgumentError) { @c2 << @c1 }
assert_equal "Not allowed to set parent if already setted", e.message
assert_equal 1, @t.all.select { |n| n == @c1 }.length
e = assert_raise(ArgumentError) { @c1 << @c21 }
assert_equal "Not allowed to set parent if already setted", e.message
assert_equal 1, @t.all.select { |n| n == @c21 }.length
new_root = "new_root"
new_root.extend(TreeNode) << @root # @root.parent = new_root.extend(TreeNode)
assert !@root.root?
assert new_root.root?
e = assert_raise(ArgumentError) { @c1 << new_root }
assert_equal "Cannot duplicate the root", e.message
assert new_root.root?
e = assert_raise(NoMethodError) { @c21.parent = @root }
assert_equal "protected method `parent=' called for #<Node:\"c21\">", e.message
assert_equal @c2, @c21.parent
end
def test_from_ary
a = ['1', [ '11', ['111', '112'], '12' ]]
t = TreeNode.from_ary(a)
e = <<TREE
1
11
111
112
12
TREE
assert_equal e, t.show_simple
end
def test_call_object_method
str = "Strin".extend(TreeNode)
str << "g"
assert_equal "Strin", str
assert_equal ["g"], str.children
str.old(:<<, "g")
assert_equal "String", str
assert_equal ["g"], str.children
end
def test_remove
@t >> @c2
assert_equal [@c1], @t.children
assert_equal nil, @c2.parent
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment