-
-
Save eregon/818225a4c22a01f26a4a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0"?> | |
<playlist> | |
<tracklist> | |
<track> | |
<name>First Track!</name> | |
<author>Who is it?</author> | |
</track> | |
<track> | |
<name>My contribution</name> | |
<author>Me</author> | |
</track> | |
</tracklist> | |
</playlist> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0"?> | |
<pl> | |
<tl> | |
<n_t>First Track!</n_t> | |
<a_t>Who is it?</a_t> | |
<n_t>My contribution</n_t> | |
<a_t>Me</a_t> | |
</tl> | |
</pl> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# RPCFN 8: XML Transformer | |
# Benoit Daloze | |
# Ruby 1.9.2 >= r25032 (Enumerable#slice_before) | |
# | |
# This solution use my personnal TreeNode class | |
# My TreeNode class acts as an "inside" Tree, being a module extending the object that become a TreeNode | |
# Sorry if that class is long, it's not the main work for this challenge, and the API is easy, so just keep in mind: | |
# obj.extend TreeNode # make obj a TreeNode | |
# obj << other # add other as a child | |
# obj >> other # remove other from his parent obj | |
# | |
# The code is kind of speaking by itself, so that's why I did not comment it much | |
# | |
# Enjoy! | |
# | |
# Update: | |
# Add another test to show the flexibility (2_structure+aliases.xml) | |
# Structure is deduced from result, but only the common part | |
# The rest has to be explicitely given as aliases because of its ambiguity | |
require "nokogiri" | |
require_relative "tree" | |
DIR = "xmlfiles" | |
TARGET = "result" | |
class Array | |
def only | |
raise IndexError, "Array#only called on non-single-element array (size=#{size}): #{self}" unless size == 1 | |
first | |
end | |
end | |
class XNode < String | |
def to_s | |
"<#{super}>" | |
end | |
end | |
class XText < String | |
end | |
class Nokogiri::XML::Node | |
def to_a | |
children.each_with_object([]) { |child, a| | |
case child | |
when Nokogiri::XML::Text | |
a << XText.new(child.text) unless child.to_s.match(/\A\s+\z/) | |
when Nokogiri::XML::Node | |
a << XNode.new(child.name) << child.to_a | |
end | |
} | |
end | |
end | |
module TreeNode | |
def to_nokogiri_node(doc) | |
node = Nokogiri::XML::Node.new(self, doc) | |
children.each { |c| | |
case c | |
when XNode | |
node << c.to_nokogiri_node(doc) | |
when XText | |
node << Nokogiri::XML::Text.new(c, doc) | |
end | |
} | |
node | |
end | |
def to_nokogiri_doc | |
Nokogiri::XML::Document.new.tap { |doc| doc << self.root.to_nokogiri_node(doc) } | |
end | |
end | |
class XMLTransformer | |
ALIASES = { "surname" => "last_name" } | |
attr_reader :tree, :result_tree | |
def initialize(file, result = "#{DIR}/#{TARGET}.xml", aliases = {}) | |
@result_tree = TreeNode.from_ary Nokogiri::XML(IO.read(result)).to_a | |
@structure = result_tree.linearize.reduce(:&).map(&:dup) | |
@tree = TreeNode.from_ary Nokogiri::XML(IO.read(file)).to_a | |
@aliases = ALIASES.merge(aliases) | |
end | |
def to_s | |
@tree.show_simple | |
end | |
def aliases | |
tree.all.each { |n| | |
@aliases.keys.each { |to_replace| | |
n.replace(@aliases[to_replace].to_s) if n == to_replace.to_s | |
} | |
} | |
end | |
def nesting | |
# last_name => name > last | |
node_nested = @tree.all.select { |node| node.include?('_') } | |
groups = node_nested.group_by { |n| n.parent.object_id } | |
groups.each_value { |nodes| | |
if nodes.size >= 2 | |
parent = nodes[0].parent | |
nodes.each { |n| | |
last_par = n.split('_').reverse.inject(parent) { |par, child| | |
par << child | |
child | |
} | |
n.children.each { |c| | |
c.parent >> c | |
last_par << c | |
} | |
parent >> n | |
} | |
end | |
} | |
# group same nodes | |
# criteria : no same subnodes under them | |
@tree.all.group_by.select { |n| !n.leaf? and !n.children.any? { |c| XText === c } and tree.all.count(n) > 1 }.uniq.each { |multi_node| | |
@tree.all.select { |n| n == multi_node }.group_by(&:depth).each_value { |group| | |
group.slice_before([]) { |g, children| | |
r = !(g.children & children).empty? | |
children.clear if r | |
children.concat g.children | |
r | |
}.each do |merging| | |
children = merging.inject([]) { |cc, n| cc + n.children } | |
if merging.size > 1 | |
first = merging.shift | |
children.each { |child| | |
unless first.children.any? { |c| c.equal? child } | |
child.parent >> child | |
first << child | |
end | |
} | |
merging.each { |n| n.parent >> n } | |
@tree.all.select { |n| n.leaf? and XNode === n }.each { |n| n.parent >> n } | |
end | |
end | |
} | |
} | |
end | |
def structure | |
@structure.size.times { |i| | |
@tree.all.select { |n| n.depth == i and n != @structure[i] }.each { |n| | |
n.replace(@structure[i]) | |
} | |
} | |
end | |
def transform | |
aliases | |
nesting | |
aliases | |
structure | |
@tree | |
end | |
end | |
f = File.join(DIR, "nesting.xml") | |
#f = File.join(DIR, "source1.xml") | |
xml = XMLTransformer.new(f) | |
puts xml.tree.show_simple | |
puts | |
xml.transform | |
puts xml.tree.show_simple | |
puts | |
puts xml.result_tree.show_simple | |
puts | |
puts xml.tree.to_nokogiri_doc | |
if __FILE__ == $0 | |
require "test/unit" | |
class TestXMLTransformer < Test::Unit::TestCase | |
%w[source1 source2 source3 nesting].each { |f| | |
define_method("test_#{f}") { | |
xml = XMLTransformer.new("#{DIR}/#{f}.xml") | |
assert_equal IO.read("#{DIR}/#{TARGET}.xml"), xml.transform.to_nokogiri_doc.to_s.strip | |
} | |
} | |
%w[2_structure+aliases].each { |f| | |
define_method("test_#{f}") { | |
xml = XMLTransformer.new("#{DIR}/#{f}.xml", "#{DIR}/2_result.xml", n: "name", a: "author") | |
assert_equal IO.read("#{DIR}/2_result.xml"), xml.transform.to_nokogiri_doc.to_s.strip | |
} | |
} | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0"?> | |
<people> | |
<first_name_person>Winnie</first_name_person> | |
<last_name_person>the Pooh</last_name_person> | |
<first_name_person>Jamie</first_name_person> | |
<last_name_person>the Weeh</last_name_person> | |
</people> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
=begin | |
Tree module, represent a generic Tree structure in Ruby | |
There are 2 ways to work: | |
Creating Node Objects as containers | |
or | |
extending the object to a TreeNode | |
Only the second way is kept now. | |
There are 2 ways to react when trying to add an object who's already in the Tree(same object_id): | |
raise an error | |
or | |
duplicate the object | |
The second is maybe more handy, | |
but a bit slower because it needs to dup every child added. | |
It can also be a little weird because you create every time a second object | |
The first handle that with not allowing a change of the parent if it is already setted | |
The second is implemented in tree/dup.rb | |
A third implementation is tree/fast.rb, who doesn't raise anything, | |
and then is very dangerous to use if you add children that already are in a Tree. | |
You then have to use .dup to every child you try to add, and you are not sure it is in a Tree | |
Here is a benchmark on some big Trees: | |
0.131 normal | |
0.118 dup (removing the #dup useless) | |
0.116 fast | |
=end | |
module TreeNode | |
INDENT = 2 | |
# Build a Tree from an Array with the format: [root, [n1, [*n1_children], n2, [*n2_children], ...]] | |
# Here is a simple example: | |
# ['1', [ '11', ['111', '112'], '12' ]] | |
# 1 | |
# 11 | |
# 111 | |
# 112 | |
# 12 | |
def self.from_ary(ary) | |
if ary.size == 2 and !(Array === ary[0]) and Array === ary[1] | |
ary[0].extend TreeNode | |
add_children_to_parent(*ary) | |
else | |
raise ArgumentError, "wrong format, must be [root, [child1, [children1], child2, [children2], ...]]" | |
end | |
end | |
private | |
def self.add_children_to_parent(parent, children) | |
children.each_slice(2) { |child, sub_children| | |
sub_children = [] if sub_children.nil? | |
parent << child | |
# if sub_children ~ [c1, [cc1], c2, [cc2], ...] | |
if sub_children.each_index.all? { |i| i.even? != (Array === sub_children[i]) } | |
add_children_to_parent(child, sub_children) | |
else | |
sub_children.each { |sc| child << sc } | |
end | |
} | |
parent | |
end | |
# Accessors | |
protected | |
def parent= p | |
unless @_node_parent.nil? or @_node_parent.equal?(p) or p.nil? | |
raise ArgumentError, "Not allowed to set parent if already setted" | |
end | |
@_node_parent = p | |
end | |
public | |
def children= children | |
children.grep(TreeNode) { |n| | |
n.parent >> n unless equal?(n.parent) | |
} | |
children.each { |c| self << c } | |
end | |
def parent | |
@_node_parent | |
end | |
def children | |
@_node_children | |
end | |
def node_init # ~ initialize | |
@_node_parent ||= nil | |
@_node_children ||= [] | |
self | |
end | |
def self.extended(obj) # called when extended, equivalent of ::new for a module extended | |
obj.node_init | |
end | |
# If you want to include this module, you'll need to call node_init | |
# Call real object's method instead of TreeNode's methods | |
def call_object_method(method, *args, &b) | |
self.class.instance_method(method).bind(self).call(*args, &b) | |
end | |
alias :old :call_object_method | |
# Add child or children(Array) | |
def << child | |
case child | |
when TreeNode | |
raise ArgumentError, "Cannot duplicate the root" if root.equal?(child) | |
child.parent = self | |
@_node_children << child | |
child | |
when Array | |
child.each { |c| self << c } | |
nil | |
else | |
self << child.extend(TreeNode) | |
end | |
end | |
# Remove child or children(Array) | |
def >> node | |
case node | |
when TreeNode | |
parents = all.select { |n| n.children.include? node } | |
raise ArgumentError, "parent of #{node} not found" if parents.empty? | |
parents.each { |par| | |
node.parent = nil | |
par.children.delete_if { |n| n.equal?(node) } | |
} | |
when Array | |
node.each { |n| self >> n } | |
end | |
end | |
def [](o) | |
case o | |
when Integer | |
@_node_children[o] | |
else | |
raise ArgumentError | |
end | |
end | |
def inspect | |
"#<Node:#{super}>" | |
end | |
def root | |
root? ? self : @_node_parent.root | |
end | |
def length # Returns the total number of nodes in this (sub)tree | |
@_node_children.inject(1) { |sum, n| sum + n.length } | |
end | |
alias :size :length | |
def depth # depth in the tree (depth of root = 0) | |
# ascendants.length | |
n, i = self, 0 | |
i += 1 until (n = n.parent).nil? | |
i | |
end | |
# Boolean | |
def root? | |
@_node_parent.nil? | |
end | |
def parent?(p) | |
# ascendants.include?(p) | |
n = self | |
(return true if n.equal?(p)) until (n = n.parent).nil? | |
# false | |
end | |
def leaf? | |
@_node_children.empty? | |
end | |
def first? | |
self == first_sibling | |
end | |
def last? | |
self == last_sibling | |
end | |
# Special nodes | |
# Returns the first sibling for this node. If this is the root node, returns itself. | |
def first_sibling | |
root? ? self : @_node_parent.children.first | |
end | |
# Returns the last sibling for this node. If this node is the root, returns itself. | |
def last_sibling | |
root? ? self : @_node_parent.children.last | |
end | |
# Enumerable | |
def all | |
root.descendants_with_self | |
end | |
def ascendants # [parent, ..., root] array of ancestors in reverse order | |
# root? ? [] : [@_node_parent] + @_node_parent.ascendants | |
n, a = self, [] | |
a << n until (n = n.parent).nil? | |
a | |
end | |
def ascendants_with_self | |
root? ? [self] : [self] + @_node_parent.ascendants_with_self | |
end | |
def descendants # [child1, ..., leaf] | |
@_node_children.inject([]) { |desc, c| desc + [c] + c.descendants } | |
end | |
def descendants_with_self | |
@_node_children.inject([self]) { |desc, c| desc + c.descendants_with_self } | |
end | |
def leafs | |
descendants_with_self.select { |n| n.leaf? } | |
end | |
# Root methods | |
def show_tree | |
if root? | |
s = "*" | |
else | |
s = "" | |
#s << " " unless parent.last? | |
s << ' ' * ((depth - 1) * INDENT)# - (parent.last? ? 0 : 1) ) | |
s << (last? ? "+" : "|") | |
s << "-" * (INDENT-1) | |
s << (leaf? ? ">" : "+") | |
end | |
s << " #{self}\n" | |
@_node_children.each { |c| s << c.send(__method__) } | |
s | |
end | |
def show_simple | |
@_node_children.inject( | |
" " * (INDENT * depth) + "#{self.to_s}\n" # to_s is added to ensure it's called even when it's a subclass of String | |
) { |s,c| s << c.send(__method__) } | |
end | |
def show | |
show_simple | |
end | |
# "Linearize" a tree: return an Array representation fo the tree | |
# with each leaf being the last element of each sub-Array, and the ascending nodes before | |
# t = "root".extend(TreeNode) | |
# t << "c1" << ["c11", "c12"] | |
# t << "c2" | |
# t.linearize #=> | |
# [ | |
# [#<Node:"root">, #<Node:"c1">, #<Node:"c11">], | |
# [#<Node:"root">, #<Node:"c1">, #<Node:"c12">], | |
# [#<Node:"root">, #<Node:"c2">] | |
# ] | |
# | |
def linearize | |
descendants_with_self.select(&:leaf?).map { |n| n.ascendants_with_self.reverse } | |
end | |
end | |
if __FILE__ == $0 | |
require "test/unit" | |
class TestTree < Test::Unit::TestCase | |
def setup | |
@c1, @c11, @c111, @c1111, @c1112, @c12, @c2, @c21 = | |
%w[c1 c11 c111 c1111 c1112 c12 c2 c21] | |
@root = @t = "root".extend(TreeNode) | |
@t << @c1 << @c11 | |
@t << @c2 << @c21 | |
@c1 << @c12 | |
@t[0][0] << @c111 << [@c1111, @c1112] | |
#puts @t.show_simple | |
#puts @t.show | |
end | |
def test_basic | |
assert_equal @root, @t.root | |
assert_equal [@c11, @c111, @c1111, @c1112, @c12], @c1.descendants | |
assert_equal [@c1, @c11, @c111, @c1111, @c1112, @c12], @c1.descendants_with_self | |
assert_equal [@c11, @c1, @root], @c111.ascendants | |
assert_equal [@c111, @c11, @c1, @root], @c111.ascendants_with_self | |
assert_equal [@c1111, @c1112, @c12, @c21], @t.leafs | |
assert_equal 9, @t.length | |
assert_equal [ | |
[@root, @c1, @c11, @c111, @c1111], | |
[@root, @c1, @c11, @c111, @c1112], | |
[@root, @c1, @c12], | |
[@root, @c2, @c21] | |
], @t.linearize | |
end | |
def test_depth | |
assert_equal 4, @c1112.depth | |
assert_equal 0, @root.depth | |
end | |
def test_equality | |
assert_equal @c1, @c1 | |
assert_equal @c11, @c1 + "1" | |
end | |
def test_no_same_objects | |
e = assert_raise(ArgumentError) { @c2 << @c1 } | |
assert_equal "Not allowed to set parent if already setted", e.message | |
assert_equal 1, @t.all.select { |n| n == @c1 }.length | |
e = assert_raise(ArgumentError) { @c1 << @c21 } | |
assert_equal "Not allowed to set parent if already setted", e.message | |
assert_equal 1, @t.all.select { |n| n == @c21 }.length | |
new_root = "new_root" | |
new_root.extend(TreeNode) << @root # @root.parent = new_root.extend(TreeNode) | |
assert !@root.root? | |
assert new_root.root? | |
e = assert_raise(ArgumentError) { @c1 << new_root } | |
assert_equal "Cannot duplicate the root", e.message | |
assert new_root.root? | |
e = assert_raise(NoMethodError) { @c21.parent = @root } | |
assert_equal "protected method `parent=' called for #<Node:\"c21\">", e.message | |
assert_equal @c2, @c21.parent | |
end | |
def test_from_ary | |
a = ['1', [ '11', ['111', '112'], '12' ]] | |
t = TreeNode.from_ary(a) | |
e = <<TREE | |
1 | |
11 | |
111 | |
112 | |
12 | |
TREE | |
assert_equal e, t.show_simple | |
end | |
def test_call_object_method | |
str = "Strin".extend(TreeNode) | |
str << "g" | |
assert_equal "Strin", str | |
assert_equal ["g"], str.children | |
str.old(:<<, "g") | |
assert_equal "String", str | |
assert_equal ["g"], str.children | |
end | |
def test_remove | |
@t >> @c2 | |
assert_equal [@c1], @t.children | |
assert_equal nil, @c2.parent | |
end | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment