Created
October 7, 2012 12:46
-
-
Save dpk/3848296 to your computer and use it in GitHub Desktop.
Ruby HTML Diff
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'hpricot' | |
require 'diff/lcs' | |
# Produces an HTML string describing the differences between two HTML
# fragments. Removed pieces are wrapped in <del class="diff">, added pieces
# in <ins class="diff">, and replaced word tokens in
# <del class="diff modified"> / <ins class="diff modified">.
#
# Both inputs are parsed with Hpricot; every node is flattened into a list of
# word-level String tokens and Tag wrappers, and the two lists are compared
# with Diff::LCS.sdiff.
class HTMLDiff
  # Convenience entry point: diff two HTML sources in one call.
  #
  # @param a [String] the old HTML source (whatever Hpricot.parse accepts)
  # @param b [String] the new HTML source
  # @return [String] the annotated diff markup
  def self.diff(a, b)
    new(a, b).compare
  end

  # Parses both documents up front; the comparison itself happens in #compare.
  #
  # @param a [String] the old HTML source
  # @param b [String] the new HTML source
  def initialize(a, b)
    @ah = Hpricot.parse a
    @bh = Hpricot.parse b
    @diff_source = ''
  end

  # Runs the comparison over the top-level children of both documents,
  # stores the result in @diff_source and returns it.
  #
  # @return [String] the annotated diff markup
  def compare
    @diff_source = compare_children(children(@ah), children(@bh))
  end

  # Diffs two token lists (as produced by #children) and renders the result.
  #
  # @param ac [Array<String, Tag>] tokens of the old side
  # @param bc [Array<String, Tag>] tokens of the new side
  # @return [String] the annotated diff markup for this level
  def compare_children(ac, bc)
    src = ''
    Diff::LCS.sdiff(ac, bc).each do |diff|
      case diff.action
      when '-'
        src << "<del class=\"diff\">#{diff.old_element}</del>"
      when '+'
        src << "<ins class=\"diff\">#{diff.new_element}</ins>"
      when '!'
        src << compare_modified(diff.old_element, diff.new_element)
      else
        # Any other action is emitted as the new side, unmarked.
        src << diff.new_element.to_s
      end
    end
    src
  end

  # Flattens the direct children of an Hpricot node into diffable tokens:
  # text nodes are split on word boundaries, elements are wrapped in Tag.
  #
  # @param h [#children] an Hpricot document or element
  # @return [Array<String, Tag>]
  # @raise [RuntimeError] for any child that is neither Hpricot::Text nor
  #   Hpricot::Elem
  def children(h)
    kids = []
    h.children.each do |child|
      case child
      when Hpricot::Text
        kids.concat(child.to_s.split(/\b/))
      when Hpricot::Elem
        kids << Tag.new(child)
      else
        raise 'unexpected element type'
      end
    end
    kids
  end

  # Comparable wrapper around an Hpricot element. Two tags are considered
  # equal when their name and inner text match; attributes are not compared.
  class Tag
    attr_accessor :elem, :name, :attrs, :contents

    # @param hp [#name, #attributes, #inner_text] the element to wrap
    def initialize(hp)
      @elem = hp
      @name = hp.name
      @attrs = hp.attributes
      @contents = hp.inner_text
    end

    # Rebuilds the element's markup from its name, attributes and contents.
    # NOTE(review): contents comes from inner_text, so markup of nested child
    # elements is presumably not reproduced here - confirm whether intended.
    #
    # @return [String]
    def to_s
      e = Hpricot::Elem.new @name
      @attrs.to_hash.each { |k, v| e.set_attribute k, v }
      e.inner_html = @contents
      e.to_s
    end

    # Equality on name and contents. Token lists mix Tags with plain Strings,
    # so a non-Tag operand must compare unequal instead of raising
    # NoMethodError on the missing #name.
    #
    # @param b [Object]
    # @return [Boolean]
    def ==(b)
      b.is_a?(Tag) && @name == b.name && @contents == b.contents
    end

    # Hash-key equality, kept in step with #== so that equal tags are also
    # found when a diff library looks elements up as Hash keys.
    alias eql? ==

    # @return [Integer] hash derived from the same fields #== compares
    def hash
      [@name, @contents].hash
    end
  end

  private

  # Renders a "changed" pair. Two single word tokens become a del/ins pair;
  # anything else is expanded into child tokens and diffed recursively.
  # NOTE(review): on the recursive path only the children's diff is emitted;
  # the enclosing tags of the two elements are not re-emitted.
  #
  # @param old_element [String, Tag]
  # @param new_element [String, Tag]
  # @return [String]
  def compare_modified(old_element, new_element)
    kidcomp = [old_element, new_element].map do |orig|
      orig.is_a?(Tag) ? children(orig.elem) : orig.split(/\b/)
    end

    if kidcomp.all? { |c| c.length == 1 && c.first.is_a?(String) }
      "<del class=\"diff modified\">#{old_element}</del>" \
        "<ins class=\"diff modified\">#{new_element}</ins>"
    else
      compare_children(*kidcomp)
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment