Skip to content

Instantly share code, notes, and snippets.

@gmoothart
Created February 29, 2012 18:17
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gmoothart/1943265 to your computer and use it in GitHub Desktop.
Save gmoothart/1943265 to your computer and use it in GitHub Desktop.
Xml Diff
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using System.Diagnostics;
namespace ConsoleApplication1
{
class Program
{
/// <summary>
/// Entry point: loads two XML files, diffs them with <c>Compare</c>, and
/// writes the resulting diff document to the console.
/// </summary>
/// <param name="args">
/// Optional file paths: <c>args[0]</c> is the first document and
/// <c>args[1]</c> the second. Any path that is not supplied falls back to
/// the original hard-coded location.
/// </param>
static void Main(string[] args)
{
    /*
    Inline sample documents, handy for quick experiments without files:

    XDocument d1 = XDocument.Parse(@"
    <xml x='ska' y='reggae'>
    <a >foo</a>
    <b>
    <x>ska</x>
    <y>foo</y>
    </b>
    </xml>");
    XDocument d2 = XDocument.Parse(@"
    <xml y='ska' z='rock'>
    <a>foo</a>
    <b>
    <x>ska</x>
    <y m='a'>bar</y>
    </b>
    <c>
    unique text
    </c>
    </xml>");
    */

    // Paths given on the command line win; otherwise keep the historical defaults.
    string path1 = args.Length > 0 ? args[0] : "e:\\orig.xml";
    string path2 = args.Length > 1 ? args[1] : "e:\\new.xml";

    XDocument d1 = XDocument.Load(path1);
    XDocument d2 = XDocument.Load(path2);

    Console.WriteLine(Compare(d1, d2).ToString());
}
/// <summary>
/// Diffs two XML documents and returns a new document whose root element is
/// <c>result</c>.
/// </summary>
/// <remarks>
/// If the two root elements have different names, a copy of each root is
/// placed under <c>result</c>, tagged with a <c>__source</c> attribute of
/// <c>doc1</c> or <c>doc2</c>. Otherwise the roots are diffed element by
/// element; when that diff finds nothing, <c>result</c> is left empty.
/// The input documents are not modified.
/// </remarks>
/// <param name="d1">The first ("doc1") document; must have a root element.</param>
/// <param name="d2">The second ("doc2") document; must have a root element.</param>
/// <returns>A new document describing the differences.</returns>
/// <exception cref="ArgumentNullException">A document is null.</exception>
/// <exception cref="ArgumentException">A document has no root element.</exception>
public static XDocument Compare(XDocument d1, XDocument d2)
{
    if (d1 == null)
    {
        throw new ArgumentNullException("d1");
    }
    if (d2 == null)
    {
        throw new ArgumentNullException("d2");
    }
    if (d1.Root == null)
    {
        throw new ArgumentException("Document has no root element.", "d1");
    }
    if (d2.Root == null)
    {
        throw new ArgumentException("Document has no root element.", "d2");
    }

    XDocument resultDoc = new XDocument(new XElement("result"));

    // Special case: documents have different root nodes. Output both.
    if (d1.Root.Name.ToString() != d2.Root.Name.ToString())
    {
        // Tag copies rather than the originals so the caller's documents
        // are left untouched.
        XElement root1 = new XElement(d1.Root);
        root1.SetAttributeValue("__source", "doc1");
        resultDoc.Root.Add(root1);

        XElement root2 = new XElement(d2.Root);
        root2.SetAttributeValue("__source", "doc2");
        resultDoc.Root.Add(root2);
    }
    else
    {
        // The element-level Compare returns null when nothing differs;
        // XContainer.Add ignores null content, leaving <result/> empty.
        XElement resultNode = Compare(d1.Root, d2.Root);
        resultDoc.Root.Add(resultNode);
    }

    return resultDoc;
}
/// <summary>
/// Recursively diffs two elements that share the same name and returns an
/// element (with that name) containing only what differs between them.
/// </summary>
/// <remarks>
/// <para>Text: compared only when neither element has child elements. On a
/// mismatch the result's value is set to a two-line preview holding the first
/// 25 characters of each side, prefixed <c>__doc1: </c> and <c>__doc2: </c>.</para>
/// <para>Attributes: an attribute whose value differs, or that exists on one
/// side only, is written to the result as <c>name__doc1</c> and/or
/// <c>name__doc2</c>.</para>
/// <para>Children: matched by name after sorting by name. Matching children
/// are diffed recursively; a child present on one side only is copied into
/// the result with a <c>__source</c> attribute of <c>doc1</c> or
/// <c>doc2</c>.</para>
/// <para>The input elements are not modified.</para>
/// </remarks>
/// <param name="d1">Element from the first ("doc1") tree.</param>
/// <param name="d2">Element from the second ("doc2") tree.</param>
/// <returns>
/// The diff element, or <c>null</c> when no attribute, child, or text
/// difference was recorded.
/// </returns>
/// <exception cref="ArgumentNullException">An element is null.</exception>
public static XElement Compare(XElement d1, XElement d2)
{
    if (d1 == null)
    {
        throw new ArgumentNullException("d1");
    }
    if (d2 == null)
    {
        throw new ArgumentNullException("d2");
    }

    // If nodes are completely different we should have noticed before now.
    Debug.Assert(d1.Name.ToString() == d2.Name.ToString());

    // Longest prefix of each side's text echoed into the diff output.
    const int maxTextPreview = 25;

    XElement xeResult = new XElement(d1.Name);

    //
    // Compare text if elements have no children
    //
    if (!d1.Elements().Any() && !d2.Elements().Any() &&
        d1.Value != d2.Value)
    {
        xeResult.SetValue(
            "__doc1: " + d1.Value.Substring(0, Math.Min(d1.Value.Length, maxTextPreview)) + Environment.NewLine +
            "__doc2: " + d2.Value.Substring(0, Math.Min(d2.Value.Length, maxTextPreview))
        );
    }

    //
    // Compare attributes (both sides sorted by name, as MergeSequences requires)
    //
    var attr1Enum = d1.Attributes().OrderBy(a => a.Name.ToString());
    var attr2Enum = d2.Attributes().OrderBy(a => a.Name.ToString());
    MergeSequences(attr1Enum, attr2Enum, new AttributeNameComparer(),
        equalFunc: (a1, a2) => {
            if (a1.Value != a2.Value) {
                xeResult.SetAttributeValue(a1.Name + "__doc1", a1.Value);
                xeResult.SetAttributeValue(a2.Name + "__doc2", a2.Value);
            }
        },
        obj1UniqueFunc: (a1) => {
            xeResult.SetAttributeValue(a1.Name + "__doc1", a1.Value);
        },
        obj2UniqueFunc: (a2) => {
            xeResult.SetAttributeValue(a2.Name + "__doc2", a2.Value);
        }
    );

    //
    // Compare children (both sides sorted by name, as MergeSequences requires)
    //
    var els1Enum = d1.Elements().OrderBy(el => el.Name.ToString());
    var els2Enum = d2.Elements().OrderBy(el => el.Name.ToString());
    MergeSequences(els1Enum, els2Enum, new ElementNameComparer(),
        equalFunc: (el1, el2) => {
            // A null result (no differences) is ignored by Add.
            xeResult.Add(Compare(el1, el2));
        },
        obj1UniqueFunc: (el1) => {
            // Tag a copy, not the original, so the caller's tree is not
            // polluted with __source attributes.
            XElement copy = new XElement(el1);
            copy.SetAttributeValue("__source", "doc1");
            xeResult.Add(copy);
        },
        obj2UniqueFunc: (el2) => {
            XElement copy = new XElement(el2);
            copy.SetAttributeValue("__source", "doc2");
            xeResult.Add(copy);
        }
    );

    // Return null if nothing has been added to xeResult.
    if (!xeResult.Attributes().Any() && !xeResult.Elements().Any() &&
        string.IsNullOrWhiteSpace(xeResult.Value))
    {
        return null;
    }

    return xeResult;
}
/// <summary>
/// Iterates two sequences in lock-step, looking for duplicate and unique items.
/// The supplied delegate is executed for each case (items are equal,
/// item1 is unique, item2 is unique).
///
/// Sequences are assumed to be sorted by the same criteria used in
/// objComparer!
/// </summary>
/// <typeparam name="T">Type of the items in both sequences.</typeparam>
/// <param name="seq1">First sorted sequence.</param>
/// <param name="seq2">Second sorted sequence.</param>
/// <param name="objComparer">
/// Decides whether the current items match (0), the item from
/// <paramref name="seq1"/> comes first (negative), or the item from
/// <paramref name="seq2"/> comes first (positive).
/// </param>
/// <param name="equalFunc">Called with both items when they compare equal.</param>
/// <param name="obj1UniqueFunc">Called for an item found only in <paramref name="seq1"/>.</param>
/// <param name="obj2UniqueFunc">Called for an item found only in <paramref name="seq2"/>.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="seq1"/> or <paramref name="seq2"/> is null.
/// </exception>
public static void MergeSequences<T>(IEnumerable<T> seq1, IEnumerable<T> seq2,
    IComparer<T> objComparer, Action<T,T> equalFunc, Action<T> obj1UniqueFunc,
    Action<T> obj2UniqueFunc)
{
    if (seq1 == null)
    {
        throw new ArgumentNullException("seq1");
    }
    if (seq2 == null)
    {
        throw new ArgumentNullException("seq2");
    }

    // Enumerators are IDisposable; 'using' releases them even when a
    // callback throws part-way through the merge.
    using (IEnumerator<T> seq1Enum = seq1.GetEnumerator())
    using (IEnumerator<T> seq2Enum = seq2.GetEnumerator())
    {
        bool seq1HasMoreEls = seq1Enum.MoveNext();
        bool seq2HasMoreEls = seq2Enum.MoveNext();

        while (seq1HasMoreEls || seq2HasMoreEls)
        {
            // Current is only read while the matching HasMoreEls flag is
            // true: its value is undefined once MoveNext has returned false.
            //
            // If we are at the end of one sequence but not the other,
            // hard-code the comparison so the remaining items of the other
            // sequence are all reported as unique.
            int nameComparison;
            if (seq1HasMoreEls && seq2HasMoreEls)
            {
                nameComparison = objComparer.Compare(seq1Enum.Current, seq2Enum.Current);
            }
            else if (seq1HasMoreEls)
            {
                nameComparison = -1;
            }
            else /* seq2HasMoreEls */
            {
                nameComparison = 1;
            }

            if (nameComparison == 0)
            {
                // Objects are equal: report the pair and advance both.
                equalFunc(seq1Enum.Current, seq2Enum.Current);
                seq1HasMoreEls = seq1Enum.MoveNext();
                seq2HasMoreEls = seq2Enum.MoveNext();
            }
            else if (nameComparison < 0)
            {
                // obj1 is unique: report it and advance seq1 only.
                obj1UniqueFunc(seq1Enum.Current);
                seq1HasMoreEls = seq1Enum.MoveNext();
            }
            else /* (nameComparison > 0) */
            {
                // obj2 is unique: report it and advance seq2 only.
                obj2UniqueFunc(seq2Enum.Current);
                seq2HasMoreEls = seq2Enum.MoveNext();
            }
        }
    }
}
}
/// <summary>
/// Orders <see cref="XAttribute"/> instances by the string form of their name.
/// The comparison is exposed only through <see cref="IComparer{T}"/>.
/// </summary>
public class AttributeNameComparer : IComparer<XAttribute>
{
    /// <summary>
    /// Compares the names of two attributes as strings.
    /// </summary>
    /// <returns>
    /// Negative when <paramref name="x"/>'s name sorts first, zero when the
    /// names compare equal, positive otherwise.
    /// </returns>
    int IComparer<XAttribute>.Compare(XAttribute x, XAttribute y)
    {
        string leftName = x.Name.ToString();
        string rightName = y.Name.ToString();
        return string.Compare(leftName, rightName);
    }
}
/// <summary>
/// Orders <see cref="XElement"/> instances by the string form of their name.
/// </summary>
public class ElementNameComparer : IComparer<XElement>
{
    /// <summary>
    /// Compares the names of two elements as strings.
    /// </summary>
    /// <returns>
    /// Negative when <paramref name="x"/>'s name sorts first, zero when the
    /// names compare equal, positive otherwise.
    /// </returns>
    public int Compare(XElement x, XElement y)
    {
        string leftName = x.Name.ToString();
        string rightName = y.Name.ToString();
        return string.Compare(leftName, rightName);
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment