Skip to content

Instantly share code, notes, and snippets.

Created May 11, 2016 15:50
Show Gist options
  • Save BrutalSimplicity/b4ce7405ab3e5795064ddc6e78a337aa to your computer and use it in GitHub Desktop.
Save BrutalSimplicity/b4ce7405ab3e5795064ddc6e78a337aa to your computer and use it in GitHub Desktop.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.XPath;
using System.Text.RegularExpressions;
using System.IO;
namespace XmlDiffLib
/// <summary>
/// Describes one difference found while comparing two XML documents.
/// </summary>
public class XmlDiffNode
{
    /// <summary>Kind of change a difference represents.</summary>
    public enum DiffTypes { Removed, Added, Changed }

    /// <summary>Kind of XML construct a difference was reported on.</summary>
    public enum DiffNodeTypes { Tag, Text, Attribute, Node }

    /// <summary>XML construct the difference applies to.</summary>
    public DiffNodeTypes DiffNodeType { get; set; }

    /// <summary>Whether the item was removed, added or changed.</summary>
    public DiffTypes DiffType { get; set; }

    /// <summary>Name of the origin ("from") source.</summary>
    public string Origin { get; set; }

    /// <summary>Name of the comparison ("to") source.</summary>
    public string Comparison { get; set; }

    /// <summary>XPath-like location of the differing node in the origin document.</summary>
    public string XPath { get; set; }

    /// <summary>Human-readable explanation of the difference.</summary>
    public string Description { get; set; }

    /// <summary>Line number of the node in the origin document.</summary>
    public int OriginLineNo { get; set; }

    /// <summary>Line number of the node in the comparison document.</summary>
    public int CompLineNo { get; set; }

    /// <summary>Identifier of this difference; nested differences use dotted ids.</summary>
    public string DiffId { get; set; }

    /// <summary>
    /// Differences found below this node, or null when none were recorded.
    /// Exposed as a property (it used to be a public field) so that it is
    /// consistent with every other member of this type.
    /// </summary>
    public List<XmlDiffNode> Descendants { get; set; }

    /// <summary>Creates an empty difference entry.</summary>
    public XmlDiffNode() { }
}
/// <summary>
/// Switches that control how strictly two XML documents are compared.
/// </summary>
public class XmlDiffOptions
{
    /// <summary>Text value categories whose differences can be ignored.</summary>
    public enum IgnoreTextNodeOptions { XmlString, XmlInteger, XmlDouble, XmlDateTime }

    /// <summary>Compare text values without regard to letter case.</summary>
    public bool IgnoreCase { get; set; }

    /// <summary>Match attributes by name instead of by position.</summary>
    public bool IgnoreAttributeOrder { get; set; }

    /// <summary>Look for a matching element among siblings instead of at the same position.</summary>
    public bool IgnoreChildOrder { get; set; }

    /// <summary>Skip attribute comparison when matching elements.</summary>
    public bool IgnoreAttributes { get; set; }

    /// <summary>Node types that are excluded from the comparison.</summary>
    public HashSet<XPathNodeType> IgnoreNodes { get; set; }

    /// <summary>Do not require namespace URIs to be equal.</summary>
    public bool IgnoreNamespace { get; set; }

    /// <summary>Do not require namespace prefixes to be equal.</summary>
    public bool IgnorePrefix { get; set; }

    /// <summary>Whitespace trimming switch consulted when text values are compared.</summary>
    public bool TrimWhitespace { get; set; }

    /// <summary>Remove every whitespace character from text values before comparing them.</summary>
    public bool StripWhitespace { get; set; }

    /// <summary>Attach the differences of the closest candidate to a "no matching node" entry.</summary>
    public bool MatchDescendants { get; set; }

    /// <summary>Compare text as DateTime, int or double when both sides parse as such.</summary>
    public bool MatchValueTypes { get; set; }

    /// <summary>Also compare the second document against the first and report additions.</summary>
    public bool TwoWayMatch { get; set; }

    /// <summary>Attribute limit used when building XPath strings; only consulted when positive.</summary>
    public int MaxAttributesToDisplay { get; set; }

    /// <summary>Value categories for which differing text is treated as equal.</summary>
    public HashSet<IgnoreTextNodeOptions> IgnoreTextTypes { get; set; }

    /// <summary>
    /// Creates the default option set: order, namespace and prefix differences
    /// are tolerated, attribute and case differences are not.
    /// </summary>
    public XmlDiffOptions()
    {
        // Nothing is excluded until the caller says so.
        IgnoreNodes = new HashSet<XPathNodeType>();
        IgnoreTextTypes = new HashSet<IgnoreTextNodeOptions>();

        // Switches that are on by default.
        IgnoreAttributeOrder = true;
        IgnoreChildOrder = true;
        IgnoreNamespace = true;
        IgnorePrefix = true;
        TrimWhitespace = true;
        MatchDescendants = true;
        MatchValueTypes = true;

        // Switches that are off by default.
        IgnoreAttributes = false;
        IgnoreCase = false;
        StripWhitespace = false;
        TwoWayMatch = false;

        // A non-positive value means the attribute limit is not applied.
        MaxAttributesToDisplay = -1;
    }
}
/// <summary>
/// Compares two XML documents and collects the differences as XmlDiffNode entries.
/// </summary>
public class XmlDiff
// Parsed origin ("from") document.
private XPathDocument xmlFromDoc;
// Parsed comparison ("to") document.
private XPathDocument xmlToDoc;
// Display name of the origin source; copied into XmlDiffNode.Origin.
private string fromFilename;
// Display name of the comparison source; copied into XmlDiffNode.Comparison.
private string toFilename;
// Options of the comparison currently running; assigned by CompareDocuments.
private XmlDiffOptions options;
/// <summary>Differences produced by the last CompareDocuments call.</summary>
public List<XmlDiffNode> DiffNodeList { get; set; }
/// <summary>
/// Parses both XML strings so that they can be compared later.
/// </summary>
/// <param name="fromXml">XML text of the origin document.</param>
/// <param name="toXml">XML text of the comparison document.</param>
/// <param name="sourceFromName">Display name reported as the origin of each difference.</param>
/// <param name="sourceToName">Display name reported as the comparison source of each difference.</param>
/// <exception cref="XmlException">Either string is not a well-formed XML document.</exception>
/// <exception cref="Exception">Any other failure while loading the documents.</exception>
public XmlDiff(string fromXml, string toXml, string sourceFromName = "FromXml", string sourceToName = "ToXml")
{
    try
    {
        xmlFromDoc = new XPathDocument(new StringReader(fromXml));
        xmlToDoc = new XPathDocument(new StringReader(toXml));
        this.fromFilename = sourceFromName;
        this.toFilename = sourceToName;
    }
    catch (XmlException ex)
    {
        // Keep the parser's exception as InnerException so its stack trace
        // and line information stay available to the caller.
        throw new XmlException(String.Format("ERROR: An error was encountered in the XML data. Make sure the document is a valid XML document.\nMessage: {0}", ex.Message), ex);
    }
    catch (Exception ex)
    {
        // Same exception type as before, but the original failure is preserved.
        throw new Exception(ex.Message, ex);
    }
}
/// <summary>
/// Compares the origin document with the comparison document and stores the
/// result in <see cref="DiffNodeList"/>.
/// </summary>
/// <param name="options">Options that control the comparison.</param>
/// <returns>true when no differences were found; otherwise false.</returns>
/// <exception cref="Exception">
/// The comparison failed; the underlying failure is available as InnerException.
/// </exception>
public bool CompareDocuments(XmlDiffOptions options)
{
    this.options = options;
    try
    {
        DiffNodeList = CompareNodes(xmlFromDoc.CreateNavigator(), xmlToDoc.CreateNavigator());
        if (options.TwoWayMatch)
        {
            // A node missing when comparing in the reverse direction is a node
            // that was added to the comparison document.
            List<XmlDiffNode> reverseDiffs = CompareNodes(xmlToDoc.CreateNavigator(), xmlFromDoc.CreateNavigator());
            foreach (XmlDiffNode node in reverseDiffs)
            {
                if (node.DiffType == XmlDiffNode.DiffTypes.Removed)
                {
                    node.DiffType = XmlDiffNode.DiffTypes.Added;
                    DiffNodeList.Add(node);
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Preserve the original exception instead of flattening it into the message.
        throw new Exception(String.Format("ERROR: An error occurred while comparing XML documents.\nMessage: {0}", ex.Message), ex);
    }

    return DiffNodeList.Count == 0;
}
/// <summary>
/// Compares the attributes of the origin element with those of the comparison element.
/// </summary>
/// <param name="fromNav">Navigator on the origin element; it is cloned before use.</param>
/// <param name="toNav">Navigator on the comparison element; it is cloned before use.</param>
/// <param name="nodeInfo">Receives a description of the mismatch that was found, otherwise null.</param>
/// <returns>false as soon as a mismatch has been recorded; true otherwise.</returns>
private bool MatchAttributes(XPathNavigator fromNav, XPathNavigator toNav, out XmlDiffNode nodeInfo)
// NOTE(review): this listing appears to have lost lines that held only braces or
// keywords - the "} while" near the end has no visible "do", and the two identical
// IgnoreAttributeOrder tests below suggest statements are missing between them.
// Confirm against the original file before changing any logic.
XPathNavigator xFrom = fromNav.Clone();
XPathNavigator xTo = toNav.Clone();
nodeInfo = null;
if (xFrom.HasAttributes)
if (!options.IgnoreAttributeOrder)
if (!options.IgnoreAttributeOrder)
// Order-sensitive comparison: prefixes are only checked when namespaces matter.
if (!options.IgnoreNamespace && xFrom.Prefix != xTo.Prefix)
nodeInfo = new XmlDiffNode()
// XPath is left null here; CompareNodes assigns it when nodeInfo is non-null.
XPath = null,
DiffType = XmlDiffNode.DiffTypes.Changed,
Description = "No matching namespace @" + xFrom.NamespaceURI,
// NOTE(review): reported as Text although DiffNodeTypes.Attribute exists - confirm intent.
DiffNodeType = XmlDiffNode.DiffNodeTypes.Text,
Origin = fromFilename,
Comparison = toFilename,
OriginLineNo = ((IXmlLineInfo)xFrom).LineNumber,
CompLineNo = ((IXmlLineInfo)xTo).LineNumber,
DiffId = string.Empty, // To be filled in by compare method
return false;
// Order-sensitive comparison: both the name and the value have to agree.
if (xFrom.LocalName != xTo.LocalName || xFrom.Value != xTo.Value)
nodeInfo = new XmlDiffNode()
XPath = null,
DiffType = XmlDiffNode.DiffTypes.Changed,
Description = "No matching attribute @" + xFrom.LocalName + " = " + xFrom.Value,
DiffNodeType = XmlDiffNode.DiffNodeTypes.Text,
Origin = fromFilename,
Comparison = toFilename,
OriginLineNo = ((IXmlLineInfo)xFrom).LineNumber,
CompLineNo = ((IXmlLineInfo)xTo).LineNumber,
DiffId = string.Empty, // To be filled in by compare method
return false;
// Lookup by name on the comparison element; the namespace URI takes part in the
// lookup only when namespaces are not ignored.
// NOTE(review): presumably the order-insensitive ("else") branch - confirm.
if (xTo.GetAttribute(xFrom.LocalName, (!options.IgnoreNamespace) ? xFrom.NamespaceURI : "") != xFrom.Value)
nodeInfo = new XmlDiffNode()
XPath = null,
DiffType = XmlDiffNode.DiffTypes.Changed,
Description = "No matching attribute @" + xFrom.LocalName + " = " + xFrom.Value,
DiffNodeType = XmlDiffNode.DiffNodeTypes.Text,
Origin = fromFilename,
Comparison = toFilename,
OriginLineNo = ((IXmlLineInfo)xFrom).LineNumber,
CompLineNo = ((IXmlLineInfo)xTo).LineNumber,
DiffId = string.Empty, // To be filled in by compare method
return false;
// Continue with the next attribute of the origin element.
} while (xFrom.MoveToNextAttribute());
return true;
/// <summary>
/// Decides whether two elements are the same element under the current options.
/// </summary>
/// <param name="fromNav">Navigator on the origin element; it is cloned before use.</param>
/// <param name="toNav">Navigator on the candidate element; it is cloned before use.</param>
/// <param name="nodeInfo">
/// Mismatch details reported by the attribute comparison; null when the
/// elements match or when they already differ by name.
/// </param>
/// <returns>true when namespace, prefix, local name and attributes agree as far as the options require.</returns>
private bool MatchElement(XPathNavigator fromNav, XPathNavigator toNav, out XmlDiffNode nodeInfo)
{
    XPathNavigator origin = fromNav.Clone();
    XPathNavigator candidate = toNav.Clone();
    nodeInfo = null;

    // Namespace and prefix only count when the corresponding option is off;
    // the local name always has to be identical.
    bool sameName =
        (options.IgnoreNamespace || origin.NamespaceURI == candidate.NamespaceURI)
        && (options.IgnorePrefix || origin.Prefix == candidate.Prefix)
        && origin.LocalName == candidate.LocalName;
    if (!sameName)
    {
        return false;
    }

    // Attributes are compared last because that is the only step that fills nodeInfo.
    return options.IgnoreAttributes || MatchAttributes(origin, candidate, out nodeInfo);
}
/// <summary>
/// Walks the element siblings reachable from <paramref name="toNav"/> and tests them
/// against the origin element with MatchElement.
/// </summary>
/// <param name="fromNav">Navigator on the origin element; it is cloned before use.</param>
/// <param name="toNav">Navigator the sibling walk starts from; it is cloned before use.</param>
/// <param name="nodeInfo">Mismatch details from the most recent MatchElement call, if any.</param>
/// <returns>The list built during the walk.</returns>
private List<XPathNavigator> SelectSiblings(XPathNavigator fromNav, XPathNavigator toNav, out XmlDiffNode nodeInfo)
// NOTE(review): the bodies of both "if" statements and the "do" that pairs with
// "} while" are not visible in this listing; presumably non-element nodes are
// skipped and matching siblings are added to xToList - confirm against the original.
XPathNavigator xFrom = fromNav.Clone();
XPathNavigator xTo = toNav.Clone();
List<XPathNavigator> xToList = new List<XPathNavigator>();
nodeInfo = null;
if (xTo.NodeType != XPathNodeType.Element)
if (MatchElement(xFrom, xTo, out nodeInfo))
// Advance to the next sibling that is an element.
} while (xTo.MoveToNext(XPathNodeType.Element));
return xToList;
/// <summary>
/// Runs SelectSiblings with the given navigator used as both the origin and the
/// starting point of the sibling walk.
/// </summary>
/// <param name="fromNav">Navigator on the element of interest.</param>
/// <returns>The navigators returned by SelectSiblings.</returns>
private List<XPathNavigator> SelectAllMatchingSiblings(XPathNavigator fromNav)
{
    // The mismatch details are irrelevant when a node is compared with its own siblings.
    XmlDiffNode unusedMismatch;
    List<XPathNavigator> matches = SelectSiblings(fromNav, fromNav, out unusedMismatch);
    return matches;
}
/// <summary>
/// Determines the 1-based position of a node within the list returned by
/// SelectAllMatchingSiblings.
/// </summary>
/// <param name="aSibling">Navigator on the node to locate.</param>
/// <returns>The 1-based position, or 0 when the node is not in the list.</returns>
private int GetSiblingPosition(XPathNavigator aSibling)
{
    List<XPathNavigator> candidates = SelectAllMatchingSiblings(aSibling);

    // FindIndex yields -1 when nothing matches, which maps onto the "not found" value 0.
    int zeroBased = candidates.FindIndex(candidate => candidate.IsSamePosition(aSibling));
    return zeroBased + 1;
}
/// <summary>
/// Compares the nodes reachable from <paramref name="xmlFromNav"/> with those reachable
/// from <paramref name="xmlToNav"/> and returns the differences that were found.
/// </summary>
/// <param name="xmlFromNav">Navigator on the origin side; it is cloned before use.</param>
/// <param name="xmlToNav">Navigator on the comparison side; it is cloned before use.</param>
/// <param name="parentDiffId">Id prefix applied to the ids of nested differences.</param>
/// <returns>The differences found, in discovery order.</returns>
private List<XmlDiffNode> CompareNodes(XPathNavigator xmlFromNav, XPathNavigator xmlToNav, string parentDiffId = "")
// NOTE(review): this listing has lost brace/keyword-only lines and apparently some
// statements (nothing visible fills the queues, several "if" bodies are empty and
// "} while" has no "do"). The comments below describe only what is visible.
// Differences are numbered from 1 within each call.
int diffNumber = 1;
List<XmlDiffNode> diffNodeList = new List<XmlDiffNode>();
Queue<XPathNavigator> xFromQueue = new Queue<XPathNavigator>();
Queue<XPathNavigator> xToQueue = new Queue<XPathNavigator>();
XPathNavigator xFrom = xmlFromNav.Clone();
XPathNavigator xTo = xmlToNav.Clone();
bool isMatch = false;
List<XPathNavigator> xMatch = new List<XPathNavigator>();
// Origin and comparison navigators are processed in pairs.
while (xFromQueue.Count > 0 && xToQueue.Count > 0)
xFrom = xFromQueue.Dequeue();
xTo = xToQueue.Dequeue();
// Node types listed in IgnoreNodes get special handling (body not visible; presumably skipped).
if (options.IgnoreNodes.Contains(xFrom.NodeType))
XmlDiffNode nodeInfo;
// When child order matters the element at the same position is tested directly.
if (!options.IgnoreChildOrder)
isMatch = MatchElement(xFrom, xTo, out nodeInfo);
// NOTE(review): presumably the "else" branch - candidates are collected from the siblings.
xMatch = SelectSiblings(xFrom, xTo, out nodeInfo);
// Unambiguous case: a direct match or exactly one candidate.
if (isMatch || xMatch.Count == 1)
xTo = (isMatch) ? xTo : xMatch[0];
if (xFrom.HasChildren && xTo.HasChildren)
XPathNavigator tempFrom, tempTo;
tempFrom = xFrom.Clone();
tempTo = xTo.Clone();
XmlDiffNode result;
// Text is compared unless text nodes are ignored (handling of a failed comparison is not visible).
if (!options.IgnoreNodes.Contains(XPathNodeType.Text) && !CompareText(tempFrom, tempTo, out result, ref diffNumber))
// The origin element has children but its counterpart has none.
else if (xFrom.HasChildren && !xTo.HasChildren)
diffNodeList.Add(new XmlDiffNode
XPath = PrettyPrintXPath(GetXPath(xFrom)),
DiffType = XmlDiffNode.DiffTypes.Removed,
Description = "Node children not found",
DiffNodeType = XmlDiffNode.DiffNodeTypes.Tag,
Origin = fromFilename,
Comparison = toFilename,
OriginLineNo = ((IXmlLineInfo)xFrom).LineNumber,
CompLineNo = ((IXmlLineInfo)xTo).LineNumber,
DiffId = (diffNumber++).ToString()
// Ambiguous case: several candidates; each one is compared recursively.
else if (xMatch.Count > 1)
// Item1 is the candidate's line number, Item2 the differences against that candidate.
List<Tuple<int, List<XmlDiffNode>>> matchNodes = new List<Tuple<int, List<XmlDiffNode>>>();
foreach (XPathNavigator node in xMatch)
matchNodes.Add(new Tuple<int, List<XmlDiffNode>>(((IXmlLineInfo)node).LineNumber, CompareNodes(xFrom, node, diffNumber.ToString())));
// Candidates with the fewest differences are the best matches.
// NOTE(review): the OrderBy in the "where" clause is re-evaluated for every element.
var bestMatchNodes = from node in matchNodes
where node.Item2.Count == matchNodes.OrderBy(node_sub => node_sub.Item2.Count).First().Item2.Count()
select node;
// We only take the first best matching node here, which may not always
// be the most accurate.
Tuple<int, List<XmlDiffNode>> bestMatchNode = bestMatchNodes.First();
// Nested ids become "<parent>.<current number>.<nested id>".
bestMatchNode.Item2.ForEach(node => { node.DiffId = (!string.IsNullOrEmpty(parentDiffId) ? parentDiffId + "." : "") + diffNumber.ToString() + "." + node.DiffId; });
// A difference is reported only when even the best candidate differs.
if (bestMatchNode.Item2.Count > 0)
diffNodeList.Add(new XmlDiffNode
XPath = PrettyPrintXPath(GetXPath(xFrom)),
DiffType = XmlDiffNode.DiffTypes.Removed,
Description = "No matching node found. Closest matching error info shown:" + Environment.NewLine + PrettyPrintXPath(bestMatchNode.Item2[0].XPath) + Environment.NewLine + bestMatchNode.Item2[0].Description,
DiffNodeType = XmlDiffNode.DiffNodeTypes.Node,
Descendants = (options.MatchDescendants) ? bestMatchNode.Item2 : null,
Origin = fromFilename,
Comparison = toFilename,
OriginLineNo = ((IXmlLineInfo)xFrom).LineNumber,
CompLineNo = (options.MatchDescendants) ? bestMatchNode.Item1 : ((IXmlLineInfo)xTo).LineNumber,
DiffId = (diffNumber++).ToString()
// NOTE(review): presumably the "no candidate at all" branch - confirm.
XPathNavigator xToParent = xTo.Clone();
// Mismatch details from the matcher receive their id and XPath here.
if (nodeInfo != null)
nodeInfo.DiffId = (diffNumber++).ToString();
nodeInfo.XPath = GetXPath(xFrom);
diffNodeList.Add(new XmlDiffNode
XPath = PrettyPrintXPath(GetXPath(xFrom)),
DiffType = XmlDiffNode.DiffTypes.Removed,
Description = "Node not found",
DiffNodeType = XmlDiffNode.DiffNodeTypes.Tag,
Origin = fromFilename,
Comparison = toFilename,
OriginLineNo = ((IXmlLineInfo)xFrom).LineNumber,
CompLineNo = ((IXmlLineInfo)xToParent).LineNumber,
DiffId = (diffNumber++).ToString()
// Continue with the next element sibling on the origin side.
} while (xFrom.MoveToNext(XPathNodeType.Element));
return diffNodeList;
/// <summary>
/// Iterates over the entries of <paramref name="mergeList"/>.
/// </summary>
/// <param name="fromList">Not referenced by the visible code.</param>
/// <param name="mergeList">List that is enumerated.</param>
private void MergeDiffs(List<XmlDiffNode> fromList, List<XmlDiffNode> mergeList)
// NOTE(review): the loop body is not visible in this listing and nothing in the file
// calls this method; presumably an unfinished helper - confirm before relying on it.
foreach (XmlDiffNode node in mergeList)
/// <summary>
/// Compares the text of two nodes when both navigators are positioned on text nodes.
/// </summary>
/// <param name="xmlFromNav">Navigator on the origin side; it is cloned before use.</param>
/// <param name="xmlToNav">Navigator on the comparison side; it is cloned before use.</param>
/// <param name="result">Describes the mismatch when the texts differ; otherwise an empty XmlDiffNode.</param>
/// <param name="diffNumber">Running difference counter; incremented when a mismatch is recorded.</param>
/// <returns>false when a text mismatch was recorded; true otherwise.</returns>
private bool CompareText(XPathNavigator xmlFromNav, XPathNavigator xmlToNav, out XmlDiffNode result, ref int diffNumber)
// NOTE(review): no statement visible here moves either clone onto a text node;
// presumably such statements were lost from this listing - confirm against the original.
XPathNavigator xFrom = xmlFromNav.Clone();
XPathNavigator xTo = xmlToNav.Clone();
// The out parameter always receives an instance, even when nothing differs.
result = new XmlDiffNode();
if (xFrom.NodeType == XPathNodeType.Text && xTo.NodeType == XPathNodeType.Text)
// The actual value comparison honours the configured options.
if (!CompareTextValue(xFrom.Value, xTo.Value))
result = new XmlDiffNode
XPath = PrettyPrintXPath(GetXPath(xFrom)),
DiffType = XmlDiffNode.DiffTypes.Changed,
Description = "Text node does not match | " + xFrom.Value.Trim() + " => " + xTo.Value.Trim(),
DiffNodeType = XmlDiffNode.DiffNodeTypes.Text,
Origin = fromFilename,
Comparison = toFilename,
OriginLineNo = ((IXmlLineInfo)xFrom).LineNumber,
CompLineNo = ((IXmlLineInfo)xTo).LineNumber,
DiffId = (diffNumber++).ToString()
return false;
return true;
/// <summary>
/// Decides whether two text values are equal under the current options.
/// </summary>
/// <param name="fromValue">Text from the origin document.</param>
/// <param name="toValue">Text from the comparison document.</param>
/// <returns>true when the values are considered equal; otherwise false.</returns>
private bool CompareTextValue(string fromValue, string toValue)
// NOTE(review): the body of this "if" is not visible in the listing; presumably both
// values are trimmed - confirm against the original.
if (options.TrimWhitespace)
// Stripping removes every whitespace character, not only leading and trailing ones.
if (options.StripWhitespace)
fromValue = Regex.Replace(fromValue, @"\s", "");
toValue = Regex.Replace(toValue, @"\s", "");
// Typed comparison: DateTime first, then int, then double. When both values parse as a
// type the decision is final: equal values, or a type listed in IgnoreTextTypes, give true.
// NOTE(review): these TryParse overloads take no culture argument, so the result can
// depend on the current culture - confirm whether that is intended.
if (options.MatchValueTypes)
DateTime fromDTResult, toDTResult;
if (DateTime.TryParse(fromValue, out fromDTResult) && DateTime.TryParse(toValue, out toDTResult))
if (fromDTResult == toDTResult || options.IgnoreTextTypes.Contains(XmlDiffOptions.IgnoreTextNodeOptions.XmlDateTime))
return true;
return false;
int iFromResult, iToResult;
if (int.TryParse(fromValue, out iFromResult) && int.TryParse(toValue, out iToResult))
if (iFromResult == iToResult || options.IgnoreTextTypes.Contains(XmlDiffOptions.IgnoreTextNodeOptions.XmlInteger))
return true;
return false;
double dFromResult, dToResult;
if (double.TryParse(fromValue, out dFromResult) && double.TryParse(toValue, out dToResult))
if (dFromResult == dToResult || options.IgnoreTextTypes.Contains(XmlDiffOptions.IgnoreTextNodeOptions.XmlDouble))
return true;
return false;
// Differences between plain strings are accepted when XmlString is ignored.
if (options.IgnoreTextTypes.Contains(XmlDiffOptions.IgnoreTextNodeOptions.XmlString))
return true;
// Case-insensitive comparison uses ordinal rules.
if (options.IgnoreCase)
if (!fromValue.Equals(toValue, StringComparison.OrdinalIgnoreCase))
return false;
// NOTE(review): presumably the "else" of the IgnoreCase test - confirm.
if (fromValue != toValue)
return false;
return true;
/// <summary>
/// Builds an XPath-like location string for a node by walking up through its parents.
/// </summary>
/// <param name="nav">Navigator on the node to describe; it is cloned before use.</param>
/// <returns>Steps of the form name[@attr="value"][position], joined by '/', without a trailing '/'.</returns>
private string GetXPath(XPathNavigator nav)
// NOTE(review): brace/keyword-only lines and apparently some statements (for example
// moving to the first attribute and incrementing "count") are missing from this listing.
// Helper that renders a node's attributes as [@name="value"] predicates.
Func<XPathNavigator, string> addAttrib =
(node) =>
StringBuilder attribs = new StringBuilder();
XPathNavigator xNode = node.Clone();
if (xNode.HasAttributes)
int count = 0;
attribs.Append("[@" + xNode.LocalName + "=" + "\"" + xNode.Value + "\"]");
// The attribute limit is only consulted when it is positive (the action taken is not visible).
if (options.MaxAttributesToDisplay > 0 && count >= options.MaxAttributesToDisplay)
} while (xNode.MoveToNextAttribute());
return attribs.ToString();
XPathNavigator xNav = nav.Clone();
StringBuilder result = new StringBuilder();
// Nodes without a local name get special handling (body not visible; presumably skipped).
if (string.IsNullOrEmpty(xNav.LocalName))
string tempLabel = xNav.LocalName + addAttrib(xNav);
// The position is 1-based; GetSiblingPosition returns 0 when the node is not found.
tempLabel += "[" + GetSiblingPosition(xNav) + "]";
tempLabel += "/";
// Each step is prepended because the walk goes from the node up to the root.
result.Insert(0, tempLabel);
} while (xNav.MoveToParent());
return result.ToString().TrimEnd(new char[] { '/' });
/// <summary>
/// Reformats an XPath string so that every '/' starts a new, further indented line.
/// </summary>
/// <param name="xpath">XPath string to reformat.</param>
/// <returns>The reformatted text.</returns>
public string PrettyPrintXPath(string xpath)
// NOTE(review): the handling of characters other than '/' is not visible in this
// listing; presumably they are copied to the output unchanged - confirm.
StringBuilder sb = new StringBuilder();
int depth = 0;
for (int i = 0; i < xpath.Length; i++)
if (xpath[i] == '/')
// Each separator becomes a line break followed by two spaces per nesting level.
sb.Append(Environment.NewLine + new string(' ', (++depth * 2)));
return sb.ToString();
/// <summary>
/// Builds a copy of a string in which double quotes receive special treatment.
/// </summary>
/// <param name="s">Text to process.</param>
/// <returns>The text accumulated in the builder.</returns>
private string EscapeQuotes(string s)
// NOTE(review): the statements that append to "result" are not visible in this listing;
// presumably quotes are escaped for CSV output and other characters are copied - confirm.
StringBuilder result = new StringBuilder();
foreach (char c in s)
if (c == '\"')
return result.ToString();
/// <summary>
/// Renders <see cref="DiffNodeList"/> as CSV text that starts with a header row.
/// </summary>
/// <returns>The header row followed by the rendered differences.</returns>
public string ToCSVString()
// NOTE(review): the bodies of the switch cases and of the final "if" are not visible
// in this listing; presumably each case writes the result column and the "if"
// recurses into the descendants - confirm against the original.
// Declared before it is assigned so that the lambda can refer to itself.
Func<List<XmlDiffNode>, string> walkToCsv = null;
walkToCsv =
(diffList) =>
StringBuilder resultCsv = new StringBuilder();
foreach (XmlDiffNode node in diffList)
resultCsv.Append("\"" + node.DiffId + "\",");
switch (node.DiffType)
case XmlDiffNode.DiffTypes.Removed:
case XmlDiffNode.DiffTypes.Added:
case XmlDiffNode.DiffTypes.Changed:
// NOTE(review): the XPath is written without EscapeQuotes although GetXPath can emit
// double quotes in attribute predicates - confirm the CSV stays well-formed.
resultCsv.Append("\"" + node.XPath + "\",");
resultCsv.Append("\"" + EscapeQuotes(node.Description) + "\"" + ",");
resultCsv.Append(node.DiffNodeType + ",");
resultCsv.Append(node.OriginLineNo + ",");
resultCsv.Append(node.CompLineNo + ",");
// Only "Node" entries carry nested differences.
if (node.DiffNodeType == XmlDiffNode.DiffNodeTypes.Node && node.Descendants != null)
return resultCsv.ToString();
return "\"ID\",Result,Tag,Description,Type,\"OriginLineNo\",\"CompLineNo\",Origin,\"Closest Match Tag\",\"Closest Match XPath\"\r\n" + walkToCsv(DiffNodeList);
/// <summary>
/// Renders <see cref="DiffNodeList"/> as indented text; nested differences are
/// wrapped in braces one level deeper than their parent.
/// </summary>
/// <returns>
/// The rendered differences, or an empty string when no comparison has been run yet.
/// </returns>
public override string ToString()
{
    const int IndentSize = 3;

    // ToString must not throw just because CompareDocuments has not been called.
    if (DiffNodeList == null)
    {
        return string.Empty;
    }

    // ToString(XmlDiffNode) ends its lines with AppendLine, i.e. Environment.NewLine.
    // Splitting on a hard-coded "\r\n" produced a single element on platforms whose
    // newline is "\n", which made lines[1] fail with an index-out-of-range error.
    string[] lineSeparators = new string[] { Environment.NewLine };

    // define and assign delegate before definition so that
    // the delegate is captured by the recursive call
    Func<List<XmlDiffNode>, int, string> walkToString = null;
    walkToString =
        (diffList, depth) =>
        {
            StringBuilder diffLine = new StringBuilder();
            string indent = new String(' ', depth * IndentSize);
            string nestedIndent = new String(' ', (depth + 1) * IndentSize);
            foreach (XmlDiffNode node in diffList)
            {
                string[] lines = ToString(node).Split(lineSeparators, StringSplitOptions.RemoveEmptyEntries);
                // Only the first three lines of each entry are reported.
                diffLine.AppendLine(indent + lines[0]);
                diffLine.AppendLine(indent + lines[1]);
                diffLine.AppendLine(indent + lines[2]);
                if (node.DiffNodeType == XmlDiffNode.DiffNodeTypes.Node && node.Descendants != null)
                {
                    diffLine.AppendLine(nestedIndent + "{");
                    // Trim the trailing line break so the closing brace follows directly.
                    diffLine.AppendLine(walkToString(node.Descendants, depth + 1).TrimEnd('\r', '\n'));
                    diffLine.AppendLine(nestedIndent + "}");
                }
            }
            return diffLine.ToString();
        };

    return walkToString(DiffNodeList, 0);
}
/// <summary>
/// Renders a single difference as multi-line text: a change marker and the XPath on
/// the first line, then one line per remaining field.
/// </summary>
/// <param name="node">Difference to render.</param>
/// <returns>The rendered text; every line ends with a line break.</returns>
public string ToString(XmlDiffNode node)
{
    // Marker for the kind of change; values outside the three known kinds get none.
    string marker;
    if (node.DiffType == XmlDiffNode.DiffTypes.Removed)
    {
        marker = " (-) ";
    }
    else if (node.DiffType == XmlDiffNode.DiffTypes.Added)
    {
        marker = " (+) ";
    }
    else if (node.DiffType == XmlDiffNode.DiffTypes.Changed)
    {
        marker = " (*) ";
    }
    else
    {
        marker = string.Empty;
    }

    return new StringBuilder()
        .Append(marker)
        .AppendLine(" XPath: " + node.XPath)
        .AppendLine(" ===>>> Diff ID: " + node.DiffId)
        .AppendLine(" ===>>> Description: " + node.Description)
        .AppendLine(" ===>>> Node Type: " + node.DiffNodeType)
        .AppendLine(" ===>>> Origin Line No: " + node.OriginLineNo)
        .AppendLine(" ===>>> Comp Line No: " + node.CompLineNo)
        .ToString();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment