Skip to content

Instantly share code, notes, and snippets.

@Mooophy
Last active September 28, 2017 08:51
Show Gist options
  • Save Mooophy/d1efa860104b3df0a22479cd59ae67e9 to your computer and use it in GitHub Desktop.
Save Mooophy/d1efa860104b3df0a22479cd59ae67e9 to your computer and use it in GitHub Desktop.
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
using System.Xml.Schema;
/// <summary>
/// Formatting helpers for LINQ to XML documents.
/// </summary>
public static class MyExtensions
{
    /// <summary>
    /// Serializes <paramref name="document"/> to an indented string with the XML
    /// declaration omitted and every attribute written on its own line.
    /// </summary>
    /// <param name="document">The document to serialize.</param>
    /// <returns>The formatted XML text.</returns>
    public static string ToStringAlignAttributes(this XDocument document)
    {
        var output = new StringBuilder();
        var writerSettings = new XmlWriterSettings
        {
            Indent = true,
            OmitXmlDeclaration = true,
            NewLineOnAttributes = true
        };

        // The writer must be disposed (flushed) before the builder is read.
        using (XmlWriter writer = XmlWriter.Create(output, writerSettings))
        {
            document.WriteTo(writer);
        }

        return output.ToString();
    }
}
class Program
{
/// <summary>
/// Names from the XML Schema instance namespace that are referenced during
/// normalization.
/// </summary>
private static class Xsi
{
    // readonly: these are shared constants and must never be reassigned.
    // Declaration order matters: static initializers run top to bottom, and the
    // two attribute names are built from the namespace declared first.
    public static readonly XNamespace xsi = "http://www.w3.org/2001/XMLSchema-instance";
    public static readonly XName schemaLocation = xsi + "schemaLocation";
    public static readonly XName noNamespaceSchemaLocation = xsi + "noNamespaceSchemaLocation";
}
/// <summary>
/// Creates a normalized copy of <paramref name="source"/>: top-level comments,
/// processing instructions and text nodes are dropped, and every top-level
/// element is rebuilt through NormalizeElement.
/// </summary>
/// <param name="source">Document to normalize. When a schema is supplied the
/// document is validated in place with schema info added to it.</param>
/// <param name="schema">Schema set to validate against, or null to skip
/// validation and value normalization.</param>
/// <returns>A new, normalized document.</returns>
public static XDocument Normalize(XDocument source, XmlSchemaSet schema)
{
    // PSVI information is only available when validation actually ran.
    bool havePSVI = schema != null;
    if (havePSVI)
    {
        // No event handler is passed, so validation errors are thrown; the
        // final 'true' asks Validate to add the PSVI information to the tree.
        source.Validate(schema, null, true);
    }

    // Only white space text nodes are allowed as children of a document, so
    // all text nodes can be removed along with comments and processing
    // instructions.
    IEnumerable<XNode> keptNodes = source.Nodes()
        .Where(child => !(child is XComment)
            && !(child is XProcessingInstruction)
            && !(child is XText))
        .Select(child =>
        {
            XElement childElement = child as XElement;
            return childElement != null
                ? NormalizeElement(childElement, havePSVI)
                : child;
        });

    return new XDocument(source.Declaration, keptNodes);
}
/// <summary>
/// Normalizes both documents (first <paramref name="doc1"/>, then
/// <paramref name="doc2"/>) and compares the results with XNode.DeepEquals.
/// </summary>
/// <param name="doc1">First document to compare.</param>
/// <param name="doc2">Second document to compare.</param>
/// <param name="schemaSet">Schema set passed to Normalize; may be null.</param>
/// <returns>True when the normalized documents are deeply equal.</returns>
public static bool DeepEqualsWithNormalization(XDocument doc1, XDocument doc2,
    XmlSchemaSet schemaSet)
{
    // Arguments are evaluated left to right, so doc1 is normalized before doc2.
    return XNode.DeepEquals(
        Normalize(doc1, schemaSet),
        Normalize(doc2, schemaSet));
}
/// <summary>
/// Produces the normalized attribute sequence for <paramref name="element"/>:
/// namespace declarations and the xsi schema-location attributes are removed,
/// the rest are sorted by namespace name and then local name, and (when PSVI
/// is available) typed values are rewritten in a canonical form.
/// </summary>
/// <param name="element">Element whose attributes are normalized.</param>
/// <param name="havePSVI">True when the tree carries schema info from validation.</param>
/// <returns>A lazily evaluated sequence of normalized attributes.</returns>
private static IEnumerable<XAttribute> NormalizeAttributes(XElement element, bool havePSVI)
{
    return element
        .Attributes()
        .Where(a => !a.IsNamespaceDeclaration
            && a.Name != Xsi.schemaLocation
            && a.Name != Xsi.noNamespaceSchemaLocation)
        // Ordinal ordering keeps the result independent of the current culture.
        .OrderBy(a => a.Name.NamespaceName, StringComparer.Ordinal)
        .ThenBy(a => a.Name.LocalName, StringComparer.Ordinal)
        .Select(a => NormalizeAttribute(a, havePSVI));
}

// Rewrites a single attribute value according to its schema type; returns the
// attribute unchanged when no usable schema type is available.
private static XAttribute NormalizeAttribute(XAttribute attribute, bool havePSVI)
{
    if (!havePSVI)
    {
        return attribute;
    }

    // Not every attribute is guaranteed to carry schema info or a schema type,
    // so guard both before reading the type code.
    IXmlSchemaInfo schemaInfo = attribute.GetSchemaInfo();
    if (schemaInfo == null || schemaInfo.SchemaType == null)
    {
        return attribute;
    }

    switch (schemaInfo.SchemaType.TypeCode)
    {
        case XmlTypeCode.Boolean:
            return new XAttribute(attribute.Name, (bool)attribute);
        case XmlTypeCode.DateTime:
            return new XAttribute(attribute.Name, (DateTime)attribute);
        case XmlTypeCode.Decimal:
            return new XAttribute(attribute.Name, (decimal)attribute);
        case XmlTypeCode.Double:
            return new XAttribute(attribute.Name, (double)attribute);
        case XmlTypeCode.Float:
            return new XAttribute(attribute.Name, (float)attribute);
        case XmlTypeCode.HexBinary:
        case XmlTypeCode.Language:
            // Invariant casing: the canonical form must not vary with the
            // current culture's casing rules.
            return new XAttribute(attribute.Name,
                ((string)attribute).ToLowerInvariant());
        default:
            return attribute;
    }
}
/// <summary>
/// Normalizes one child node: elements are rebuilt through NormalizeElement,
/// comments and processing instructions are dropped (null is returned), and
/// any other node is returned as-is.
/// </summary>
/// <param name="node">The node to normalize.</param>
/// <param name="havePSVI">True when the tree carries schema info from validation.</param>
/// <returns>The normalized node, or null when the node is to be omitted.</returns>
private static XNode NormalizeNode(XNode node, bool havePSVI)
{
    XElement asElement = node as XElement;
    if (asElement != null)
    {
        return NormalizeElement(asElement, havePSVI);
    }

    // Comments and processing instructions are trimmed from the normalized
    // tree; what remains (text and CDATA) is passed through unchanged.
    bool isTrimmed = node is XComment || node is XProcessingInstruction;
    return isTrimmed ? null : node;
}
/// <summary>
/// Builds a normalized copy of <paramref name="element"/>. Attributes always go
/// through NormalizeAttributes. When PSVI is available and the element has one
/// of the handled simple types, its content is replaced by the typed value so
/// that equivalent lexical forms compare equal; otherwise the child nodes are
/// normalized recursively through NormalizeNode.
/// </summary>
/// <param name="element">Element to normalize.</param>
/// <param name="havePSVI">True when the tree carries schema info from validation.</param>
/// <returns>A new, normalized element.</returns>
private static XElement NormalizeElement(XElement element, bool havePSVI)
{
    if (havePSVI)
    {
        // Schema info or its schema type may be missing for an element, so
        // both are checked before the type code is read. A nil element has no
        // content for the typed casts to parse, so it also takes the
        // structural path below.
        IXmlSchemaInfo schemaInfo = element.GetSchemaInfo();
        if (schemaInfo != null && schemaInfo.SchemaType != null && !schemaInfo.IsNil)
        {
            switch (schemaInfo.SchemaType.TypeCode)
            {
                case XmlTypeCode.Boolean:
                    return new XElement(element.Name,
                        NormalizeAttributes(element, havePSVI),
                        (bool)element);
                case XmlTypeCode.DateTime:
                    return new XElement(element.Name,
                        NormalizeAttributes(element, havePSVI),
                        (DateTime)element);
                case XmlTypeCode.Decimal:
                    return new XElement(element.Name,
                        NormalizeAttributes(element, havePSVI),
                        (decimal)element);
                case XmlTypeCode.Double:
                    return new XElement(element.Name,
                        NormalizeAttributes(element, havePSVI),
                        (double)element);
                case XmlTypeCode.Float:
                    return new XElement(element.Name,
                        NormalizeAttributes(element, havePSVI),
                        (float)element);
                case XmlTypeCode.HexBinary:
                case XmlTypeCode.Language:
                    // Invariant casing: the canonical form must not vary with
                    // the current culture's casing rules.
                    return new XElement(element.Name,
                        NormalizeAttributes(element, havePSVI),
                        ((string)element).ToLowerInvariant());
            }
        }
    }

    // No PSVI, no usable schema type, or a type without special handling:
    // copy the element structurally and normalize each child node.
    return new XElement(element.Name,
        NormalizeAttributes(element, havePSVI),
        element.Nodes().Select(n => NormalizeNode(n, havePSVI)));
}
/// <summary>
/// One comparison scenario: two XML documents, an optional schema, and the
/// expected outcome of comparing the documents after normalization.
/// </summary>
class Test
{
    // Auto-properties instead of public fields; object-initializer syntax and
    // member reads at the call sites are unaffected.

    /// <summary>Identifier used to select which tests run.</summary>
    public int TestNumber { get; set; }

    /// <summary>Human-readable description printed when the test runs.</summary>
    public string Description { get; set; }

    /// <summary>XML text of the first document.</summary>
    public string Document1 { get; set; }

    /// <summary>XML text of the second document.</summary>
    public string Document2 { get; set; }

    /// <summary>XSD text to validate against, or null for no schema.</summary>
    public string Schema { get; set; }

    /// <summary>Expected result of the normalized comparison.</summary>
    public bool ExpectedResult { get; set; }
}
/// <summary>
/// Console test driver. Builds a fixed list of comparison scenarios, runs
/// DeepEqualsWithNormalization on each pair of documents, and prints PASSED or
/// FAILED depending on whether the result matches the scenario's
/// ExpectedResult.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
// Each entry holds two XML documents, an optional XSD (null means the
// comparison runs without schema validation), and the expected result.
List<Test> testList = new List<Test>()
{
new Test
{
TestNumber = 1,
Description =
"One tree is in default namespace. Other is in a namespace with a prefix.",
Document1 =
@"<Root xmlns='http://www.northwind.com'>
<Child>1</Child>
</Root>",
Document2 =
@"<n:Root xmlns:n='http://www.northwind.com'>
<n:Child>1</n:Child>
</n:Root>",
Schema = null,
ExpectedResult = true
},
new Test
{
TestNumber = 2,
Description = "Variation on namespace prefixes.",
Document1 =
@"<Root xmlns='http://www.northwind.com'>
<a:Child xmlns:a='http://www.adventureworks.com'>1</a:Child>
</Root>",
Document2 =
@"<Root xmlns='http://www.northwind.com'>
<Child xmlns='http://www.adventureworks.com'>1</Child>
</Root>",
Schema = null,
ExpectedResult = true
},
new Test
{
TestNumber = 3,
Description = "Attributes are not ordered.",
Document1 =
@"<Root a='1' b='2'>
<Child>1</Child>
</Root>",
Document2 =
@"<Root b='2' a='1'>
<Child>1</Child>
</Root>",
Schema = null,
ExpectedResult = true
},
new Test
{
TestNumber = 4,
Description = "Attributes are not ordered, take 2.",
Document1 =
@"<Root a='1' b='2'>
<Child a='a' b='b' c='c' d='d'>1</Child>
</Root>",
Document2 =
@"<Root b='2' a='1'>
<Child d='d' c='c' b='b' a='a'>1</Child>
</Root>",
Schema = null,
ExpectedResult = true
},
new Test
{
TestNumber = 5,
Description = "One tree has a comment. Other does not.",
Document1 = "<Root><!--Comment--></Root>",
Document2 = "<Root></Root>",
Schema = null,
ExpectedResult = true
},
new Test
{
TestNumber = 6,
Description = "One tree has comment and PI., other does not.",
Document1 =
@"<Root>
<!--Comment-->
<?xml-stylesheet href='mystyle.css' type='text/css'?>
<Child></Child>
</Root>",
Document2 = "<Root><Child></Child></Root>",
Schema = null,
ExpectedResult = true
},
new Test
{
TestNumber = 7,
Description =
"Element is data type of xsd:double, values are equal when normalized.",
Document1 = "<Root>25</Root>",
Document2 = "<Root>+25</Root>",
Schema =
@"<xsd:schema xmlns:xsd='http://www.w3.org/2001/XMLSchema'>
<xsd:element name='Root' type='xsd:double'/>
</xsd:schema>",
ExpectedResult = true
},
new Test
{
TestNumber = 8,
Description =
"Element is data type of xsd:double, values are equal when normalized.",
Document1 =
@"<Root>
<Child>+25e+01</Child>
<Child>+50.0000</Child>
</Root>",
Document2 =
@"<Root>
<Child>250</Child>
<Child>5e1</Child>
</Root>",
Schema =
@"<xsd:schema xmlns:xsd='http://www.w3.org/2001/XMLSchema'>
<xsd:element name='Root'>
<xsd:complexType mixed='true'>
<xsd:choice>
<xsd:element
name='Child'
minOccurs='0'
maxOccurs='unbounded'
type='xsd:double'/>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>",
ExpectedResult = true
},
new Test
{
TestNumber = 9,
Description = "Variations in value representations.",
Document1 =
@"<Root>
<ABooleanElement>1</ABooleanElement>
<ADateTimeElement>2009-01-21T18:50:59.0000000-08:00</ADateTimeElement>
<ADecimalElement>1.0</ADecimalElement>
<ADoubleElement>1.0</ADoubleElement>
<AFloatElement>1.0</AFloatElement>
</Root>",
Document2 =
@"<Root>
<ABooleanElement>true</ABooleanElement>
<ADateTimeElement>2009-01-21T18:50:59-08:00</ADateTimeElement>
<ADecimalElement>1.0</ADecimalElement>
<ADoubleElement>1</ADoubleElement>
<AFloatElement>1</AFloatElement>
</Root>",
Schema =
@"<xsd:schema xmlns:xsd='http://www.w3.org/2001/XMLSchema'>
<xsd:element name='Root'>
<xsd:complexType>
<xsd:all>
<xsd:element name='ABooleanElement' minOccurs='1' maxOccurs='1'>
<xsd:complexType>
<xsd:simpleContent>
<xsd:extension base='xsd:boolean'>
<xsd:attribute name='ADefaultBooleanAttribute' default='false'/>
</xsd:extension>
</xsd:simpleContent>
</xsd:complexType>
</xsd:element>
<xsd:element name='ADateTimeElement' minOccurs='1' maxOccurs='1'
type='xsd:dateTime'/>
<xsd:element name='ADecimalElement' minOccurs='1' maxOccurs='1'
type='xsd:decimal'/>
<xsd:element name='ADoubleElement' minOccurs='1' maxOccurs='1'
type='xsd:double'/>
<xsd:element name='AFloatElement' minOccurs='1' maxOccurs='1'
type='xsd:float'/>
</xsd:all>
</xsd:complexType>
</xsd:element>
</xsd:schema>",
ExpectedResult = true
},
new Test
{
TestNumber = 10,
Description =
"Variations in value representations.",
Document1 =
@"<Root>
<Child>
<A>1</A>
<B>1.0</B>
<C>1.0</C>
<D>2009-01-21T18:50:59-08:00</D>
</Child>
<Child>
<A>1</A>
<B>1.0</B>
<C>1.0</C>
<D>2009-01-21T18:50:59-08:00</D>
</Child>
<Child>
<A>1</A>
<B>1.0</B>
<C>1.0</C>
<D>2009-01-21T18:50:59-08:00</D>
</Child>
</Root>",
Document2 =
@"<Root>
<Child>
<A>1</A>
<B>1.0</B>
<C>1.0</C>
<D>2009-01-21T18:50:59.0000000-08:00</D>
</Child>
<Child>
<A>1</A>
<B>1</B>
<C>1</C>
<D>2009-01-21T18:50:59.0000000-08:00</D>
</Child>
<Child>
<A>1</A>
<B>1.0</B>
<C>1.0</C>
<D>2009-01-21T18:50:59.0000000-08:00</D>
</Child>
</Root>",
Schema =
@"<xs:schema attributeFormDefault='unqualified' elementFormDefault='qualified'
xmlns:xs='http://www.w3.org/2001/XMLSchema'>
<xs:element name='Root'>
<xs:complexType>
<xs:sequence>
<xs:element maxOccurs='unbounded' name='Child'>
<xs:complexType>
<xs:all>
<xs:element name='B' type='xs:float' />
<xs:element name='A' type='xs:unsignedByte' />
<xs:element name='C' type='xs:float' />
<xs:element name='D' type='xs:dateTime' />
</xs:all>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:schema>",
ExpectedResult = true
},
new Test
{
TestNumber = 11,
Description =
"noNamespaceSchemaLocation",
Document1 = "<Text><b></b><i></i></Text>",
Document2 =
@"<Text xsi:noNamespaceSchemaLocation='http://adventure-works.com/schemas/paragraph.xsd'
xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><b></b><i></i></Text>",
Schema =
@"<xsd:schema xmlns:xsd='http://www.w3.org/2001/XMLSchema'>
<xsd:element name='Text'>
<xsd:complexType>
<xsd:all>
<xsd:element name='b'/>
<xsd:element name='i'/>
</xsd:all>
</xsd:complexType>
</xsd:element>
</xsd:schema>",
ExpectedResult = true
},
new Test
{
TestNumber = 12,
Description = "hexBinary and language data types",
Document1 =
"<Text><b>3f3c6d78206c657673726f693d6e3122302e20226e</b><l>en-US</l></Text>",
Document2 =
"<Text><b>3F3C6D78206C657673726F693D6E3122302E20226E</b><l>en-us</l></Text>",
Schema =
@"<xsd:schema xmlns:xsd='http://www.w3.org/2001/XMLSchema'>
<xsd:element name='Text'>
<xsd:complexType>
<xsd:all>
<xsd:element name='b' type='xsd:hexBinary'/>
<xsd:element name='l' type='xsd:language'/>
</xsd:all>
</xsd:complexType>
</xsd:element>
</xsd:schema>",
ExpectedResult = true
},
new Test
{
TestNumber = 13,
Description = "Attributes of various types, values not normalized.",
Document1 =
@"<Root ABoolean='true'
AFloat='1.0'
ADecimal='1.00'
ADouble='1.0'
ADateTime='2009-01-21T18:50:59-08:00'
AHexBinary='abcd1234'
ALanguage='en-us'>
<Child XBoolean='true'
XFloat='1.0'
XDecimal='1.00'
XDouble='1.0'
XDateTime='2009-01-21T18:50:59-08:00'
XHexBinary='abcd1234'
XLanguage='en-us'/>
</Root>",
Document2 =
@"<Root ABoolean='true'
AFloat='+1'
ADecimal='1.00'
ADouble='+1e+0'
ADateTime='2009-01-21T18:50:59.00-08:00'
AHexBinary='ABCD1234'
ALanguage='EN-US'>
<Child XBoolean='true'
XFloat='1.0'
XDecimal='1.00'
XDouble='+1e+0'
XDateTime='2009-01-21T18:50:59-08:00'
XHexBinary='ABCD1234'
XLanguage='en-US'/>
</Root>",
Schema =
@"<xsd:schema xmlns:xsd='http://www.w3.org/2001/XMLSchema'>
<xsd:element name='Root'>
<xsd:complexType>
<xsd:all>
<xsd:element name='Child' minOccurs='1' maxOccurs='1'>
<xsd:complexType>
<xsd:attribute name='XBoolean' type='xsd:boolean'/>
<xsd:attribute name='XFloat' type='xsd:float'/>
<xsd:attribute name='XDecimal' type='xsd:decimal'/>
<xsd:attribute name='XDouble' type='xsd:double'/>
<xsd:attribute name='XDateTime' type='xsd:dateTime'/>
<xsd:attribute name='XHexBinary' type='xsd:hexBinary'/>
<xsd:attribute name='XLanguage' type='xsd:language'/>
</xsd:complexType>
</xsd:element>
</xsd:all>
<xsd:attribute name='ABoolean' type='xsd:boolean'/>
<xsd:attribute name='AFloat' type='xsd:float'/>
<xsd:attribute name='ADecimal' type='xsd:decimal'/>
<xsd:attribute name='ADouble' type='xsd:double'/>
<xsd:attribute name='ADateTime' type='xsd:dateTime'/>
<xsd:attribute name='AHexBinary' type='xsd:hexBinary'/>
<xsd:attribute name='ALanguage' type='xsd:language'/>
</xsd:complexType>
</xsd:element>
</xsd:schema>",
ExpectedResult = true
},
new Test
{
TestNumber = 14,
Description = "Element has a default attribute.",
Document1 = "<Root/>",
Document2 = "<Root ADefaultBooleanAttribute='false'/>",
Schema =
@"<xsd:schema xmlns:xsd='http://www.w3.org/2001/XMLSchema'>
<xsd:element name='Root'>
<xsd:complexType>
<xsd:simpleContent>
<xsd:extension base='xsd:string'>
<xsd:attribute name='ADefaultBooleanAttribute' default='false'/>
</xsd:extension>
</xsd:simpleContent>
</xsd:complexType>
</xsd:element>
</xsd:schema>",
ExpectedResult = true
}
};
// When true, each document is printed before and after normalization.
bool detailed = true;
// Inclusive range of TestNumber values to run; the defaults select every test.
int startTest = 0;
int endTest = Int32.MaxValue;
foreach (var test in testList.Where(t => t.TestNumber >= startTest && t.TestNumber <= endTest))
{
Console.WriteLine("Test: {0} {1}", test.TestNumber, test.Description);
// schemaSet stays null for tests without a schema; Normalize skips
// validation when it receives a null schema set.
XmlSchemaSet schemaSet = null;
if (test.Schema != null)
{
schemaSet = new XmlSchemaSet();
// The schema text is read from the in-memory string and added with an
// empty string as the target-namespace argument.
schemaSet.Add("", XmlReader.Create(new StringReader(test.Schema)));
}
XDocument d1 = XDocument.Parse(test.Document1);
XDocument d2 = XDocument.Parse(test.Document2);
if (detailed)
{
// Note: these Normalize calls are for display only; the comparison below
// normalizes both documents again inside DeepEqualsWithNormalization.
Console.WriteLine("Document1 before normalization");
Console.WriteLine(d1.ToStringAlignAttributes());
Console.WriteLine("--------------------------------");
Console.WriteLine("Document1 after normalization");
Console.WriteLine(Normalize(d1, schemaSet).ToStringAlignAttributes());
Console.WriteLine("--------------------------------");
Console.WriteLine("Document2 before normalization");
Console.WriteLine(d2.ToStringAlignAttributes());
Console.WriteLine("--------------------------------");
Console.WriteLine("Document2 after normalization");
Console.WriteLine(Normalize(d2, schemaSet).ToStringAlignAttributes());
Console.WriteLine("--------------------------------");
}
// A test passes when the comparison result equals the expected result.
if (DeepEqualsWithNormalization(d1, d2, schemaSet) == test.ExpectedResult)
Console.WriteLine("PASSED");
else
Console.WriteLine("****************** FAILED ******************");
Console.WriteLine();
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment