Skip to content

Instantly share code, notes, and snippets.

@Fifan31
Created January 5, 2016 09:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Fifan31/39296e124e14b7f7153f to your computer and use it in GitHub Desktop.
Save Fifan31/39296e124e14b7f7153f to your computer and use it in GitHub Desktop.
import lxml.etree as le
from operator import attrgetter
import os
import tempfile
import difflib
def sortbyid(elem):
    '''Sort key: the numeric value of an element's 'id' attribute.

    Returns the 'id' attribute converted to an int. Elements that have
    no 'id' attribute, an empty one, or one that is not a valid integer
    all yield 0.
    '''
    raw_id = elem.get('id')
    # Missing or empty id: nothing to convert.
    if not raw_id:
        return 0
    try:
        return int(raw_id)
    except ValueError:
        # Non-numeric ids are treated the same as a missing id.
        return 0
def sortbytext(elem):
    '''Sort key: the text contents of an XML element.

    Returns elem.text, or the empty string when the element has no text
    (None or '').
    '''
    return elem.text or ''
def sortAttrs(item, sorteditem):
    '''Copy the attributes of item onto sorteditem in alphabetical key order.

    item is only read, never modified. Each attribute is set on
    sorteditem one at a time, walking the keys of item in sorted order.
    '''
    for name in sorted(item.keys()):
        sorteditem.set(name, item.get(name))
def sortElements(items, newroot):
    ''' Function to sort XML elements
    The sorted elements will be added as children of the provided newroot
    This is a recursive function, and will be called on each of the children
    of items.

    items   -- iterable of sibling XML nodes to sort (left unmodified)
    newroot -- element that receives the sorted copies as children

    Only the tag, the non-whitespace text and the attributes of each
    element are copied; tail text is not carried over. Comments,
    processing instructions and entities are skipped.
    '''
    # Special nodes (comments, processing instructions, entities) expose
    # a factory function as their ``tag`` instead of a string. They can
    # neither be ordered against string tags nor be rebuilt with
    # le.Element(), so they are left out of the sorted copy.
    elements = [item for item in items if not callable(item.tag)]
    # The intended sort order is to sort by XML element name
    # If more than one element has the same name, we want to
    # sort by their text contents.
    # If more than one element has the same name and they do
    # not contain any text contents, we want to sort by the
    # value of their ID attribute.
    # If more than one element has the same name, but has
    # no text contents or ID attribute, their order is left
    # unmodified.
    #
    # A single stable sort on a (tag, text, id) tuple gives exactly
    # that order.
    elements.sort(
        key=lambda item: (item.tag, sortbytext(item), sortbyid(item)))
    # Once sorted, we sort each of the items
    for item in elements:
        # Create a new item to represent the sorted version
        # of the next item, and copy the tag name and contents
        newitem = le.Element(item.tag)
        if item.text and not item.text.isspace():
            newitem.text = item.text
        # Copy the attributes (sorted by key) to the new item
        sortAttrs(item, newitem)
        # Copy the children of item (sorted) to the new item
        sortElements(list(item), newitem)
        # Append this sorted item to the sorted root
        newroot.append(newitem)
def sortFile(fileobj):
    ''' Function to sort the provided XML file
    fileobj['filename'] will be left untouched
    A new sorted copy of it will be created at fileobj['tmpfilename']

    fileobj -- dict with the keys 'filename' (XML file to read) and
               'tmpfilename' (where the sorted copy is written)
    '''
    # Open in binary mode so the XML parser decodes the document from
    # its own encoding declaration rather than the locale's default
    # text encoding.
    with open(fileobj['filename'], 'rb') as original:
        # parse the XML file and get a pointer to the top
        xmldoc = le.parse(original)
    xmlroot = xmldoc.getroot()
    # create a new XML element that will be the top of
    # the sorted copy of the XML file
    newxmlroot = le.Element(xmlroot.tag)
    # create the sorted copy of the XML file
    sortAttrs(xmlroot, newxmlroot)
    sortElements(list(xmlroot), newxmlroot)
    # write the sorted XML file to the temp file
    newtree = le.ElementTree(newxmlroot)
    with open(fileobj['tmpfilename'], 'wb') as newfile:
        newtree.write(newfile, pretty_print=True)
def createFileObj(prefix, name):
    ''' Prepares the location of the temporary file that will be created by xmldiff

    prefix -- prefix for the temporary file's name
    name   -- path of the XML file; its base name becomes the suffix of
              the temporary file's name

    Returns a dict with the keys 'filename' (absolute path of name) and
    'tmpfilename' (path of a newly created, empty temporary file). The
    caller is responsible for removing the temporary file.
    '''
    # mkstemp() creates the file, so the name stays reserved until the
    # caller removes it. Taking only the name of a NamedTemporaryFile
    # that is immediately discarded would delete the file and leave the
    # name free for something else to claim.
    fd, tmpfilename = tempfile.mkstemp(suffix=os.path.basename(name),
                                       prefix=prefix)
    # Only the path is needed; the file is reopened by name later.
    os.close(fd)
    return {
        "filename": os.path.abspath(name),
        "tmpfilename": tmpfilename,
    }
def compare(expected, actual):
    ''' Compares two XML files after sorting their elements and attributes

    expected -- path of the XML file holding the expected contents
    actual   -- path of the XML file holding the actual contents

    Returns the generator produced by difflib.unified_diff over the
    lines of the two sorted copies, labelled with the absolute paths of
    the original files.
    '''
    filefrom = createFileObj("from", expected)
    fileto = createFileObj("to", actual)
    try:
        # sort each of the specified files
        sortFile(filefrom)
        sortFile(fileto)
        with open(filefrom['tmpfilename'], mode='r') as expFile, \
                open(fileto['tmpfilename'], mode='r') as actFile:
            expectedLines = expFile.readlines()
            actualLines = actFile.readlines()
    finally:
        # Remove the sorted copies even when sorting or reading failed.
        # A copy may not exist if the failure happened before it was
        # written, so only remove what is actually there.
        for fileobj in (filefrom, fileto):
            if os.path.exists(fileobj['tmpfilename']):
                os.remove(fileobj['tmpfilename'])
    # The diff is lazy, but it only needs the lines already in memory,
    # so it stays valid after the temporary files are gone.
    return difflib.unified_diff(expectedLines, actualLines,
                                filefrom['filename'], fileto['filename'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment