Skip to content

Instantly share code, notes, and snippets.

@dgroft
Created November 22, 2013 15:17
Show Gist options
  • Save dgroft/7601523 to your computer and use it in GitHub Desktop.
Save dgroft/7601523 to your computer and use it in GitHub Desktop.
Applies an XSL transform to XML returned from an HTTP request, then validates the result against an XSD schema.
from __future__ import print_function
import argparse
import lxml.etree as ET
import urllib
# Command-line interface: where to read feed URLs from, where to append the
# error report, and whether to echo the fetched/transformed XML.
parser = argparse.ArgumentParser(description="Transforms and Validates XML")
parser.add_argument(
    "--feeds",
    default="feeds.txt",
    help="the text file that holds the urls to be validated",
)
parser.add_argument(
    "--log",
    default="log.txt",
    help="the output log file that reports all errors",
)
parser.add_argument(
    "-v",
    "--verbose",
    action="store_true",
    help="increase output verbosity",
)
# Parsed once at import time; the rest of the script reads options from `args`.
args = parser.parse_args()
def getDOM(feedUrl):
    """Fetch the XML served at ``feedUrl`` and parse it into an lxml tree.

    Args:
        feedUrl: URL of the feed to download.

    Returns:
        The tree object produced by ``ET.parse`` for the response body.

    Raises:
        Whatever ``urlopen`` raises when the URL cannot be fetched, or
        whatever ``ET.parse`` raises when the body is not well-formed XML.
    """
    # Function-scope imports keep this block self-contained.
    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2
    # ET.parse consumes the whole stream before returning, so the response
    # can be closed right away instead of leaking one connection per feed.
    with closing(urlopen(feedUrl)) as response:
        return ET.parse(response)
def getXslt(feedUrl):
    """Pick the XSLT stylesheet path that matches ``feedUrl``.

    The URL is checked for each known feed path fragment in order; the first
    fragment contained in the URL decides the stylesheet.

    Returns:
        The stylesheet path as a string, or ``None`` when no fragment matches.
    """
    stylesheets = (
        ("myspecialfeeds/rest/abc", "xslts/Abc.xslt"),
        ("myspecialfeeds/rest/def", "xslts/Def.xslt"),
        ("myspecialfeeds/rest/ghi", "xslts/Ghi.xslt"),
    )
    for marker, stylesheet in stylesheets:
        if marker in feedUrl:
            return stylesheet
    return None
def applyXslt(dom, xsltName):
    """Run the stylesheet stored at ``xsltName`` over ``dom``.

    Args:
        dom: The parsed XML document to transform.
        xsltName: Path of the XSLT file to load.

    Returns:
        The result of calling the compiled ``ET.XSLT`` transform on ``dom``.
    """
    # Load the stylesheet document first, then compile and apply it.
    stylesheet = ET.parse(xsltName)
    return ET.XSLT(stylesheet)(dom)
def getXsd(feedUrl):
    """Pick the XSD schema path that matches ``feedUrl``.

    The URL is checked for each known feed path fragment in order; the first
    fragment contained in the URL decides the schema.

    Returns:
        The schema path as a string, or ``None`` when no fragment matches.
    """
    schemas = (
        ("myspecialfeeds/rest/abc", "xsds/Abc.xsd"),
        ("myspecialfeeds/rest/def", "xsds/Def.xsd"),
        ("myspecialfeeds/rest/ghi", "xsds/Ghi.xsd"),
    )
    return next((xsd for marker, xsd in schemas if marker in feedUrl), None)
def applyXsd(transformedXml, xsdName):
    """Validate ``transformedXml`` against the schema stored at ``xsdName``.

    Args:
        transformedXml: The document to validate.
        xsdName: Path of the XSD file to load.

    Returns:
        None. Validation is performed with ``assertValid``, so a document
        that does not conform is reported by an exception, not a return value.
    """
    # Load the schema document, compile it, and assert conformance.
    schema_doc = ET.parse(xsdName)
    ET.XMLSchema(schema_doc).assertValid(transformedXml)
# Main driver: validate every feed listed in the feeds file and append a
# report for each failing feed to the log file.
# The log is opened in append mode, so reports from earlier runs are kept.
with open(args.log, "a") as logFile:
    with open(args.feeds, "r") as feedsFile:
        print()
        for feedLine in feedsFile:
            # Strip the line terminator; strip() also removes a stray "\r"
            # or surrounding spaces that would otherwise end up in the URL.
            feedUrl = feedLine.strip()
            # Skip blank lines and lines commented out with '#'.
            if not feedUrl or feedUrl.startswith("#"):
                continue
            print("Validating: " + feedUrl)
            # Reset per-feed state so the error handler below never reads an
            # unbound name or XML left over from the previous feed.
            dom = None
            transformed_xml = None
            try:
                # Fetch the XML from the feed URL.
                dom = getDOM(feedUrl)
                # Only print the XML if verbose output is enabled.
                if args.verbose:
                    print(ET.tostring(dom, pretty_print=True))
                # Determine which XSL transform to apply.
                xslt = getXslt(feedUrl)
                if xslt is None:
                    # Fail with a readable message instead of an opaque
                    # error from trying to parse a stylesheet named None.
                    raise ValueError("No XSLT is configured for this feed URL")
                transformed_xml = applyXslt(dom, xslt)
                # Only print the transformed XML if verbose output is enabled.
                if args.verbose:
                    print(ET.tostring(transformed_xml, pretty_print=True))
                # Determine which XSD to validate the transformed XML against.
                xsd = getXsd(feedUrl)
                if xsd is None:
                    raise ValueError("No XSD is configured for this feed URL")
                applyXsd(transformed_xml, xsd)
                print("Feed is valid.")
            except Exception as e:
                # Deliberately broad: one bad feed must not stop the run.
                # The `as` form is valid on Python 2.6+ and Python 3.
                print("Feed is INVALID! (See log for details.)")
                print("Invalid feed URL: " + feedUrl, file=logFile)
                print(e, file=logFile)
                if args.verbose:
                    # Log only the stages that were actually reached.
                    if dom is not None:
                        print("Feed URL returned the following XML:", file=logFile)
                        print(ET.tostring(dom, pretty_print=True), file=logFile)
                    if transformed_xml is not None:
                        print("Feed URL XML transformed to:", file=logFile)
                        print(ET.tostring(transformed_xml, pretty_print=True), file=logFile)
                # Blank line separating entries in the log.
                print("", file=logFile)
            # Blank line separating feeds on the console.
            print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment