Skip to content

Instantly share code, notes, and snippets.

Created March 17, 2017 17:28
Show Gist options
  • Save mrmiguez/068a1d49daf5bbdbbddedef0887f5c97 to your computer and use it in GitHub Desktop.
Save mrmiguez/068a1d49daf5bbdbbddedef0887f5c97 to your computer and use it in GitHub Desktop.
import requests
# requests -
# the simplest HTTP GET/POST utility I've found
from lxml import etree
# an XML library written in C
# Namespace prefix -> URI map that gets applied to the root element.
# The None key is the default (un-prefixed) namespace.
# NOTE(review): every URI below is an empty string, and the '{}' namespace
# in the root tag further down is empty as well -- presumably the real URIs
# (likely the MADS, XLink and XMLSchema-instance namespaces) were lost when
# this file was copied. Confirm and restore them before running.
NS = {
    None: "",
    "mads": "",
    "xlink": "",
    "xsi": "",
}

# Open both files in one `with` statement: 'r' is read mode, 'wb' is binary
# write mode. A `with` block closes everything it opened as soon as the block
# is exited (even if a request or a parse fails), so no explicit close()
# calls are needed.
# The output is opened in binary mode because lxml hands us bytes that are
# already UTF-8 encoded; writing them untouched guarantees the file really is
# UTF-8, whatever the platform's default text encoding happens to be.
with open('exURIs.txt', 'r') as in_file, \
        open('exMADSCollection.xml', 'wb') as out_file:
    # use the lxml.etree.Element method to create our root
    madsRoot = etree.Element(
        '{}madsCollection',
        version="2.0",  # attributes can be added as keyword=value arguments
        nsmap=NS,       # apply all namespaces from our dict to this element
    )

    # iterate over each line of the input file; one URI is expected per line
    for line in in_file:
        # strip the newline (and any stray surrounding whitespace)
        uri = line.strip()
        if not uri:
            # a blank line would otherwise become a request for the bare
            # string '.madsxml.xml', which is not a valid URL
            continue

        # here we make the actual http request. since many types of files
        # live at our URI we just add the extension of the one we want to
        # the end, i.e. +'.html', +'.json', +'.rdf', etc.
        MADSrequest = requests.get(uri + '.madsxml.xml')

        # check if we actually got something by HTTP status code;
        # any other status is skipped without comment
        if MADSrequest.status_code == 200:
            # parse the raw response bytes rather than the decoded text:
            # lxml refuses str input that carries an XML encoding declaration
            madsChild = etree.XML(MADSrequest.content)
            # lxml stores parent/child relationships as lists, so to add our
            # new XML as a child of root we use the list-style append()
            madsRoot.append(madsChild)

    # once every URI has been processed, serialise the whole tree as UTF-8
    # bytes and write it to the output file
    out_file.write(etree.tostring(madsRoot, encoding="UTF-8"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment