Created
February 3, 2021 21:06
-
-
Save cisaacstern/3f19b4913ada13cb3f864c1ec88b7ec0 to your computer and use it in GitHub Desktop.
Minimal example of parsing metadata from a USGS xml file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# The metadata used in this example was retrieved from:
# https://www.sciencebase.gov/catalog/item/4f70aa71e4b058caae3f8de1
#
# Parse the first XML metadata file found in ``meta_dir`` and print its
# bounding box, rounded to whole numbers, as (south, north) (west, east).
import os
import xml.etree.ElementTree as ET

from xml2dict import XmlDictConfig

meta_dir = 'data_meta'
# os.listdir returns entries in arbitrary order; sort them so the file picked
# below is the same on every run.
meta_files = sorted(os.listdir(meta_dir))
if not meta_files:
    raise FileNotFoundError('no metadata files found in {!r}'.format(meta_dir))

tree = ET.parse(os.path.join(meta_dir, meta_files[0]))
root = tree.getroot()
xmldict = XmlDictConfig(root)

# The values under idinfo/spdom/bounding arrive as text: convert each to
# float, then round to the nearest integer. Built-in round() rounds halves to
# even, the same rule numpy.around applies, so no numpy import is needed.
b = xmldict['idinfo']['spdom']['bounding']
b = {key: int(round(float(val))) for key, val in b.items()}
latb = (b['southbc'], b['northbc'])
lonb = (b['westbc'], b['eastbc'])
print(latb, lonb)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# Referenced from this Stack Overflow thread:
# https://stackoverflow.com/questions/2148119/how-to-convert-an-xml-string-to-a-dictionary
#
class XmlListConfig(list):
    """List built from the child elements of an ElementTree element.

    Each child of ``aList`` contributes at most one entry:

    * a child that has sub-elements becomes an ``XmlDictConfig`` when it has
      exactly one sub-element or its first two sub-elements have different
      tags, and a nested ``XmlListConfig`` otherwise;
    * a child without sub-elements contributes its text with surrounding
      whitespace stripped, and is skipped when that text is missing or blank.

    Attributes of the children are not recorded here.
    """

    def __init__(self, aList):
        super().__init__()
        for element in aList:
            # len() counts child elements. Testing the element's truth value
            # directly is deprecated in xml.etree.ElementTree, so be explicit.
            if len(element):
                # treat like dict - only the first two tags are compared; the
                # rest are assumed to follow the same pattern.
                if len(element) == 1 or element[0].tag != element[1].tag:
                    self.append(XmlDictConfig(element))
                # treat like list - the first two tags are the same
                else:
                    self.append(XmlListConfig(element))
            elif element.text:
                text = element.text.strip()
                if text:
                    self.append(text)
class XmlDictConfig(dict):
    '''
    Dictionary view of an ElementTree element.

    Example usage:

    >>> tree = ElementTree.parse('your_file.xml')
    >>> root = tree.getroot()
    >>> xmldict = XmlDictConfig(root)

    Or, if you want to use an XML string:

    >>> root = ElementTree.XML(xml_string)
    >>> xmldict = XmlDictConfig(root)

    And then use xmldict for what it is... a dict.

    The attributes of ``parent_element`` become entries of the dict, and each
    child element is stored under its tag:

    * a child with sub-elements becomes a nested ``XmlDictConfig`` (exactly
      one sub-element, or the first two sub-element tags differ) or a plain
      dict ``{shared_tag: XmlListConfig}`` (first two tags are the same),
      with the child's own attributes merged in;
    * a child with attributes but no sub-elements becomes a dict of those
      attributes, and its text is dropped;
    * any other child maps to its text, which is ``None`` for an empty
      element.

    Children that share a tag overwrite one another, so only the last one
    is kept.
    '''

    def __init__(self, parent_element):
        super().__init__()
        if parent_element.items():
            self.update(dict(parent_element.items()))
        for element in parent_element:
            # len() counts child elements. Testing the element's truth value
            # directly is deprecated in xml.etree.ElementTree, so be explicit.
            if len(element):
                # treat like dict - we assume that if the first two tags
                # in a series are different, then they are all different.
                if len(element) == 1 or element[0].tag != element[1].tag:
                    aDict = XmlDictConfig(element)
                # treat like list - we assume that if the first two tags
                # in a series are the same, then the rest are the same.
                else:
                    # here, we put the list in dictionary; the key is the
                    # tag name the list elements all share in common, and
                    # the value is the list itself
                    aDict = {element[0].tag: XmlListConfig(element)}
                # if the tag has attributes, add those to the dict
                if element.items():
                    aDict.update(dict(element.items()))
                self.update({element.tag: aDict})
            # this assumes that if you've got an attribute in a tag,
            # you won't be having any text. This may or may not be a
            # good idea -- time will tell. It works for the way we are
            # currently doing XML configuration files...
            elif element.items():
                self.update({element.tag: dict(element.items())})
            # finally, if there are no child tags and no attributes, extract
            # the text
            else:
                self.update({element.tag: element.text})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment