Skip to content

Instantly share code, notes, and snippets.

@luipir
Last active August 29, 2019 15:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save luipir/8016e36bca8cac57ffeddebec77f7026 to your computer and use it in GitHub Desktop.
Save luipir/8016e36bca8cac57ffeddebec77f7026 to your computer and use it in GitHub Desktop.
Pure Python and GDAL metadata extractor
from xml.etree import cElementTree as ElementTree
from osgeo import gdal
# Path of the image whose metadata is dumped; it is handed to gdal.Open()
# further down. Replace the placeholder with a real path before running.
imgpath = '???????????<put the image path here>????????????'
###############################################
# XML-to-dict parsing code taken from:
# https://stackoverflow.com/questions/2148119/how-to-convert-an-xml-string-to-a-dictionary
class XmlListConfig(list):
    """List built from the children of an ElementTree element.

    Each child of ``aList`` contributes at most one item:

    * a child with sub-elements whose first two tags differ (or that has a
      single sub-element) is converted with ``XmlDictConfig``;
    * a child with sub-elements whose first two tags match is converted
      recursively with ``XmlListConfig``;
    * a child without sub-elements contributes its stripped text, provided
      the text is not empty or whitespace-only.

    Children that have neither sub-elements nor text are skipped.
    """

    def __init__(self, aList):
        """Populate the list from ``aList``, an iterable of elements."""
        super().__init__()
        for element in aList:
            # len() counts sub-elements. Truth-testing an Element directly is
            # deprecated, so the child count is checked explicitly.
            if len(element) > 0:
                # Only the first two tags are compared: if they differ the
                # children are treated as a dict, otherwise as a list.
                if len(element) == 1 or element[0].tag != element[1].tag:
                    self.append(XmlDictConfig(element))
                else:
                    self.append(XmlListConfig(element))
            elif element.text:
                text = element.text.strip()
                if text:
                    self.append(text)
class XmlDictConfig(dict):
    """Dictionary view of an ElementTree element.

    Example usage:

    >>> tree = ElementTree.parse('your_file.xml')
    >>> root = tree.getroot()
    >>> xmldict = XmlDictConfig(root)

    Or, if you want to use an XML string:

    >>> root = ElementTree.XML(xml_string)
    >>> xmldict = XmlDictConfig(root)

    And then use xmldict for what it is... a dict.

    Mapping rules:

    * attributes of the parent element become plain entries;
    * every child is stored under its tag, so a later child with the same
      tag replaces an earlier one;
    * a child with sub-elements becomes a nested dict (or a dict holding a
      single ``XmlListConfig`` when its first two sub-element tags match),
      merged with the child's own attributes;
    * a child with attributes but no sub-elements becomes a dict of those
      attributes and its text is ignored;
    * any other child is stored as its raw (unstripped) text, which is
      ``None`` for an empty element.
    """

    def __init__(self, parent_element):
        """Populate the dict from ``parent_element`` and its children."""
        super().__init__()
        # items() yields (name, value) attribute pairs; empty means a no-op.
        self.update(parent_element.items())
        for element in parent_element:
            # len() counts sub-elements. Truth-testing an Element directly is
            # deprecated, so the child count is checked explicitly.
            if len(element) > 0:
                # treat like dict - we assume that if the first two tags
                # in a series are different, then they are all different.
                if len(element) == 1 or element[0].tag != element[1].tag:
                    aDict = XmlDictConfig(element)
                # treat like list - we assume that if the first two tags
                # in a series are the same, then the rest are the same.
                else:
                    # the list goes into a dictionary; the key is the tag
                    # name the list elements share, the value is the list
                    aDict = {element[0].tag: XmlListConfig(element)}
                # if the tag has attributes, add those to the dict
                aDict.update(element.items())
                self[element.tag] = aDict
            # this assumes that if you've got an attribute in a tag,
            # you won't be having any text.
            elif element.items():
                self[element.tag] = dict(element.items())
            # finally, if there are no child tags and no attributes,
            # extract the text
            else:
                self[element.tag] = element.text
###############################################
def _first_dict_value(mapping):
    """Return the first value of *mapping* that is a dict, or None."""
    return next(
        (value for value in mapping.values() if isinstance(value, dict)),
        None,
    )


# Have GDAL raise Python exceptions on failure (e.g. an unreadable path).
gdal.UseExceptions()
dataFrame = gdal.Open(imgpath, gdal.GA_ReadOnly)

# GetMetadataDomainList() may return None when no domain is reported;
# normalise to an empty list so the loop below is simply skipped.
domains = dataFrame.GetMetadataDomainList() or []

# Metadata of the default domain, printed under an "EXIF" label.
exif = dataFrame.GetMetadata() or {}
for key, value in exif.items():
    print("EXIF --", key, ":", value)

for domain in domains:
    metadata = dataFrame.GetMetadata(domain)
    if isinstance(metadata, dict):
        for key, value in metadata.items():
            print(domain, "--", key, ":", value)
    elif isinstance(metadata, list) and domain == 'xml:XMP' and metadata:
        # The first list entry is parsed as the XMP XML document.
        root = ElementTree.XML(metadata[0])
        xmldict = XmlDictConfig(root)
        # Skip the two outer wrapper levels. Nested dicts are selected
        # rather than "the first value" because XmlDictConfig stores the
        # root's attributes before its children, so the first value may be
        # a plain string.
        subdict = _first_dict_value(xmldict)
        if subdict is not None:
            subdict = _first_dict_value(subdict)
        if subdict is not None:
            # print the XMP properties found at the third level
            for key, value in subdict.items():
                print(domain, '--', key, value)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment