Last active
August 29, 2019 15:21
-
-
Save luipir/8016e36bca8cac57ffeddebec77f7026 to your computer and use it in GitHub Desktop.
Pure python and GDAL metadata extractor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
try:
    # cElementTree was removed in Python 3.9; use it only where it exists.
    from xml.etree import cElementTree as ElementTree
except ImportError:
    from xml.etree import ElementTree

from osgeo import gdal
# Path of the image opened by the metadata dump at the bottom of this script.
# Replace the placeholder with a real file path before running.
imgpath = '???????????<put the image path here>????????????'
############################################### | |
# XML-to-dict parsing code taken from:
# https://stackoverflow.com/questions/2148119/how-to-convert-an-xml-string-to-a-dictionary | |
class XmlListConfig(list):
    """List built from the children of an XML element.

    Every child of ``aList`` contributes at most one item:

    * a child that has sub-elements becomes an ``XmlDictConfig`` when it has
      exactly one sub-element or its first two sub-elements carry different
      tags, and a nested ``XmlListConfig`` when its first two sub-elements
      share the same tag;
    * a child without sub-elements contributes its whitespace-stripped text
      and is skipped when that text is missing or blank (its attributes are
      not read).
    """

    def __init__(self, aList):
        super().__init__()
        for element in aList:
            # len() counts sub-elements. Relying on the truth value of an
            # Element is deprecated since Python 3.12, so test explicitly.
            if len(element) > 0:
                if len(element) == 1 or element[0].tag != element[1].tag:
                    # treat like dict
                    self.append(XmlDictConfig(element))
                else:
                    # treat like list: the first two tags are identical
                    self.append(XmlListConfig(element))
            elif element.text:
                text = element.text.strip()
                if text:
                    self.append(text)
class XmlDictConfig(dict):
    """Dictionary built from an XML element, its attributes and its children.

    Example usage:

    >>> tree = ElementTree.parse('your_file.xml')
    >>> root = tree.getroot()
    >>> xmldict = XmlDictConfig(root)

    Or, if you want to use an XML string:

    >>> root = ElementTree.XML(xml_string)
    >>> xmldict = XmlDictConfig(root)

    And then use xmldict for what it is... a dict.

    The attributes of ``parent_element`` become entries of the dictionary,
    then every child is stored under its tag name:

    * a child with sub-elements maps to a nested dictionary (plus the
      child's own attributes);
    * a child without sub-elements but with attributes maps to a plain dict
      of those attributes (its text is ignored);
    * any other child maps to its text, which is ``None`` for an empty tag.

    Children that share a tag overwrite one another: the last one wins.
    """

    def __init__(self, parent_element):
        super().__init__()
        # The element's own attributes go in first, so a child whose tag
        # equals an attribute name replaces that attribute.
        self.update(parent_element.items())
        for element in parent_element:
            # len() counts sub-elements. Relying on the truth value of an
            # Element is deprecated since Python 3.12, so test explicitly.
            if len(element) > 0:
                if len(element) == 1 or element[0].tag != element[1].tag:
                    # treat like dict - we assume that if the first two tags
                    # in a series are different, then they are all different.
                    aDict = XmlDictConfig(element)
                else:
                    # treat like list - we assume that if the first two tags
                    # in a series are the same, then the rest are the same.
                    # The key is the tag name the list elements share and
                    # the value is the list itself.
                    aDict = {element[0].tag: XmlListConfig(element)}
                # if the tag has attributes, add those to the dict
                aDict.update(element.items())
                self[element.tag] = aDict
            elif element.items():
                # this assumes that if you've got an attribute in a tag,
                # you won't be having any text.
                self[element.tag] = dict(element.items())
            else:
                # no child tags and no attributes: keep the text
                self[element.tag] = element.text
############################################### | |
# Print every metadata item GDAL reports for the image at ``imgpath``.

# Ask GDAL to raise Python exceptions instead of failing silently.
gdal.UseExceptions()
dataFrame = gdal.Open(imgpath, gdal.GA_ReadOnly)

# The domain list may come back as None when the dataset reports no domain,
# so fall back to an empty list to keep the loop below safe.
domains = dataFrame.GetMetadataDomainList() or []

# Default-domain metadata, printed under the "EXIF" label.
exif = dataFrame.GetMetadata()
for key, value in exif.items():
    print("EXIF --", key, ":", value)

for domain in domains:
    metadata = dataFrame.GetMetadata(domain)
    if isinstance(metadata, dict):
        for key, value in metadata.items():
            print(domain, "--", key, ":", value)
    elif isinstance(metadata, list) and domain == 'xml:XMP' and metadata:
        # parse xml: the first list item holds the XMP document text
        root = ElementTree.XML(metadata[0])
        xmldict = XmlDictConfig(root)
        # Skip the first two nesting levels. At each level follow the first
        # dict-valued entry, so a plain-string attribute stored ahead of it
        # does not break the descent; fall back to an empty dict when there
        # is nothing to descend into.
        subdict = xmldict
        for _ in range(2):
            subdict = next(
                (child for child in subdict.values()
                 if isinstance(child, dict)),
                {},
            )
        # parse XMP stuffs
        for key, value in subdict.items():
            print(domain, '--', key, value)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment