Created
March 3, 2021 14:57
-
-
Save gannebamm/965628b5e71361f7010a7b5b9dff1586 to your computer and use it in GitHub Desktop.
Enable XML metadata parsing from documents for GeoNode. See https://github.com/GeoNode/geonode/issues/6876
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Index: geonode/documents/views.py | |
IDEA additional info: | |
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP | |
<+>UTF-8 | |
=================================================================== | |
diff --git a/geonode/documents/views.py b/geonode/documents/views.py | |
--- a/geonode/documents/views.py (revision a5a217bd49e7f91b69ace14a54cdb7219fa5d0dc) | |
+++ b/geonode/documents/views.py (revision 9e436c39e470b5a8aae6304f4a62ccbe00f4cf1c) | |
@@ -49,7 +49,8 @@ | |
from geonode.base.forms import CategoryForm, TKeywordForm | |
from geonode.base.models import ( | |
Thesaurus, | |
- TopicCategory) | |
+ TopicCategory, | |
+ ResourceBase) | |
from geonode.documents.enumerations import DOCUMENT_TYPE_MAP, DOCUMENT_MIMETYPE_MAP | |
from geonode.documents.models import Document, get_related_resources | |
from geonode.documents.forms import DocumentForm, DocumentCreateForm, DocumentReplaceForm | |
@@ -62,6 +63,12 @@ | |
from dal import autocomplete | |
+# TODO: set_metadata and resolve_regions should get refactored into the base model | |
+from geonode.layers.metadata import set_metadata | |
+from geonode.layers.utils import resolve_regions | |
+ | |
+from geonode.base.models import SpatialRepresentationType | |
+ | |
logger = logging.getLogger("geonode.documents.views") | |
ALLOWED_DOC_TYPES = settings.ALLOWED_DOCUMENT_TYPES | |
@@ -238,6 +245,8 @@ | |
form.save_many2many() | |
self.object.set_permissions(form.cleaned_data['permissions']) | |
+ identifier = None | |
+ title = None | |
abstract = None | |
date = None | |
regions = [] | |
@@ -258,6 +267,52 @@ | |
except Exception: | |
logger.error("Exif extraction failed.") | |
+ # extract XML metadata | |
+ if self.object.mime_type == 'application/xml' \ | |
+ and getattr(settings, 'EXTRACT_METADATA_FROM_XML_DOCS', True): | |
+ try: | |
+ # read the raw XML of the uploaded document | |
+ with open(self.object.doc_file.path) as f: | |
+ xml_file = f.read() | |
+ | |
+ # get model properties from XML | |
+ identifier, vals, regions, keywords = set_metadata(xml_file) | |
+ | |
+ metadata = {} | |
+ | |
+ metadata['metadata_xml'] = xml_file | |
+ metadata['uuid'] = identifier | |
+ | |
+ for key, value in vals.items(): | |
+ if key == 'spatial_representation_type': | |
+ value = SpatialRepresentationType(identifier=value) | |
+ elif key == 'topic_category': | |
+ value, created = TopicCategory.objects.get_or_create( | |
+ identifier=value.lower(), | |
+ defaults={'description': '', 'gn_description': value}) | |
+ key = 'category' | |
+ metadata[key] = value | |
+ except Exception: | |
+ logger.error("XML metadata extraction failed.") | |
+ | |
+ # use values to populate Document fields (NOTE: `metadata` is unbound if the extraction above failed) | |
+ title = metadata['title'] | |
+ abstract = metadata['abstract'] | |
+ date = metadata['date'] | |
+ | |
+ if identifier: | |
+ if not (ResourceBase.objects.filter(uuid=identifier)): | |
+ self.object.uuid = identifier | |
+ else: | |
+ logger.error("The UUID identifier from the XML Metadata is already in use in this system.") | |
+ # the UUID is already in use, but self.object has already been created in the database: | |
+ # delete it again and raise an error | |
+ self.object.delete() | |
+ raise AttributeError("The UUID identifier from the XML Metadata is already in use in this system.") | |
+ | |
+ if title: | |
+ self.object.title = title | |
+ | |
if abstract: | |
self.object.abstract = abstract | |
@@ -265,8 +320,19 @@ | |
self.object.date = date | |
self.object.date_type = "Creation" | |
+ # resolve all regions via the Region model (see geonode/geoserver/tasks.py:327); | |
+ # the ones that cannot be resolved are added to the keywords | |
if len(regions) > 0: | |
- self.object.regions.add(*regions) | |
+ regions_resolved, regions_unresolved = resolve_regions(regions) | |
+ regions_resolved = list(set(regions_resolved)) | |
+ keywords.extend(regions_unresolved) | |
+ if regions_resolved: | |
+ if len(regions_resolved) > 0: | |
+ if not self.object.regions: | |
+ self.object.regions = regions_resolved | |
+ else: | |
+ self.object.regions.clear() | |
+ self.object.regions.add(*regions_resolved) | |
if len(keywords) > 0: | |
self.object.keywords.add(*keywords) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment