Skip to content

Instantly share code, notes, and snippets.

@gannebamm
Created March 3, 2021 14:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save gannebamm/965628b5e71361f7010a7b5b9dff1586 to your computer and use it in GitHub Desktop.
Save gannebamm/965628b5e71361f7010a7b5b9dff1586 to your computer and use it in GitHub Desktop.
Enable XML metadata parsing from documents for GeoNode. See https://github.com/GeoNode/geonode/issues/6876
Index: geonode/documents/views.py
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/geonode/documents/views.py b/geonode/documents/views.py
--- a/geonode/documents/views.py (revision a5a217bd49e7f91b69ace14a54cdb7219fa5d0dc)
+++ b/geonode/documents/views.py (revision 9e436c39e470b5a8aae6304f4a62ccbe00f4cf1c)
@@ -49,7 +49,8 @@
from geonode.base.forms import CategoryForm, TKeywordForm
from geonode.base.models import (
Thesaurus,
- TopicCategory)
+ TopicCategory,
+ ResourceBase)
from geonode.documents.enumerations import DOCUMENT_TYPE_MAP, DOCUMENT_MIMETYPE_MAP
from geonode.documents.models import Document, get_related_resources
from geonode.documents.forms import DocumentForm, DocumentCreateForm, DocumentReplaceForm
@@ -62,6 +63,12 @@
from dal import autocomplete
+# TODO: set_metadata and resolve_regions (imported below) should be refactored into the base model
+from geonode.layers.metadata import set_metadata
+from geonode.layers.utils import resolve_regions
+
+from geonode.base.models import SpatialRepresentationType
+
logger = logging.getLogger("geonode.documents.views")
ALLOWED_DOC_TYPES = settings.ALLOWED_DOCUMENT_TYPES
@@ -238,6 +245,8 @@
form.save_many2many()
self.object.set_permissions(form.cleaned_data['permissions'])
+ identifier = None
+ title = None
abstract = None
date = None
regions = []
@@ -258,6 +267,52 @@
except Exception:
logger.error("Exif extraction failed.")
+ # extract XML metadata
+ if self.object.mime_type == 'application/xml' \
+ and getattr(settings, 'EXTRACT_METADATA_FROM_XML_DOCS', True):
+ try:
+ # read the raw XML content of the uploaded document
+ with open(self.object.doc_file.path) as f:
+ xml_file = f.read()
+
+ # get model properties from XML
+ identifier, vals, regions, keywords = set_metadata(xml_file)
+
+ metadata = {}
+
+ metadata['metadata_xml'] = xml_file
+ metadata['uuid'] = identifier
+
+ for key, value in vals.items():
+ if key == 'spatial_representation_type':
+ value = SpatialRepresentationType(identifier=value)
+ elif key == 'topic_category':
+ value, created = TopicCategory.objects.get_or_create(
+ identifier=value.lower(),
+ defaults={'description': '', 'gn_description': value})
+ key = 'category'
+ metadata[key] = value
+ except Exception:
+ logger.error("XML metadata extraction failed.")
+
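+ # use the extracted values to populate the Document fields (NOTE(review): `metadata` may be unbound or lack these keys if extraction failed above - confirm)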
+ title = metadata['title']
+ abstract = metadata['abstract']
+ date = metadata['date']
+
+ if identifier:
+ if not (ResourceBase.objects.filter(uuid=identifier)):
+ self.object.uuid = identifier
+ else:
+ logger.error("The UUID identifier from the XML Metadata is already in use in this system.")
+ # The UUID is already in use, but self.object has already been created in the database,
+ # so delete it and raise an error.
+ self.object.delete()
+ raise AttributeError("The UUID identifier from the XML Metadata is already in use in this system.")
+
+ if title:
+ self.object.title = title
+
if abstract:
self.object.abstract = abstract
@@ -265,8 +320,19 @@
self.object.date = date
self.object.date_type = "Creation"
+ # Resolve the extracted regions against the Region model (see geonode/geoserver/tasks.py:327);
+ # regions that cannot be resolved are added to the keywords instead.
if len(regions) > 0:
- self.object.regions.add(*regions)
+ regions_resolved, regions_unresolved = resolve_regions(regions)
+ regions_resolved = list(set(regions_resolved))
+ keywords.extend(regions_unresolved)
+ if regions_resolved:
+ if len(regions_resolved) > 0:
+ if not self.object.regions:
+ self.object.regions = regions_resolved
+ else:
+ self.object.regions.clear()
+ self.object.regions.add(*regions_resolved)
if len(keywords) > 0:
self.object.keywords.add(*keywords)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment