Skip to content

Instantly share code, notes, and snippets.

@tomkralidis
Last active February 2, 2022 14:19
Show Gist options
  • Save tomkralidis/9d5369b2de2994f55b6c3cd64a149932 to your computer and use it in GitHub Desktop.
Save tomkralidis/9d5369b2de2994f55b6c3cd64a149932 to your computer and use it in GitHub Desktop.
GeoCRIS metadata generation
<gmd:MD_Metadata xmlns:gmd="http://www.isotc211.org/2005/gmd" xmlns:gml="http://www.opengis.net/gml" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:gco="http://www.isotc211.org/2005/gco" xsi:schemaLocation="http://www.isotc211.org/2005/gmd http://www.isotc211.org/2005/gmd/gmd.xsd">
<gmd:fileIdentifier>
<gco:CharacterString>{{ record['identifier'] }}</gco:CharacterString>
</gmd:fileIdentifier>
<gmd:language>
<gco:CharacterString>{{ record['language'] }}</gco:CharacterString>
</gmd:language>
<gmd:characterSet>
<gmd:MD_CharacterSetCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_CharacterSetCode" codeListValue="utf8">utf8</gmd:MD_CharacterSetCode>
</gmd:characterSet>
<gmd:hierarchyLevel>
<gmd:MD_ScopeCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_ScopeCode" codeListValue="{{ record['type'] }}">{{ record['type'] }}</gmd:MD_ScopeCode>
</gmd:hierarchyLevel>
<gmd:contact>
<gmd:CI_ResponsibleParty>
<gmd:organisationName>
<gco:CharacterString>{{ record['organization'] }}</gco:CharacterString>
</gmd:organisationName>
<gmd:role>
<gmd:CI_RoleCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode" codeListValue="pointOfContact">pointOfContact</gmd:CI_RoleCode>
</gmd:role>
</gmd:CI_ResponsibleParty>
</gmd:contact>
<gmd:dateStamp>
<gco:DateTime>{{ record['insert_date'] }}</gco:DateTime>
</gmd:dateStamp>
<gmd:metadataStandardName>
<gco:CharacterString>ISO 19115:2003 - Geographic information - Metadata</gco:CharacterString>
</gmd:metadataStandardName>
<gmd:metadataStandardVersion>
<gco:CharacterString>ISO 19115:2003</gco:CharacterString>
</gmd:metadataStandardVersion>
<gmd:dataSetURI>
<gco:CharacterString>{{ record['dataset'] }}</gco:CharacterString>
</gmd:dataSetURI>
<gmd:referenceSystemInfo>
<gmd:MD_ReferenceSystem>
<gmd:referenceSystemIdentifier>
<gmd:RS_Identifier>
<gmd:code>
<gco:CharacterString>{{ record['crs'][1] }}</gco:CharacterString>
</gmd:code>
<gmd:codeSpace>
<gco:CharacterString>{{ record['crs'][0] }}</gco:CharacterString>
</gmd:codeSpace>
<gmd:version>
<gco:CharacterString>6.11</gco:CharacterString>
</gmd:version>
</gmd:RS_Identifier>
</gmd:referenceSystemIdentifier>
</gmd:MD_ReferenceSystem>
</gmd:referenceSystemInfo>
<gmd:identificationInfo>
<gmd:MD_DataIdentification>
<gmd:citation>
<gmd:CI_Citation>
<gmd:title>
<gco:CharacterString>{{ record['title'] }}</gco:CharacterString>
</gmd:title>
<gmd:date>
<gmd:CI_Date>
<gmd:date>
<gco:Date>{{ record['date_creation'] }}</gco:Date>
</gmd:date>
<gmd:dateType>
<gmd:CI_DateTypeCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_DateTypeCode" codeListValue="creation">creation</gmd:CI_DateTypeCode>
</gmd:dateType>
</gmd:CI_Date>
</gmd:date>
<gmd:date>
<gmd:CI_Date>
<gmd:date>
<gco:Date>{{ record['date_revision'] }}</gco:Date>
</gmd:date>
<gmd:dateType>
<gmd:CI_DateTypeCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_DateTypeCode" codeListValue="revision">revision</gmd:CI_DateTypeCode>
</gmd:dateType>
</gmd:CI_Date>
</gmd:date>
<gmd:date>
<gmd:CI_Date>
<gmd:date>
<gco:Date>{{ record['date_publication'] }}</gco:Date>
</gmd:date>
<gmd:dateType>
<gmd:CI_DateTypeCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_DateTypeCode" codeListValue="publication">publication</gmd:CI_DateTypeCode>
</gmd:dateType>
</gmd:CI_Date>
</gmd:date>
<gmd:presentationForm>
<gmd:CI_PresentationFormCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_PresentationFormCode" codeListValue="mapDigital">mapDigital</gmd:CI_PresentationFormCode>
</gmd:presentationForm>
</gmd:CI_Citation>
</gmd:citation>
<gmd:abstract>
<gco:CharacterString>{{ record['abstract'] }}</gco:CharacterString>
</gmd:abstract>
<gmd:status>
<gmd:MD_ProgressCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_ProgressCode" codeListValue="onGoing">onGoing</gmd:MD_ProgressCode>
</gmd:status>
<gmd:pointOfContact>
<gmd:CI_ResponsibleParty>
<gmd:organisationName>
<gco:CharacterString>{{ record['organization'] }}</gco:CharacterString>
</gmd:organisationName>
<gmd:role>
<gmd:CI_RoleCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode" codeListValue="pointOfContact">pointOfContact</gmd:CI_RoleCode>
</gmd:role>
</gmd:CI_ResponsibleParty>
</gmd:pointOfContact>
<gmd:pointOfContact>
<gmd:CI_ResponsibleParty>
<gmd:organisationName>
<gco:CharacterString>{{ record['creator'] }}</gco:CharacterString>
</gmd:organisationName>
<gmd:role>
<gmd:CI_RoleCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode" codeListValue="creator">creator</gmd:CI_RoleCode>
</gmd:role>
</gmd:CI_ResponsibleParty>
</gmd:pointOfContact>
<gmd:pointOfContact>
<gmd:CI_ResponsibleParty>
<gmd:organisationName>
<gco:CharacterString>{{ record['publisher'] }}</gco:CharacterString>
</gmd:organisationName>
<gmd:role>
<gmd:CI_RoleCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode" codeListValue="publisher">publisher</gmd:CI_RoleCode>
</gmd:role>
</gmd:CI_ResponsibleParty>
</gmd:pointOfContact>
<gmd:pointOfContact>
<gmd:CI_ResponsibleParty>
<gmd:organisationName>
<gco:CharacterString>{{ record['contributor'] }}</gco:CharacterString>
</gmd:organisationName>
<gmd:role>
<gmd:CI_RoleCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_RoleCode" codeListValue="contributor">contributor</gmd:CI_RoleCode>
</gmd:role>
</gmd:CI_ResponsibleParty>
</gmd:pointOfContact>
<gmd:resourceMaintenance>
<gmd:MD_MaintenanceInformation>
<gmd:maintenanceAndUpdateFrequency>
<gmd:MD_MaintenanceFrequencyCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_MaintenanceFrequencyCode" codeListValue="continual">continual</gmd:MD_MaintenanceFrequencyCode>
</gmd:maintenanceAndUpdateFrequency>
</gmd:MD_MaintenanceInformation>
</gmd:resourceMaintenance>
<gmd:resourceFormat>
<gmd:MD_Format>
<gmd:name>
<gco:CharacterString>{{ record['format'] }}</gco:CharacterString>
</gmd:name>
<gmd:version>
<gco:CharacterString gco:nilReason="missing"/>
</gmd:version>
</gmd:MD_Format>
</gmd:resourceFormat>
<gmd:descriptiveKeywords>
<gmd:MD_Keywords>
{% for kw in record['keywords'] %}
<gmd:keyword>
<gco:CharacterString>{{ kw }}</gco:CharacterString>
</gmd:keyword>
{% endfor %}
<gmd:type>
<gmd:MD_KeywordTypeCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_KeywordTypeCode" codeListValue="theme">theme</gmd:MD_KeywordTypeCode>
</gmd:type>
</gmd:MD_Keywords>
</gmd:descriptiveKeywords>
<gmd:descriptiveKeywords>
<gmd:MD_Keywords>
<gmd:keyword>
<gco:CharacterString>{{ record['country']['name'] }}</gco:CharacterString>
</gmd:keyword>
<gmd:type>
<gmd:MD_KeywordTypeCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_KeywordTypeCode" codeListValue="place">place</gmd:MD_KeywordTypeCode>
</gmd:type>
</gmd:MD_Keywords>
</gmd:descriptiveKeywords>
<gmd:spatialRepresentationType>
<gmd:MD_SpatialRepresentationTypeCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_SpatialRepresentationTypeCode" codeListValue="{{ record['data_type'] }}">{{ record['data_type'] }}</gmd:MD_SpatialRepresentationTypeCode>
</gmd:spatialRepresentationType>
<gmd:language>
<gco:CharacterString>{{ record['language'] }}</gco:CharacterString>
</gmd:language>
<gmd:characterSet>
<gmd:MD_CharacterSetCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_CharacterSetCode" codeListValue="utf8">utf8</gmd:MD_CharacterSetCode>
</gmd:characterSet>
<gmd:topicCategory>
<gmd:MD_TopicCategoryCode>{{ record['category']['name'] }}</gmd:MD_TopicCategoryCode>
</gmd:topicCategory>
<gmd:extent>
<gmd:EX_Extent>
<gmd:geographicElement>
<gmd:EX_GeographicBoundingBox>
<gmd:westBoundLongitude>
<gco:Decimal>{{ record['country']['bbox'][0] }}</gco:Decimal>
</gmd:westBoundLongitude>
<gmd:eastBoundLongitude>
<gco:Decimal>{{ record['country']['bbox'][2] }}</gco:Decimal>
</gmd:eastBoundLongitude>
<gmd:southBoundLatitude>
<gco:Decimal>{{ record['country']['bbox'][1] }}</gco:Decimal>
</gmd:southBoundLatitude>
<gmd:northBoundLatitude>
<gco:Decimal>{{ record['country']['bbox'][3] }}</gco:Decimal>
</gmd:northBoundLatitude>
</gmd:EX_GeographicBoundingBox>
</gmd:geographicElement>
</gmd:EX_Extent>
</gmd:extent>
<gmd:extent>
<gmd:EX_Extent>
<gmd:temporalElement>
<gmd:EX_TemporalExtent>
<gmd:extent>
<gml:TimePeriod gml:id="T_01">
<gml:beginPosition>{{ record['time_begin'] }}</gml:beginPosition>
<gml:endPosition>{{ record['time_end'] }}</gml:endPosition>
</gml:TimePeriod>
</gmd:extent>
</gmd:EX_TemporalExtent>
</gmd:temporalElement>
</gmd:EX_Extent>
</gmd:extent>
<gmd:supplementalInformation>
<gco:CharacterString>{{ record['supplementary_information'] }}</gco:CharacterString>
</gmd:supplementalInformation>
</gmd:MD_DataIdentification>
</gmd:identificationInfo>
<gmd:distributionInfo>
<gmd:MD_Distribution>
<gmd:transferOptions>
<gmd:MD_DigitalTransferOptions>
{% for link in record['links'] %}
<gmd:onLine>
<gmd:CI_OnlineResource>
<gmd:linkage>
<gmd:URL>{{ link['url'] }}</gmd:URL>
</gmd:linkage>
<gmd:protocol>
<gco:CharacterString>{{ link['protocol'] }}</gco:CharacterString>
</gmd:protocol>
<gmd:name>
<gco:CharacterString>{{ link['name'] }}</gco:CharacterString>
</gmd:name>
<gmd:description>
<gco:CharacterString>{{ link['description'] }}</gco:CharacterString>
</gmd:description>
</gmd:CI_OnlineResource>
</gmd:onLine>
{% endfor %}
</gmd:MD_DigitalTransferOptions>
</gmd:transferOptions>
</gmd:MD_Distribution>
</gmd:distributionInfo>
<gmd:dataQualityInfo>
<gmd:DQ_DataQuality>
<gmd:scope>
<gmd:DQ_Scope>
<gmd:level>
<gmd:MD_ScopeCode codeSpace="ISOTC211/19115" codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#MD_ScopeCode" codeListValue="{{ record['type'] }}">{{ record['type'] }}</gmd:MD_ScopeCode>
</gmd:level>
</gmd:DQ_Scope>
</gmd:scope>
<gmd:lineage>
<gmd:LI_Lineage>
<gmd:statement>
<gco:CharacterString>{{ record['data_quality'] }}</gco:CharacterString>
</gmd:statement>
</gmd:LI_Lineage>
</gmd:lineage>
</gmd:DQ_DataQuality>
</gmd:dataQualityInfo>
</gmd:MD_Metadata>
import io
import os
import shutil
import ssl
import sys
from urllib.request import urlopen
import uuid
from jinja2 import Environment, FileSystemLoader
import pyexcel
# Jinja2 environment that looks templates up in the current working
# directory, with autoescaping enabled for every rendered value.
TEMPLATE_ENV = Environment(loader=FileSystemLoader(['.']), autoescape=True)

# Loaded once, at import time.
TEMPLATE = TEMPLATE_ENV.get_template('full-metadata.j2')

# Endpoint patterns completed with str.format(); doubled braces come out of
# formatting as literal {z}/{x}/{y} tile placeholders.
WMS_URL = 'https://geocris2.cdema.org/cgi-bin/mapserv?map=/home/cdemabb/geocris-backend/mapserver/rasters.map'  # noqa
TMS_URL = 'https://geocris2.cdema.org/mapproxy/tiles/1.0.0/{}/webmercator/{{z}}/{{x}}/{{y}}.png'  # noqa
OAPIF_URL = 'https://geocris2.cdema.org/features/collections/{}.{}/items.json'
MVT_URL = 'https://geocris2.cdema.org/tegola/maps/{}/{}/{{z}}/{{x}}/{{y}}.pbf'
DOC_URL = 'https://geocris2.cdema.org/documents/{}'
def isempty(value):
    """Return True when *value* is the empty string or None, else False."""
    return value in ('', None)
def render_template(record):
    """Placeholder: accepts a record, does nothing and returns None."""
    return None
def parse_spreadsheet(filename):
    """Load the lookup tables and the metadata sheet from a spreadsheet.

    The workbook is expected to contain sheets named ``Categories``,
    ``Countries`` and ``Metadata``; the first row of each sheet is used as
    its column names.

    :param filename: path of the spreadsheet, handed to ``pyexcel.get_book``

    :returns: dict with three keys:
              ``categories`` maps a category code to
              ``{'name': ..., 'label': ...}``;
              ``countries`` maps a country code to
              ``{'code': ..., 'name': ..., 'bbox': [West, South, East, North]}``;
              ``records`` is the ``Metadata`` sheet with named columns
    """

    data = {
        'categories': {},
        'countries': {}
    }

    book = pyexcel.get_book(file_name=filename)

    categories = book['Categories']
    categories.name_columns_by_row(0)
    # name_columns_by_row() removes the header row from the sheet, so
    # number_of_rows() now counts data rows only.  Visit all of them:
    # stopping at number_of_rows() - 1 silently dropped the last row.
    for row in range(categories.number_of_rows()):
        code = categories[row, 'Code']
        data['categories'][code] = {
            'name': categories[row, 'Name'],
            'label': categories[row, 'Label']
        }

    countries = book['Countries']
    countries.name_columns_by_row(0)
    for row in range(countries.number_of_rows()):
        code = countries[row, 'Code']
        data['countries'][code] = {
            'code': code,
            'name': countries[row, 'Name'],
            # minx, miny, maxx, maxy ordering
            'bbox': [
                countries[row, 'West'],
                countries[row, 'South'],
                countries[row, 'East'],
                countries[row, 'North']
            ]
        }

    # The metadata rows are handed back as the sheet itself; callers index
    # it as sheet[row, 'column name'].
    data['records'] = book['Metadata']
    data['records'].name_columns_by_row(0)

    return data
# (record key, spreadsheet column) pairs whose cell value is copied verbatim.
_COPIED_COLUMNS = (
    ('type', 'type'),
    ('dataset', 'dataset'),
    ('title', 'title'),
    ('abstract', 'abstract'),
    ('organization', 'organization'),
    ('date', 'date'),
    ('insert_date', 'insert_date'),
    ('date_revision', 'date_revision'),
    ('date_creation', 'date_creation'),
    ('date_publication', 'date_publication'),
    ('date_modified', 'date_modified'),
    ('time_begin', 'time_begin'),
    ('time_end', 'time_end'),
    ('creator', 'creator'),
    ('publisher', 'publisher'),
    ('contributor', 'credit/contributor'),
    ('format', 'format'),
    ('source', 'source'),
    ('linkage', 'linkage'),
    ('supplementary_information', 'suppInfo'),
    ('data_quality', 'DataQual'),
    ('classification', 'Classification Code'),
)


def _build_links(m):
    """Return the list of online-resource link dicts for metadata dict *m*.

    Each link has the keys ``name``, ``description``, ``protocol`` and
    ``url``.  Which links are produced depends on ``m['data_type']``,
    ``m['type']`` and ``m['linkage']``.
    """

    dataset = m['dataset']
    country_code = m['country']['code']
    # link descriptions carry the title with commas replaced by spaces
    description = m['title'].replace(',', ' ')

    def make_link(protocol, url):
        return {
            'name': dataset,
            'description': description,
            'protocol': protocol,
            'url': url
        }

    links = []

    if m['data_type'] == 'vector':
        url = OAPIF_URL.format(country_code, dataset)
        # Only advertise the features endpoint when it actually responds;
        # any failure is reported and the link is left out.
        # NOTE(review): a bare ssl.SSLContext() does not verify server
        # certificates by default - confirm that is intended here.
        try:
            with urlopen(url, context=ssl.SSLContext()):
                links.append(make_link('OGC:OAPIF', url))
        except Exception as err:
            print(' OAPIF URL error: {}'.format(err))

        links.append(make_link(
            'Mapbox:MVT', MVT_URL.format(country_code, dataset)))
    elif m['data_type'] in ['grid', 'raster']:
        links.append(make_link(
            'OGC:WMS', WMS_URL.format(country_code, dataset)))
        links.append(make_link('OSGeo:TMS', TMS_URL.format(dataset)))
    elif m['type'] == 'document':
        links.append(make_link('WWW:LINK', DOC_URL.format(dataset)))

    # an explicit linkage cell always contributes one more link
    if not isempty(m['linkage']):
        links.append(make_link('WWW:LINK', m['linkage']))

    return links


def generate_metadata(d):
    """Turn the rows of the metadata sheet into a list of record dicts.

    :param d: dict as returned by ``parse_spreadsheet``: ``d['records']`` is
              a sheet indexed as ``sheet[row, 'column']`` whose header row
              has already been consumed as column names, ``d['countries']``
              and ``d['categories']`` are lookup dicts keyed by code

    :returns: list of dicts, one per row that has a non-empty ``dataset``
              cell, ready to be passed to the XML template as ``record``

    :raises KeyError: when a row names a country or category code that is
                      missing from the lookup dicts
    """

    sheet = d['records']
    records = []

    # The header row is no longer part of the sheet, so number_of_rows()
    # counts data rows only; iterating to number_of_rows() - 1 skipped the
    # last record.
    for row in range(sheet.number_of_rows()):
        dataset = sheet[row, 'dataset']
        if isempty(dataset):
            print('Skipping row {}'.format(row))
            continue

        print('Processing {}'.format(dataset))

        m = {'links': []}

        m['identifier'] = sheet[row, 'identifier']
        if isempty(m['identifier']):
            print(' identifier empty; generating uuid')
            # store text, like identifiers read from the sheet
            m['identifier'] = str(uuid.uuid4())

        for key, column in _COPIED_COLUMNS:
            m[key] = sheet[row, column]

        m['country'] = d['countries'][sheet[row, 'country']]
        m['category'] = d['categories'][sheet[row, 'category']]

        m['data_type'] = sheet[row, 'data_type']
        if m['data_type'] is not None:
            m['data_type'] = m['data_type'].lower()
        # 'raster' is written out as 'grid'
        if m['data_type'] == 'raster':
            m['data_type'] = 'grid'

        # Semicolon-separated list; blank entries (empty cell, trailing
        # ';') are dropped so no empty keyword elements are rendered.
        raw_keywords = sheet[row, 'keywords']
        if isempty(raw_keywords):
            m['keywords'] = []
        else:
            m['keywords'] = [
                kw.strip() for kw in raw_keywords.split(';') if kw.strip()
            ]

        # the language is fixed; the sheet is not consulted for it
        m['language'] = 'en'

        # 'AUTHORITY:CODE' -> ['AUTHORITY', 'CODE']
        m['crs'] = sheet[row, 'crs'].split(':')

        # links: name, description, protocol, url
        m['links'] = _build_links(m)

        records.append(m)

    return records
if __name__ == '__main__':
    # Command-line driver: read the spreadsheet named on the command line
    # and write one XML file per record into a fresh 'output' directory.
    if len(sys.argv) < 2:
        print('Usage: {} </path/to/spreadsheet.ods>'.format(sys.argv[0]))
        sys.exit(1)

    spreadsheet_path = sys.argv[1]

    print('parsing spreadsheet')
    data = parse_spreadsheet(spreadsheet_path)

    print('generating XML')
    records = generate_metadata(data)

    print('Writing XML to disk')
    output_dir = 'output'
    # any previous output is discarded before writing
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    for record in records:
        # render first, so a template failure leaves no file behind
        xml = TEMPLATE.render(record=record)
        filename = 'output/{}.xml'.format(record['dataset'])
        with io.open(filename, 'w', encoding='utf-8') as fh:
            fh.write(xml)
Jinja2
pyexcel
pyexcel-ods
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment