Created
October 15, 2020 08:30
-
-
Save archaeogeek/7a8a0f6811ab9af98d511ac171a6531e to your computer and use it in GitHub Desktop.
Python snippet for updating GeoNetwork using batch editing API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob
import json
import os
import sys
import xml.etree.ElementTree as ET
from collections import Counter
from xml.sax.saxutils import escape

import click
import pandas as pd
import requests
from requests.auth import HTTPBasicAuth
from requests.packages.urllib3.exceptions import InsecureRequestWarning
# NOTE: the code that sets up logging and email notifications has been removed from this snippet.
# NOTE(review): hide_input=False echoes the password on screen while it is
# typed. It is kept as the author wrote it; confirm whether that is intended.
@click.group()
@click.option('--url', prompt=True, help='Geonetwork URL')
@click.option('--username', prompt=True, default='admin', help='Geonetwork username')
@click.password_option('--password', prompt=True, confirmation_prompt=True, hide_input=False, help='Geonetwork password')
@click.pass_context
def cli(ctx, url, username, password):
    """Modules for updating metadata UUIDs based on values in a CSV, see samples for structure"""
    # NOTE: click prints the docstring above as the --help text, so the
    # implementation notes are kept in comments.
    #
    # Builds one authenticated requests.Session, primes it with the XSRF
    # cookie/header and stores it, together with the credentials and the base
    # URL, in ctx.obj for the sub-commands to read.

    # disabling https warnings while testing
    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

    # The sub-commands append paths that start with '/', so drop any trailing
    # slash here to avoid building '//eng/...' URLs.
    url = url.rstrip('/')

    geonetwork_session = requests.Session()
    geonetwork_session.auth = HTTPBasicAuth(username, password)
    geonetwork_session.headers.update({"Accept": "application/json"})

    # Make a call to an endpoint to get cookies and an xsrf token. Only the
    # cookies matter, so the response itself is not kept.
    try:
        geonetwork_session.post(url + '/eng/info?type=me', verify=False)
    except requests.exceptions.RequestException as err:
        click.echo('Could not connect to %s, aborting. Full error: %s' % (url, err))
        sys.exit(1)

    token = geonetwork_session.cookies.get('XSRF-TOKEN')
    if token is None:
        # Without the token the later PUT requests are likely to be rejected;
        # say so now instead of silently storing a None header.
        click.echo('Warning: no XSRF-TOKEN cookie was returned by %s' % url)
    else:
        geonetwork_session.headers.update({"X-XSRF-TOKEN": token})

    # add session and credentials as context objects so they can be used elsewhere
    ctx.obj = {
        'session': geonetwork_session,
        'username': username,
        'password': password,
        'url': url,
    }
@cli.command()
@click.option('--inputdir', prompt=True, help='Directory containing CSV file')
@click.pass_context
def abstractupdate(ctx, inputdir):
    """Update manual fields in metadata records from csv.
    records identified by title"""
    # NOTE: click prints the docstring above as the --help text, so the
    # implementation notes are kept in comments.
    #
    # Reads <inputdir>/abstracts.csv, looks each row's 'recordtitle' up through
    # the '/eng/q' search to obtain the record uuid, then sends one batch-edit
    # PUT per field in ``xpathdict``. Exits with status 1 when the CSV cannot
    # be found or read, or lacks a required column.

    # disabling https warnings while testing
    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

    try:
        list_of_files = glob.glob(inputdir + '/abstracts.csv')
        # max() raises ValueError when nothing matched, getmtime raises OSError.
        latest_file = max(list_of_files, key=os.path.getmtime)
    except (ValueError, OSError) as err:
        click.echo('Directory or CSV file not found, aborting. Full error: %s' % err)
        sys.exit(1)

    try:
        df = pd.read_csv(latest_file)
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as err:
        click.echo('%s is not a CSV file, aborting! Full error: %s' % (latest_file, err))
        sys.exit(1)
    click.echo(latest_file)

    # CSV column name -> XPath of the metadata element that column updates.
    # NOTE(review): XPath positions are 1-based, so 'gmd:descriptiveKeywords[0]'
    # in the 'keywords' expression looks suspicious. The string is left exactly
    # as written because the server-side handling is not visible from here.
    xpathdict = {
        'abstract': '/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString',
        'keywords': '/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:descriptiveKeywords[0]/gmd:MD_Keywords/gmd:keyword/gco:CharacterString[not(parent::*/following-sibling::gmd:thesaurusName/gmd:CI_Citation/gmd:title/gco:CharacterString="GEMET - INSPIRE themes, version 1.0")]',
        'updatefrequency': '/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/gmd:MD_MaintenanceFrequencyCode/@codeListValue',
        'inspirekeyword': '/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString[parent::*/following-sibling::gmd:thesaurusName/gmd:CI_Citation/gmd:title/gco:CharacterString="GEMET - INSPIRE themes, version 1.0"]',
        'scale': '/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:spatialResolution/gmd:MD_Resolution/gmd:equivalentScale/gmd:MD_RepresentativeFraction/gmd:denominator/gco:Integer',
    }
    click.echo(xpathdict)

    # Fail before touching any record rather than with a KeyError part-way
    # through a run that has already edited some records.
    missing = [col for col in ['recordtitle'] + list(xpathdict) if col not in df.columns]
    if missing:
        click.echo('%s is missing required column(s): %s, aborting.' % (latest_file, ', '.join(missing)))
        sys.exit(1)

    metadata = df.to_dict(orient='records')

    # The session, base URL and credentials do not change between records.
    session = ctx.obj['session']
    url = ctx.obj['url']
    session.auth = HTTPBasicAuth(ctx.obj['username'], ctx.obj['password'])

    for record in metadata:
        title = record['recordtitle']
        if pd.isna(title):
            click.echo('Skipping row with empty recordtitle')
            continue

        # Pass the title through ``params`` so requests URL-encodes it; titles
        # containing '&', '#', '+' etc. would otherwise corrupt the query.
        search = session.post(url + '/eng/q',
                              params={'title': str(title)},
                              verify=False
                              )
        try:
            tree = ET.fromstring(search.text)
        except ET.ParseError as err:
            click.echo('Could not parse search response for "%s", skipping. Full error: %s' % (title, err))
            continue

        uuids = [item.text for item in tree.findall('./metadata/*/uuid')]
        if not uuids:
            click.echo('No record found with title "%s", skipping' % title)
            continue
        if len(uuids) > 1:
            click.echo('Warning: %d records matched title "%s", using the last one' % (len(uuids), title))
        # When several records match, the last one wins.
        record['uuid'] = uuids[-1]

        updateurl = url + "/api/0.1/records/batchediting?uuids=" + record['uuid'] + "&bucket=be101&updateDateStamp=false"
        for column, xpath in xpathdict.items():
            value = record[column]
            if pd.isna(value):
                # pandas reads an empty cell as NaN; do not write "nan" into
                # the metadata record.
                click.echo('No value for %s in "%s", skipping' % (column, title))
                continue
            # build payload
            jsonpayload = json.dumps([{"xpath": xpath, "value": str(value)}])
            click.echo(updateurl)
            # Content-Type is set per request so the shared session headers
            # are not modified for the search calls.
            response = session.put(updateurl,
                                   headers={'Content-Type': 'application/json'},
                                   data=jsonpayload,
                                   verify=False
                                   )
            click.echo(response.text)
# CSV columns that are substituted into the contact XML fragment.
_CONTACT_COLUMNS = ('Name', 'Organisation', 'Position', 'Phone', 'Address',
                    'City', 'AdministrativeArea', 'PostCode', 'Email')

# ISO 19139 CI_ResponsibleParty fragment; each {Column} placeholder receives a
# complete gco:CharacterString element. The country is fixed to "UK".
_CONTACT_TEMPLATE = (
    '<gmd:CI_ResponsibleParty xmlns:gmd="http://www.isotc211.org/2005/gmd"> '
    '<gmd:individualName> {Name} </gmd:individualName> '
    '<gmd:organisationName> {Organisation} </gmd:organisationName> '
    '<gmd:positionName> {Position} </gmd:positionName> '
    '<gmd:contactInfo> '
    '<gmd:CI_Contact> '
    '<gmd:phone> '
    '<gmd:CI_Telephone> '
    '<gmd:voice> {Phone} </gmd:voice> '
    '</gmd:CI_Telephone> '
    '</gmd:phone> '
    '<gmd:address> '
    '<gmd:CI_Address> '
    '<gmd:deliveryPoint> {Address} </gmd:deliveryPoint> '
    '<gmd:city> {City} </gmd:city> '
    '<gmd:administrativeArea> {AdministrativeArea} </gmd:administrativeArea> '
    '<gmd:postalCode> {PostCode} </gmd:postalCode> '
    '<gmd:country> '
    '<gco:CharacterString xmlns:gco="http://www.isotc211.org/2005/gco">UK</gco:CharacterString> '
    '</gmd:country> '
    '<gmd:electronicMailAddress> {Email} </gmd:electronicMailAddress> '
    '</gmd:CI_Address> '
    '</gmd:address> '
    '</gmd:CI_Contact> '
    '</gmd:contactInfo> '
    '<gmd:role> '
    '<gmd:CI_RoleCode codeListValue="pointOfContact" '
    'codeList="http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#CI_RoleCode"/> '
    '</gmd:role> '
    '</gmd:CI_ResponsibleParty>'
)


def _contact_text(value):
    """Return a CSV cell as XML-escaped text; an empty (NaN) cell becomes ''."""
    if pd.isna(value):
        return ''
    # str() because pandas may have parsed the cell (e.g. a phone number) as a
    # number; escape() so that '&', '<' and '>' cannot break the XML.
    return escape(str(value))


def _contact_xml(record):
    """Build the CI_ResponsibleParty XML fragment for one CSV row (a dict)."""
    elements = {
        column: '<gco:CharacterString xmlns:gco="http://www.isotc211.org/2005/gco">%s</gco:CharacterString>'
                % _contact_text(record[column])
        for column in _CONTACT_COLUMNS
    }
    return _CONTACT_TEMPLATE.format(**elements)


@cli.command()
@click.option('--inputdir', prompt=True, help='Directory containing CSV file')
@click.pass_context
def contactupdate(ctx, inputdir):
    """Update contact info in metadata records from csv.
    records identified by title"""
    # NOTE: click prints the docstring above as the --help text, so the
    # implementation notes are kept in comments.
    #
    # Reads <inputdir>/contacts.csv, looks each row's 'recordtitle' up through
    # the '/eng/q' search to obtain the record uuid, then PUTs the same
    # CI_ResponsibleParty fragment to both the pointOfContact and the metadata
    # contact XPaths. Exits with status 1 when the CSV cannot be found or
    # read, or lacks a required column.

    # disabling https warnings while testing
    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

    try:
        list_of_files = glob.glob(inputdir + '/contacts.csv')
        # max() raises ValueError when nothing matched, getmtime raises OSError.
        latest_file = max(list_of_files, key=os.path.getmtime)
    except (ValueError, OSError) as err:
        click.echo('Directory or CSV file not found, aborting. Full error: %s' % err)
        sys.exit(1)

    try:
        df = pd.read_csv(latest_file)
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as err:
        click.echo('%s is not a CSV file, aborting! Full error: %s' % (latest_file, err))
        sys.exit(1)
    click.echo(latest_file)

    # Fail before touching any record rather than with a KeyError part-way
    # through a run that has already edited some records.
    missing = [col for col in ('recordtitle',) + _CONTACT_COLUMNS if col not in df.columns]
    if missing:
        click.echo('%s is missing required column(s): %s, aborting.' % (latest_file, ', '.join(missing)))
        sys.exit(1)

    metadata = df.to_dict(orient='records')

    # The session, base URL and credentials do not change between records.
    session = ctx.obj['session']
    url = ctx.obj['url']
    session.auth = HTTPBasicAuth(ctx.obj['username'], ctx.obj['password'])

    contact_xpaths = (
        "/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:pointOfContact",
        "/gmd:MD_Metadata/gmd:contact",
    )

    for record in metadata:
        title = record['recordtitle']
        if pd.isna(title):
            click.echo('Skipping row with empty recordtitle')
            continue

        # Pass the title through ``params`` so requests URL-encodes it; titles
        # containing '&', '#', '+' etc. would otherwise corrupt the query.
        search = session.post(url + '/eng/q',
                              params={'title': str(title)},
                              verify=False
                              )
        try:
            tree = ET.fromstring(search.text)
        except ET.ParseError as err:
            click.echo('Could not parse search response for "%s", skipping. Full error: %s' % (title, err))
            continue

        uuids = [item.text for item in tree.findall('./metadata/*/uuid')]
        if not uuids:
            click.echo('No record found with title "%s", skipping' % title)
            continue
        if len(uuids) > 1:
            click.echo('Warning: %d records matched title "%s", using the last one' % (len(uuids), title))
        # When several records match, the last one wins.
        record['uuid'] = uuids[-1]
        click.echo(record)

        updateurl = url + "/api/0.1/records/batchediting?uuids=" + record['uuid'] + "&bucket=be101&updateDateStamp=false"
        # The fragment is identical for both target XPaths, so build it once.
        contact_xml = _contact_xml(record)
        for xpath in contact_xpaths:
            # build json payload
            jsonpayload = json.dumps([{"value": contact_xml, "xpath": xpath}])
            click.echo(updateurl)
            # Content-Type is set per request so the shared session headers
            # are not modified for the search calls.
            response = session.put(updateurl,
                                   headers={'Content-Type': 'application/json'},
                                   data=jsonpayload,
                                   verify=False
                                   )
            click.echo(response.text)
# Script entry point: hand control to the click command group when this file
# is executed directly (not when it is imported).
if __name__ == "__main__":
    cli()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment