Skip to content

Instantly share code, notes, and snippets.

@archaeogeek
Created October 15, 2020 08:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save archaeogeek/7a8a0f6811ab9af98d511ac171a6531e to your computer and use it in GitHub Desktop.
Save archaeogeek/7a8a0f6811ab9af98d511ac171a6531e to your computer and use it in GitHub Desktop.
Python snippet for updating GeoNetwork metadata records using the batch editing API
import requests
import click
from requests.auth import HTTPBasicAuth
from requests.packages.urllib3.exceptions import InsecureRequestWarning
import json
import pandas as pd
from collections import Counter
import os
import glob
import sys
import xml.etree.ElementTree as ET
# Code for setting up logging, email, etc. has been removed from this snippet.
@click.group()
@click.option('--url', prompt=True, help='Geonetwork URL')
@click.option('--username', prompt=True, default='admin', help='Geonetwork username')
# NOTE(review): hide_input=False echoes the password as it is typed -- confirm this is intended.
@click.password_option('--password', prompt=True, confirmation_prompt=True, hide_input=False, help='Geonetwork password')
@click.pass_context
def cli(ctx, url, username, password):
    """Modules for updating metadata UUIDs based on values in a CSV, see samples for structure"""
    # Group callback: builds one authenticated requests.Session and stores it,
    # together with the credentials and base URL, on ctx.obj for the
    # sub-commands to reuse.

    # disabling https warnings while testing
    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

    # A trailing slash would otherwise produce '//eng/...' style paths when
    # endpoint paths are appended to the base URL.
    url = url.rstrip('/')

    geonetwork_session = requests.Session()
    geonetwork_session.auth = HTTPBasicAuth(username, password)
    geonetwork_session.headers.update({"Accept": "application/json"})

    # Make a call to an endpoint to get cookies and an xsrf token. The
    # response itself is not needed, only the cookies stored on the session.
    try:
        geonetwork_session.post(url + '/eng/info?type=me', verify=False)
    except requests.exceptions.RequestException:
        errorstring = 'Could not connect to %s, aborting. Full error: %s' % (url, str(sys.exc_info()))
        click.echo(errorstring)
        sys.exit(1)

    token = geonetwork_session.cookies.get('XSRF-TOKEN')
    if token is None:
        # Without the token later write requests will probably be rejected,
        # so tell the user instead of sending a header with no value.
        click.echo('Warning: no XSRF-TOKEN cookie returned by %s' % url, err=True)
    else:
        geonetwork_session.headers.update({"X-XSRF-TOKEN": token})

    # add session and credentials as context objects so they can be used elsewhere
    ctx.obj = {
        'session': geonetwork_session,
        'username': username,
        'password': password,
        'url': url,
    }
@cli.command()
@click.option('--inputdir',prompt=True, help='Directory containing CSV file')
@click.pass_context
def abstractupdate(ctx,inputdir):
    """Update manual fields in metadata records from csv.

    Records are identified by title. Reads abstracts.csv from INPUTDIR; each
    row needs a 'recordtitle' column plus any of the columns abstract,
    keywords, updatefrequency, inspirekeyword and scale. Empty cells and
    missing columns are skipped.
    """
    # disabling https warnings while testing
    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

    try:
        list_of_files = glob.glob(inputdir + '/abstracts.csv')
        # max() raises ValueError when glob matched nothing.
        latest_file = max(list_of_files, key=os.path.getmtime)
    except (ValueError, OSError):
        errorstring = 'Directory or CSV file not found, aborting. Full error: %s' % (str(sys.exc_info()))
        click.echo(errorstring)
        sys.exit(1)

    try:
        # Read every cell as text and keep empty cells as '' so that blanks are
        # not turned into the string "nan" and integers (e.g. scale) are not
        # rewritten as floats such as "10000.0".
        df = pd.read_csv(latest_file, dtype=str, keep_default_na=False)
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError):
        errorstring = '%s is not a CSV file, aborting! Full error: %s' % (latest_file, str(sys.exc_info()))
        click.echo(errorstring)
        sys.exit(1)
    click.echo(latest_file)

    if 'recordtitle' not in df.columns:
        click.echo('%s has no recordtitle column, aborting!' % latest_file)
        sys.exit(1)

    metadata = df.to_dict(orient='records')

    # CSV column name -> XPath of the element/attribute that receives its value.
    # NOTE(review): the 'keywords' path uses descriptiveKeywords[0]; XPath
    # positions are 1-based, so confirm this predicate selects what is intended.
    xpathdict = {}
    xpathdict['abstract'] = '/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:abstract/gco:CharacterString'
    xpathdict['keywords'] = '/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:descriptiveKeywords[0]/gmd:MD_Keywords/gmd:keyword/gco:CharacterString[not(parent::*/following-sibling::gmd:thesaurusName/gmd:CI_Citation/gmd:title/gco:CharacterString=\"GEMET - INSPIRE themes, version 1.0\")]'
    xpathdict['updatefrequency'] = '/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/gmd:MD_MaintenanceFrequencyCode/@codeListValue'
    xpathdict['inspirekeyword'] = '/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString[parent::*/following-sibling::gmd:thesaurusName/gmd:CI_Citation/gmd:title/gco:CharacterString=\"GEMET - INSPIRE themes, version 1.0\"]'
    xpathdict['scale'] = '/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:spatialResolution/gmd:MD_Resolution/gmd:equivalentScale/gmd:MD_RepresentativeFraction/gmd:denominator/gco:Integer'
    click.echo(xpathdict)

    # The session created by the cli group already carries the basic-auth
    # credentials and the XSRF header.
    session = ctx.obj['session']
    url = ctx.obj['url']
    # Sent per request rather than stored on the shared session.
    json_headers = {'Content-Type': 'application/json'}

    for row in metadata:
        title = row['recordtitle']
        if not title:
            click.echo('Row without a recordtitle, skipping')
            continue

        # Look the record up by title. Passing the title through params lets
        # requests URL-encode characters such as '&', '#' and '+'.
        search = session.post(url + '/eng/q',
            params={'title': title},
            verify=False
        )
        try:
            tree = ET.fromstring(search.text)
        except ET.ParseError:
            click.echo('Could not parse search response for "%s", skipping. Full error: %s' % (title, str(sys.exc_info())))
            continue

        # When several records match, the last uuid in the response is used.
        for item in tree.findall('./metadata/*/uuid'):
            row['uuid'] = item.text
        # Falls back to a 'uuid' column in the CSV when the search found nothing.
        uuid = row.get('uuid')
        if not uuid:
            click.echo('No record found with title "%s", skipping' % title)
            continue

        updateurl = url + "/api/0.1/records/batchediting?uuids=" + uuid + "&bucket=be101&updateDateStamp=false"
        for field, xpath in xpathdict.items():
            value = row.get(field)
            if not value:
                click.echo('No %s value for "%s", skipping field' % (field, title))
                continue
            # build payload
            jsonpayload = json.dumps([{"xpath": xpath, "value": str(value)}])
            click.echo(updateurl)
            response = session.put(updateurl,
                headers=json_headers,
                data=jsonpayload,
                verify=False
            )
            click.echo(response.text)
def _contact_party_xml(row):
    """Return the gmd:CI_ResponsibleParty XML fragment for one CSV row.

    row is a mapping with the keys Name, Organisation, Position, Phone,
    Address, City, AdministrativeArea, PostCode and Email; a missing key
    raises KeyError. Every value is XML-escaped so that '&', '<' and '>' in
    the CSV cannot produce malformed XML. Country is fixed to 'UK' and the
    role to 'pointOfContact'.
    """
    from xml.sax.saxutils import escape

    def text(column):
        # One gco:CharacterString element holding the escaped cell value.
        return ('<gco:CharacterString xmlns:gco="http://www.isotc211.org/2005/gco">'
                + escape(str(row[column]))
                + '</gco:CharacterString>')

    return "".join([
        '<gmd:CI_ResponsibleParty xmlns:gmd="http://www.isotc211.org/2005/gmd">',
        '<gmd:individualName>', text('Name'), '</gmd:individualName>',
        '<gmd:organisationName>', text('Organisation'), '</gmd:organisationName>',
        '<gmd:positionName>', text('Position'), '</gmd:positionName>',
        '<gmd:contactInfo>',
        '<gmd:CI_Contact>',
        '<gmd:phone>',
        '<gmd:CI_Telephone>',
        '<gmd:voice>', text('Phone'), '</gmd:voice>',
        '</gmd:CI_Telephone>',
        '</gmd:phone>',
        '<gmd:address>',
        '<gmd:CI_Address>',
        '<gmd:deliveryPoint>', text('Address'), '</gmd:deliveryPoint>',
        '<gmd:city>', text('City'), '</gmd:city>',
        '<gmd:administrativeArea>', text('AdministrativeArea'), '</gmd:administrativeArea>',
        '<gmd:postalCode>', text('PostCode'), '</gmd:postalCode>',
        '<gmd:country>',
        '<gco:CharacterString xmlns:gco="http://www.isotc211.org/2005/gco">UK</gco:CharacterString>',
        '</gmd:country>',
        '<gmd:electronicMailAddress>', text('Email'), '</gmd:electronicMailAddress>',
        '</gmd:CI_Address>',
        '</gmd:address>',
        '</gmd:CI_Contact>',
        '</gmd:contactInfo>',
        '<gmd:role>',
        '<gmd:CI_RoleCode codeListValue="pointOfContact" ',
        'codeList="http://standards.iso.org/iso/19139/resources/gmxCodelists.xml#CI_RoleCode"/>',
        '</gmd:role>',
        '</gmd:CI_ResponsibleParty>',
    ])


@cli.command()
@click.option('--inputdir',prompt=True, help='Directory containing CSV file')
@click.pass_context
def contactupdate(ctx,inputdir):
    """Update contact info in metadata records from csv.

    Records are identified by title. Reads contacts.csv from INPUTDIR; each
    row needs the columns recordtitle, Name, Organisation, Position, Phone,
    Address, City, AdministrativeArea, PostCode and Email. The same contact
    is written to both the resource point of contact and the metadata contact.
    """
    # disabling https warnings while testing
    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

    try:
        list_of_files = glob.glob(inputdir + '/contacts.csv')
        # max() raises ValueError when glob matched nothing.
        latest_file = max(list_of_files, key=os.path.getmtime)
    except (ValueError, OSError):
        errorstring = 'Directory or CSV file not found, aborting. Full error: %s' % (str(sys.exc_info()))
        click.echo(errorstring)
        sys.exit(1)

    try:
        # Read every cell as text and keep empty cells as '': numeric-looking
        # phone numbers and post codes stay intact and can be concatenated
        # into the XML without a TypeError.
        df = pd.read_csv(latest_file, dtype=str, keep_default_na=False)
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError):
        errorstring = '%s is not a CSV file, aborting! Full error: %s' % (latest_file, str(sys.exc_info()))
        click.echo(errorstring)
        sys.exit(1)
    click.echo(latest_file)

    if 'recordtitle' not in df.columns:
        click.echo('%s has no recordtitle column, aborting!' % latest_file)
        sys.exit(1)

    metadata = df.to_dict(orient='records')

    # The session created by the cli group already carries the basic-auth
    # credentials and the XSRF header.
    session = ctx.obj['session']
    url = ctx.obj['url']
    # Sent per request rather than stored on the shared session.
    json_headers = {'Content-Type': 'application/json'}
    contact_xpaths = (
        "/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:pointOfContact",
        "/gmd:MD_Metadata/gmd:contact",
    )

    for row in metadata:
        title = row['recordtitle']
        if not title:
            click.echo('Row without a recordtitle, skipping')
            continue

        # Look the record up by title. Passing the title through params lets
        # requests URL-encode characters such as '&', '#' and '+'.
        search = session.post(url + '/eng/q',
            params={'title': title},
            verify=False
        )
        try:
            tree = ET.fromstring(search.text)
        except ET.ParseError:
            click.echo('Could not parse search response for "%s", skipping. Full error: %s' % (title, str(sys.exc_info())))
            continue

        # When several records match, the last uuid in the response is used.
        for item in tree.findall('./metadata/*/uuid'):
            row['uuid'] = item.text
        click.echo(row)
        # Falls back to a 'uuid' column in the CSV when the search found nothing.
        uuid = row.get('uuid')
        if not uuid:
            click.echo('No record found with title "%s", skipping' % title)
            continue

        updateurl = url + "/api/0.1/records/batchediting?uuids=" + uuid + "&bucket=be101&updateDateStamp=false"
        # The contact fragment is identical for both target xpaths, so build it once.
        contact_xml = _contact_party_xml(row)
        for xpath in contact_xpaths:
            # build json payload
            jsonpayload = json.dumps([{"value": contact_xml, "xpath": xpath}])
            click.echo(updateurl)
            response = session.put(updateurl,
                headers=json_headers,
                data=jsonpayload,
                verify=False
            )
            click.echo(response.text)
# Run the click command group only when this file is executed as a script.
if __name__ == "__main__":
    cli()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment