Skip to content

Instantly share code, notes, and snippets.

@alanorth
Created January 5, 2021 14:17
Show Gist options
  • Save alanorth/281b7624301049e8fa91742b9b8c51b9 to your computer and use it in GitHub Desktop.
Save alanorth/281b7624301049e8fa91742b9b8c51b9 to your computer and use it in GitHub Desktop.
DOI to Handle script so we can Tweet some Handles without Altmetric scores and have Altmetric notice them
#!/usr/bin/env python3
#
# doi-to-handle.py 0.0.1
#
# Copyright 2021 Alan Orth.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# ---
#
# This script is written for Python 3.6+ and requires several modules that you
# can install with pip (I recommend setting up a Python virtual environment):
#
# $ pip install colorama psycopg2-binary
#
import argparse
from colorama import Fore
import csv
import psycopg2
import signal
import sys
# read dois from a text file, one per line
def read_dois_from_file():
    """Read DOIs from the input file and hand the unique ones to resolve_dois().

    Iterates over ``args.input_file`` line by line, strips leading and
    trailing whitespace (including newlines), skips blank lines and drops
    duplicates while keeping first-seen order. The input file is closed
    before the lookups start, even if reading fails.
    """
    try:
        stripped = (line.strip() for line in args.input_file)
        # Blank lines must be skipped: an empty DOI would turn into the regex
        # ".*.*" in the lookup and match every DOI in the database.
        # dict.fromkeys() de-duplicates in O(n) and keeps insertion order.
        dois = list(dict.fromkeys(doi for doi in stripped if doi))
    finally:
        # close input file before we continue
        args.input_file.close()

    resolve_dois(dois)
def _fetch_first_value(cursor, sql, params):
    """Run a query and return the first column of its first row, or None."""
    cursor.execute(sql, params)
    result = cursor.fetchone()
    return None if result is None else result[0]


def resolve_dois(dois):
    """Look up each DOI in the database and write the matches as CSV.

    For every DOI that matches exactly one item, a row with the item's
    title, handle and the DOI itself is written to ``args.output_file``.
    DOIs that match no item, match several items, or whose item lacks a
    title or handle value are reported and skipped. The module-level
    database connection ``conn`` and the output file are closed before
    returning, even if a lookup raises.

    Args:
        dois: iterable of DOI strings to resolve.
    """
    # metadata_field_id for metadata values (from metadatafieldregistry and
    # might differ from site to site).
    title_metadata_field_id = 64
    handle_metadata_field_id = 25
    doi_metadata_field_id = 220

    # the queries never change, so build them once outside the loop
    doi_sql = "SELECT dspace_object_id FROM metadatavalue WHERE metadata_field_id=%s AND text_value ~* %s"
    value_sql = "SELECT text_value FROM metadatavalue WHERE metadata_field_id=%s AND dspace_object_id=%s"

    try:
        # field names for the CSV
        fieldnames = ["title", "handle", "doi"]
        writer = csv.DictWriter(args.output_file, fieldnames=fieldnames)
        writer.writeheader()

        # iterate through our DOIs
        for doi in dois:
            if not args.quiet:
                print(f"Looking up {doi} in database")

            with conn:
                # cursor will be closed after this block exits
                # see: http://initd.org/psycopg/docs/usage.html#with-statement
                with conn.cursor() as cursor:
                    # make a temporary string we can use with the PostgreSQL regex
                    # NOTE(review): the DOI is embedded in the pattern verbatim, so
                    # regex metacharacters in a DOI (".", "(", ")") are not matched
                    # literally -- consider escaping or a plain substring match.
                    doi_string = f".*{doi}.*"

                    # get the dspace_object_id for the item with this DOI
                    cursor.execute(doi_sql, (doi_metadata_field_id, doi_string))

                    # make sure rowcount is exactly 1, because some DOIs are used
                    # multiple times. Anything else is skipped so we never query
                    # with a missing (or previous iteration's) object id.
                    if cursor.rowcount != 1:
                        if cursor.rowcount > 1:
                            if not args.quiet:
                                print(f"Found multiple items for {doi}")
                        else:
                            print(f"Not found: {doi}")
                        continue

                    dspace_object_id = cursor.fetchone()[0]
                    if not args.quiet:
                        print(f"Found {doi}, DSpace object: {dspace_object_id}")

                    # get the title and the handle
                    title = _fetch_first_value(
                        cursor, value_sql, (title_metadata_field_id, dspace_object_id)
                    )
                    handle = _fetch_first_value(
                        cursor, value_sql, (handle_metadata_field_id, dspace_object_id)
                    )

            # an item without a title or handle row cannot produce a useful line
            if title is None or handle is None:
                print(f"Missing title or handle for {doi}")
                continue

            writer.writerow({"title": title, "handle": handle, "doi": doi})
    finally:
        # close database connection and output file before we exit
        conn.close()
        args.output_file.close()
def signal_handler(signal, frame):
    """Close the CSV output file and terminate the script with exit status 1.

    Both parameters are accepted because the signal machinery passes them;
    neither is used here.
    """
    # make sure the output file is closed before the interpreter exits
    args.output_file.close()

    raise SystemExit(1)
# command-line interface: database credentials, input/output files and
# verbosity switches
parser = argparse.ArgumentParser(
    description="Query DSpace database for item metadata based on a list of DOIs in a text file."
)

# verbosity: debug output
parser.add_argument(
    "-d", "--debug",
    action="store_true",
    help="Print debug messages to standard error (stderr).",
)

# database name
parser.add_argument("-db", "--database-name", required=True, help="Database name")

# text file with one DOI per line, opened for reading by argparse
parser.add_argument(
    "-i", "--input-file",
    type=argparse.FileType("r"),
    required=True,
    help="File name containing DOIs to resolve.",
)

# CSV destination, opened for writing by argparse
parser.add_argument(
    "-o", "--output-file",
    type=argparse.FileType("w"),
    required=True,
    help="File name to save CSV output.",
)

# database password
parser.add_argument("-p", "--database-pass", required=True, help="Database password")

# verbosity: suppress progress output
parser.add_argument(
    "-q", "--quiet",
    action="store_true",
    help="Do not print progress messages to the screen.",
)

# database user
parser.add_argument("-u", "--database-user", required=True, help="Database username")

args = parser.parse_args()
# set the signal handler for SIGINT (^C) so we can exit cleanly
signal.signal(signal.SIGINT, signal_handler)

# connect to database
try:
    # Pass the credentials as keyword arguments instead of formatting them
    # into a DSN string: a password (or user/database name) containing
    # spaces, quotes or backslashes would otherwise corrupt the DSN.
    conn = psycopg2.connect(
        dbname=args.database_name,
        user=args.database_user,
        password=args.database_pass,
        host="localhost",
    )
except psycopg2.OperationalError:
    sys.stderr.write(Fore.RED + "Could not connect to database.\n" + Fore.RESET)
    sys.exit(1)

if args.debug:
    sys.stderr.write(Fore.GREEN + "Connected to database.\n" + Fore.RESET)

read_dois_from_file()

# use sys.exit() rather than the interactive-only exit() helper, which is
# injected by the site module and is not guaranteed to exist
sys.exit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment