Created
January 5, 2021 14:17
-
-
Save alanorth/281b7624301049e8fa91742b9b8c51b9 to your computer and use it in GitHub Desktop.
DOI to Handle script so we can Tweet some Handles without Altmetric scores and have Altmetric notice them
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# | |
# doi-to-handle.py 0.0.1 | |
# | |
# Copyright 2021 Alan Orth. | |
# | |
# This program is free software: you can redistribute it and/or modify | |
# it under the terms of the GNU General Public License as published by | |
# the Free Software Foundation, either version 3 of the License, or | |
# (at your option) any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. | |
# | |
# You should have received a copy of the GNU General Public License | |
# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
# | |
# --- | |
# | |
# This script is written for Python 3.6+ and requires several modules that you | |
# can install with pip (I recommend setting up a Python virtual environment): | |
# | |
# $ pip install psycopg2-binary | |
# | |
import argparse | |
from colorama import Fore | |
import csv | |
import psycopg2 | |
import signal | |
import sys | |
# read dois from a text file, one per line | |
def read_dois_from_file(): | |
# initialize an empty list for DOIs | |
dois = [] | |
for line in args.input_file: | |
# trim any leading or trailing whitespace (including newlines) | |
line = line.strip() | |
# iterate over results and add dois that aren't already present | |
if line not in dois: | |
dois.append(line) | |
# close input file before we exit | |
args.input_file.close() | |
resolve_dois(dois) | |
def resolve_dois(dois): | |
# metadata_field_id for metadata values (from metadatafieldregistry and | |
# might differ from site to site). | |
title_metadata_field_id = 64 | |
handle_metadata_field_id = 25 | |
doi_metadata_field_id = 220 | |
# field names for the CSV | |
fieldnames = ["title", "handle", "doi"] | |
writer = csv.DictWriter(args.output_file, fieldnames=fieldnames) | |
writer.writeheader() | |
# iterate through our DOIs | |
for doi in dois: | |
print(f"Looking up {doi} in database") | |
with conn: | |
# cursor will be closed after this block exits | |
# see: http://initd.org/psycopg/docs/usage.html#with-statement | |
with conn.cursor() as cursor: | |
# make a temporary string we can use with the PostgreSQL regex | |
doi_string = f".*{doi}.*" | |
# get the dspace_object_id for the item with this DOI | |
sql = "SELECT dspace_object_id FROM metadatavalue WHERE metadata_field_id=%s AND text_value ~* %s" | |
cursor.execute( | |
sql, | |
(doi_metadata_field_id, doi_string), | |
) | |
# make sure rowcount is exactly 1, because some DOIs are used | |
# multiple times and I ain't got time for that right now | |
if cursor.rowcount == 1 and not args.quiet: | |
dspace_object_id = cursor.fetchone()[0] | |
print(f"Found {doi}, DSpace object: {dspace_object_id}") | |
elif cursor.rowcount > 1 and not args.quiet: | |
print(f"Found multiple items for {doi}") | |
else: | |
print(f"Not found: {doi}") | |
# get the title | |
sql = "SELECT text_value FROM metadatavalue WHERE metadata_field_id=%s AND dspace_object_id=%s" | |
cursor.execute(sql, (title_metadata_field_id, dspace_object_id)) | |
title = cursor.fetchone()[0] | |
# get the handle | |
cursor.execute(sql, (handle_metadata_field_id, dspace_object_id)) | |
handle = cursor.fetchone()[0] | |
row = { | |
"title": title, | |
"handle": handle, | |
"doi": doi, | |
} | |
writer.writerow(row) | |
# close database connection before we exit | |
conn.close() | |
# close output file before we exit | |
args.output_file.close() | |
def signal_handler(signal, frame): | |
# close output file before we exit | |
args.output_file.close() | |
sys.exit(1) | |
parser = argparse.ArgumentParser( | |
description="Query DSpace database for item metadata based on a list of DOIs in a text file." | |
) | |
parser.add_argument( | |
"-d", | |
"--debug", | |
help="Print debug messages to standard error (stderr).", | |
action="store_true", | |
) | |
parser.add_argument("-db", "--database-name", help="Database name", required=True) | |
parser.add_argument( | |
"-i", | |
"--input-file", | |
help="File name containing DOIs to resolve.", | |
required=True, | |
type=argparse.FileType("r"), | |
) | |
parser.add_argument( | |
"-o", | |
"--output-file", | |
help="File name to save CSV output.", | |
required=True, | |
type=argparse.FileType("w"), | |
) | |
parser.add_argument("-p", "--database-pass", help="Database password", required=True) | |
parser.add_argument( | |
"-q", | |
"--quiet", | |
help="Do not print progress messages to the screen.", | |
action="store_true", | |
) | |
parser.add_argument("-u", "--database-user", help="Database username", required=True) | |
args = parser.parse_args() | |
# set the signal handler for SIGINT (^C) so we can exit cleanly | |
signal.signal(signal.SIGINT, signal_handler) | |
# connect to database | |
try: | |
conn = psycopg2.connect( | |
"dbname={} user={} password={} host='localhost'".format( | |
args.database_name, args.database_user, args.database_pass | |
) | |
) | |
if args.debug: | |
sys.stderr.write(Fore.GREEN + "Connected to database.\n" + Fore.RESET) | |
except psycopg2.OperationalError: | |
sys.stderr.write(Fore.RED + "Could not connect to database.\n" + Fore.RESET) | |
sys.exit(1) | |
read_dois_from_file() | |
exit() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment