Created
November 13, 2019 18:16
-
-
Save colin-daniels/60a917bbaaa0e418c41bb260fb57bb58 to your computer and use it in GitHub Desktop.
Fetch publications given an ORCID and turn them into a markdown-formatted publication list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Fetch the works listed for an ORCID iD, look each DOI up on crossref, and
# write a markdown-formatted publication list.
#
# dependencies: jq, grep, sed, tr, curl, and perl URI::Escape (or replace url_encode function)

# abort as soon as any command fails
set -e

# ORCID iD whose works are fetched
orcid="0000-0002-3512-7146"

# name to match when bolding in author list, should be abbreviated as in
# "Given Names Last" -> "G. N. Last"
match_name="C. Daniels"

# whether to output as a numbered list instead of unordered (i.e. bullet points)
output_numbered_list=true
# Percent-encode a string with perl's URI::Escape::uri_escape.
# Arguments: $1 - string to encode (required; the shell aborts with
#                 "Missing argument" if it is not supplied)
# Outputs:   the encoded string on stdout, with no trailing newline
# note: requires `perl-uri` on arch linux
function url_encode {
  perl -MURI::Escape -e 'print uri_escape($ARGV[0]);' "${1?"Missing argument"}"
}
# Print a diagnostic message to stderr.
# Arguments: any number of words; they are joined with spaces (default IFS)
# Outputs:   the joined message plus a newline on stderr, nothing on stdout
function echoerr {
  # printf instead of echo so that a message such as "-n" or "-e" is printed
  # literally rather than being interpreted as an echo option.
  printf '%s\n' "$*" 1>&2
}
# Extract one DOI per ORCID work group from a works JSON file.
# Arguments: $1 - path to the JSON file (required); it must have a top-level
#                 "groups" array whose entries carry "groupId" and
#                 "externalIdentifiers"
# Outputs:   one DOI per line on stdout, in the order the groups appear
# Returns:   non-zero (jq error) if any group has no "self" DOI or more than one
function get_dois_from {
  local works_json="${1?"missing input json file argument"}"

  # The jq program is single-quoted so that $gid, \( ... ) and the string
  # literals reach jq untouched, with no shell escaping.
  jq -r '
    # get the DOI for an ORCID group entry
    def group_doi:
      .groupId as $gid |
      .externalIdentifiers | map(
        # we are only looking for DOIs here
        select(
          .externalIdentifierType.value == "doi" and
          .relationship.value == "self"
        ) |
        # actual DOI string (e.g., "10.1021/acsnano.9b05817")
        .externalIdentifierId.value
      ) |
      if length == 0 then
        error("Missing DOI for ORCID entry [groupId = \($gid)]. Please edit works.json and try again.")
      elif length > 1 then
        error("More than one DOI for ORCID entry [groupId = \($gid)]. Please edit works.json and try again.")
      else
        .[0]
      end;

    # just get the associated DOI for each group entry
    .groups[] | group_doi
  ' "$works_json"
}
# Download the ORCID works listing once; later runs reuse the copy on disk.
if [ -f "works.json" ]; then
  echoerr "Reusing existing works.json for ORCID $orcid"
else
  echoerr "Fetching works.json for ORCID $orcid"
  # get works in order of latest-oldest
  # --fail turns an HTTP error into a non-zero exit instead of saving the
  # error page, and the download goes to a temporary name first so that a
  # failed or partial transfer is never mistaken for a cached works.json.
  if ! curl --fail "https://orcid.org/$orcid/worksPage.json?offset=0&sort=date&sortAsc=false" \
    -H 'Accept: application/json, text/plain, */*' \
    -H "Referer: https://orcid.org/$orcid" \
    > works.json.tmp; then
    rm -f works.json.tmp
    echoerr "Failed to fetch works.json for ORCID $orcid"
    exit 1
  fi
  mv works.json.tmp works.json
fi
# get dois (one per line) into works.txt
echoerr "Getting DOIs from works.json"
get_dois_from works.json > works.txt
doi_count=$(wc -l < works.txt)
# some wc implementations pad the count with spaces; strip them for the log
echoerr "Got ${doi_count//[[:space:]]/} DOIs from works.json"
# get data from crossref: for every DOI in works.txt, store the crossref JSON
# record and a bibtex citation next to the script, skipping DOIs for which
# both files already exist
echoerr "Getting info from crossref for each DOI"
while IFS="" read -r doi; do
  # url encode because /'s
  url_doi=$(url_encode "$doi")

  # output filenames
  xref_json="doi-$url_doi.json"
  xref_bib="doi-$url_doi.bib"

  if [ -f "$xref_json" ] && [ -f "$xref_bib" ]; then
    echoerr " $doi already processed, skipping"
    continue
  fi

  echoerr " Processing $doi"

  # Each download uses --fail and goes to a temporary name that is only moved
  # into place on success; otherwise an HTTP error page or a truncated file
  # would be treated as "already processed" on the next run.
  if ! curl --fail "https://api.crossref.org/v1/works/$url_doi" > "$xref_json.tmp"; then
    rm -f -- "$xref_json.tmp"
    echoerr " Failed to fetch crossref record for $doi"
    exit 1
  fi
  mv -- "$xref_json.tmp" "$xref_json"
  # pause between requests
  sleep 0.25

  if ! curl --fail "https://search.crossref.org/citation?format=bibtex&doi=$url_doi" > "$xref_bib.tmp"; then
    rm -f -- "$xref_bib.tmp"
    echoerr " Failed to fetch bibtex citation for $doi"
    exit 1
  fi
  mv -- "$xref_bib.tmp" "$xref_bib"
  sleep 0.75
done < works.txt
# build markdown publication file...very hackily since this is taking too much time
#
# For every DOI in works.txt this reads the doi-<encoded doi>.json and
# doi-<encoded doi>.bib files and writes one list item to publications.md:
#   authors, _title_, journal [volume], [pages ][doi](link) [(year)].
echoerr "Building publications.md"
pub_idx=0
while IFS="" read -r doi; do
  echoerr " Processing $doi"
  pub_idx=$(( pub_idx + 1 ))

  url_doi=$(url_encode "$doi")
  xref_json="doi-$url_doi.json"
  xref_bib="doi-$url_doi.bib"

  authors=$(
    jq -r --arg name "$match_name" '
      # "Given Names" -> "G. N."
      def abbreviate:
        split(" ") | map(.[:1] + ".") | join(" ");

      # a record without an author array yields an empty author list
      (.message.author // []) | map(
        (
          if .given then
            "\(.given | abbreviate) \(.family)"
          else
            # no given name to abbreviate: fall back to the family name or,
            # failing that, the "name" field
            (.family // .name // "")
          end
        ) |
        # bold name in author list if it matches
        if . == $name then
          "**\(.)**"
        else
          .
        end
      ) | join(", ")' "$xref_json"
  )
  title=$(jq -r '.message.title[0]' "$xref_json")
  journal=$(jq -r '.message | .["container-title"][0]' "$xref_json")

  # year/volume/pages come from the bibtex file; each is empty when the field
  # is absent. The tr set is 0-9 only (a bracketed set would also keep [ and ]).
  year=$(grep 'year = ' "$xref_bib" | tr -d -c '0-9')
  volume=$(grep 'volume = ' "$xref_bib" | sed -e 's/^.*{//' -e 's/}.*$//')
  pages=$(grep 'pages = ' "$xref_bib" | sed -e 's/^.*{//' -e 's/}.*$//')

  markdown="$authors, _${title}_, $journal"
  if [[ -n "$volume" ]]; then markdown+=" $volume"; fi
  markdown+=", "
  if [[ -n "$pages" ]]; then markdown+="$pages "; fi

  # turn "--" (e.g. bibtex page ranges) into an en dash; this is done before
  # the DOI link is appended so that a DOI containing "--" is never rewritten
  markdown=${markdown//--/–}

  markdown+="[$doi](https://doi.org/$url_doi)"
  if [[ -n "$year" ]]; then markdown+=" ($year)"; fi
  markdown+="."

  if [[ "$output_numbered_list" == true ]]; then
    printf '%s\n' "$pub_idx. $markdown"
  else
    printf '%s\n' "* $markdown"
  fi
done < works.txt > publications.md

echoerr "Done"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Output looks something like this: