colin-daniels/orcid_to_publications_markdown.sh

## orcid_to_publications_markdown.sh
#!/bin/bash

# dependencies: jq, grep, sed, tr, curl, and perl URI::Escape (or replace url_encode function)
# stop at the first command that exits with a non-zero status
set -o errexit

# ORCID iD whose works are turned into the publication list
orcid='0000-0002-3512-7146'

# author name to print in bold; write it the way names are abbreviated in
# the output, i.e. "Given Names Last" becomes "G. N. Last"
match_name='C. Daniels'

# true          -> numbered list ("1. ...")
# anything else -> unordered list ("* ...")
output_numbered_list='true'

# Percent-encode a string so it can be used inside a URL or a file name.
#
# Implemented in pure bash, so perl's URI::Escape module is not needed here.
# The output follows uri_escape's default rule: every byte except the
# RFC 3986 unreserved characters (A-Z a-z 0-9 - . _ ~) becomes %XX with
# upper-case hex digits.
#
# Arguments: $1 - string to encode (required; aborts with "Missing argument")
# Outputs:   the encoded string on stdout, without a trailing newline
function url_encode {
    local input="${1?"Missing argument"}"
    # work byte-wise so multi-byte characters are encoded one byte at a time
    local LC_ALL=C
    local encoded="" char code i
    for (( i = 0; i < ${#input}; i++ )); do
        char="${input:i:1}"
        case "$char" in
            [A-Za-z0-9._~-])
                encoded+="$char"
                ;;
            *)
                # a leading quote makes printf yield the character's numeric value
                printf -v code '%d' "'$char"
                printf -v char '%%%02X' "$(( code & 0xFF ))"
                encoded+="$char"
                ;;
        esac
    done
    printf '%s' "$encoded"
}

# Write all arguments to stderr, with the same argument handling as `echo`,
# so that messages never end up on stdout.
function echoerr {
    >&2 echo "$@"
}

# Print the DOI of every work group in an ORCID works JSON file.
#
# Arguments: $1 - path to the JSON file (required)
# Outputs:   one DOI per line on stdout, in the order of .groups
# Fails with a jq error (non-zero status) when a group has no DOI whose
# relationship is "self", or has more than one.
function get_dois_from {
    local works_file="${1?"missing input json file argument"}"
    local jq_program='
        # gather the identifiers of the current group that are DOIs and
        # refer to the work itself, then insist on exactly one of them
        def group_doi:
            .groupId as $gid
            | [
                .externalIdentifiers[]
                | select(
                    .externalIdentifierType.value == "doi" and
                    .relationship.value == "self"
                )
                # the DOI string itself, e.g. 10.1021/acsnano.9b05817
                | .externalIdentifierId.value
            ]
            | if length == 0 then
                error("Missing DOI for ORCID entry [groupId = \($gid)]. Please edit works.json and try again.")
            elif length > 1 then
                error("More than one DOI for ORCID entry [groupId = \($gid)]. Please edit works.json and try again.")
            else
                .[0]
            end;

        .groups[] | group_doi
    '
    jq -r "$jq_program" "$works_file"
}

# Fetch the ORCID works list unless a local works.json already exists.
#
# The download goes to works.json.part and is renamed only when curl
# succeeds; --fail makes curl report HTTP errors (status >= 400) as failures.
# Writing straight into works.json would leave an empty file or a saved
# error page after a failed request, and the next run would reuse it as if
# it were a valid cache.
if [ -f "works.json" ]; then
    echoerr "Reusing existing works.json for ORCID $orcid"
else
    echoerr "Fetching works.json for ORCID $orcid"
    # get works in order of latest-oldest
    if curl --fail "https://orcid.org/$orcid/worksPage.json?offset=0&sort=date&sortAsc=false" \
        -H 'Accept: application/json, text/plain, */*' \
        -H "Referer: https://orcid.org/$orcid" \
        > works.json.part
    then
        mv -- works.json.part works.json
    else
        rm -f -- works.json.part
        echoerr "Failed to download works for ORCID $orcid"
        exit 1
    fi
fi

# extract the DOI list from works.json into works.txt, one DOI per line
echoerr "Getting DOIs from works.json"
get_dois_from works.json > works.txt
doi_count=$(wc -l < works.txt)
echoerr "Got $doi_count DOIs from works.json"

# get data from crossref
#
# For every DOI in works.txt, download the Crossref JSON metadata and the
# BibTeX citation into doi-<encoded doi>.json and doi-<encoded doi>.bib.
# A DOI is skipped when both files already exist.

# Download URL $1 into file $2.
# The data is written to "$2.part" and renamed only when curl succeeds, and
# --fail turns HTTP errors into failures. Otherwise an interrupted or
# rejected request would leave an empty file or an error page behind, which
# the "already processed" check would later accept as cached data.
function download_to {
    local url="$1" target="$2"
    if curl --fail "$url" > "$target.part"; then
        mv -- "$target.part" "$target"
    else
        rm -f -- "$target.part"
        echoerr "  Failed to download $url"
        return 1
    fi
}

echoerr "Getting info from crossref for each DOI"
while IFS="" read -r doi; do
    # url encode because /'s
    url_doi=$(url_encode "$doi")
    # output filenames
    xref_json="doi-$url_doi.json"
    xref_bib="doi-$url_doi.bib"

    if [ -f "$xref_json" ] && [ -f "$xref_bib" ]; then
        echoerr "  $doi already processed, skipping"
        continue
    fi

    echoerr "  Processing $doi"
    download_to "https://api.crossref.org/v1/works/$url_doi" "$xref_json" || exit 1
    # short pauses between requests (presumably to go easy on the API)
    sleep 0.25
    download_to "https://search.crossref.org/citation?format=bibtex&doi=$url_doi" "$xref_bib" || exit 1
    sleep 0.75
done < works.txt


# build markdown publication file
#
# For every DOI in works.txt, read the cached Crossref files
# (doi-<encoded doi>.json / .bib) and write one markdown list item to
# publications.md.

# Assemble one markdown citation from already-extracted fields.
# Arguments: $1 authors  $2 title  $3 journal  $4 volume  $5 pages
#            $6 doi      $7 url-encoded doi    $8 year
# Empty fields are left out together with their separator. With every field
# present the result is:
#   AUTHORS, _TITLE_, JOURNAL VOLUME, PAGES [DOI](https://doi.org/URLDOI) (YEAR).
# Outputs: the citation on stdout, with every "--" shown as an en dash.
function format_publication {
    local authors="$1" title="$2" journal="$3" volume="$4" pages="$5"
    local doi="$6" url_doi="$7" year="$8"
    local line="$authors"
    local venue="$journal"

    if [ -n "$title" ]; then line+="${line:+, }_${title}_"; fi
    if [ -n "$volume" ]; then venue+="${venue:+ }$volume"; fi
    if [ -n "$venue" ]; then line+="${line:+, }$venue"; fi
    line+="${line:+, }"
    if [ -n "$pages" ]; then line+="$pages "; fi
    line+="[$doi](https://doi.org/$url_doi)"
    if [ -n "$year" ]; then line+=" ($year)"; fi
    line+="."

    printf '%s\n' "${line//--/–}"
}

echoerr "Building publications.md"
pub_idx=0
while IFS="" read -r doi; do
    echoerr "  Processing $doi"
    pub_idx=$(( pub_idx + 1 ))
    url_doi=$(url_encode "$doi")
    xref_json="doi-$url_doi.json"
    xref_bib="doi-$url_doi.bib"

    authors=$(
        jq -r --arg name "$match_name" '
            # "Given Names" -> "G. N."
            def abbreviate:
                split(" ") | map(select(length > 0) | .[:1] + ".") | join(" ");

            (.message.author // []) | map(
                (
                    if .given then
                        "\(.given | abbreviate) \(.family)"
                    else
                        # entry without a given name (presumably a group
                        # author): fall back to whatever name is present
                        (.family // .name // empty)
                    end
                ) |
                # bold name in author list if it matches
                if . == $name then
                    "**\(.)**"
                else
                    .
                end
            ) | join(", ")' "$xref_json"
    )
    # "// empty" keeps a missing field from being printed as the text "null"
    title=$(jq -r '.message.title[0] // empty' "$xref_json")
    journal=$(jq -r '.message | .["container-title"][0] // empty' "$xref_json")
    # keep digits only (the previous set "[0-9]" also kept the brackets)
    year=$(grep 'year = ' "$xref_bib" | tr -cd '0-9')
    # text between the last "{" and the following "}" of the matching line
    volume=$(grep 'volume = ' "$xref_bib" | sed -e 's/^.*{//' -e 's/}.*$//')
    pages=$(grep 'pages = ' "$xref_bib" | sed -e 's/^.*{//' -e 's/}.*$//')

    markdown=$(format_publication "$authors" "$title" "$journal" "$volume" \
        "$pages" "$doi" "$url_doi" "$year")
    if [ "$output_numbered_list" = true ]; then
        printf '%s\n' "$pub_idx. $markdown"
    else
        printf '%s\n' "* $markdown"
    fi
done < works.txt > publications.md
echoerr "Done"
	#!/bin/bash

	# dependencies: jq, grep, sed, tr, curl, and perl URI::Escape (or replace url_encode function)
	# stop at the first command that exits with a non-zero status
	set -o errexit

	# ORCID iD whose works are turned into the publication list
	orcid='0000-0002-3512-7146'

	# author name to print in bold; write it the way names are abbreviated in
	# the output, i.e. "Given Names Last" becomes "G. N. Last"
	match_name='C. Daniels'

	# true          -> numbered list ("1. ...")
	# anything else -> unordered list ("* ...")
	output_numbered_list='true'

	# Percent-encode a string so it can be used inside a URL or a file name.
	#
	# Implemented in pure bash, so perl's URI::Escape module is not needed
	# here. The output follows uri_escape's default rule: every byte except
	# the RFC 3986 unreserved characters (A-Z a-z 0-9 - . _ ~) becomes %XX
	# with upper-case hex digits.
	#
	# Arguments: $1 - string to encode (required; aborts with "Missing argument")
	# Outputs:   the encoded string on stdout, without a trailing newline
	function url_encode {
	    local input="${1?"Missing argument"}"
	    # work byte-wise so multi-byte characters are encoded one byte at a time
	    local LC_ALL=C
	    local encoded="" char code i
	    for (( i = 0; i < ${#input}; i++ )); do
	        char="${input:i:1}"
	        case "$char" in
	            [A-Za-z0-9._~-])
	                encoded+="$char"
	                ;;
	            *)
	                # a leading quote makes printf yield the character's numeric value
	                printf -v code '%d' "'$char"
	                printf -v char '%%%02X' "$(( code & 0xFF ))"
	                encoded+="$char"
	                ;;
	        esac
	    done
	    printf '%s' "$encoded"
	}

	# Write all arguments to stderr, with the same argument handling as
	# `echo`, so that messages never end up on stdout.
	function echoerr {
	    >&2 echo "$@"
	}

	# Print the DOI of every work group in an ORCID works JSON file.
	#
	# Arguments: $1 - path to the JSON file (required)
	# Outputs:   one DOI per line on stdout, in the order of .groups
	# Fails with a jq error (non-zero status) when a group has no DOI whose
	# relationship is "self", or has more than one.
	#
	# The jq program is single-quoted and uses plain "|" pipes; the previous
	# text contained backslash-escaped pipes, which jq rejects as a syntax
	# error.
	function get_dois_from {
	    local works_file="${1?"missing input json file argument"}"
	    local jq_program='
	        # gather the identifiers of the current group that are DOIs and
	        # refer to the work itself, then insist on exactly one of them
	        def group_doi:
	            .groupId as $gid
	            | [
	                .externalIdentifiers[]
	                | select(
	                    .externalIdentifierType.value == "doi" and
	                    .relationship.value == "self"
	                )
	                # the DOI string itself, e.g. 10.1021/acsnano.9b05817
	                | .externalIdentifierId.value
	            ]
	            | if length == 0 then
	                error("Missing DOI for ORCID entry [groupId = \($gid)]. Please edit works.json and try again.")
	            elif length > 1 then
	                error("More than one DOI for ORCID entry [groupId = \($gid)]. Please edit works.json and try again.")
	            else
	                .[0]
	            end;

	        .groups[] | group_doi
	    '
	    jq -r "$jq_program" "$works_file"
	}

	# Fetch the ORCID works list unless a local works.json already exists.
	#
	# The download goes to works.json.part and is renamed only when curl
	# succeeds; --fail makes curl report HTTP errors (status >= 400) as
	# failures. Writing straight into works.json would leave an empty file or
	# a saved error page after a failed request, and the next run would reuse
	# it as if it were a valid cache.
	if [ -f "works.json" ]; then
	    echoerr "Reusing existing works.json for ORCID $orcid"
	else
	    echoerr "Fetching works.json for ORCID $orcid"
	    # get works in order of latest-oldest
	    # ("*/*" is the catch-all media range; a bare "/" is not a valid one)
	    if curl --fail "https://orcid.org/$orcid/worksPage.json?offset=0&sort=date&sortAsc=false" \
	        -H 'Accept: application/json, text/plain, */*' \
	        -H "Referer: https://orcid.org/$orcid" \
	        > works.json.part
	    then
	        mv -- works.json.part works.json
	    else
	        rm -f -- works.json.part
	        echoerr "Failed to download works for ORCID $orcid"
	        exit 1
	    fi
	fi

	# extract the DOI list from works.json into works.txt, one DOI per line
	echoerr "Getting DOIs from works.json"
	get_dois_from works.json > works.txt
	doi_count=$(wc -l < works.txt)
	echoerr "Got $doi_count DOIs from works.json"

	# get data from crossref
	#
	# For every DOI in works.txt, download the Crossref JSON metadata and the
	# BibTeX citation into doi-<encoded doi>.json and doi-<encoded doi>.bib.
	# A DOI is skipped when both files already exist.

	# Download URL $1 into file $2.
	# The data is written to "$2.part" and renamed only when curl succeeds,
	# and --fail turns HTTP errors into failures. Otherwise an interrupted or
	# rejected request would leave an empty file or an error page behind,
	# which the "already processed" check would later accept as cached data.
	function download_to {
	    local url="$1" target="$2"
	    if curl --fail "$url" > "$target.part"; then
	        mv -- "$target.part" "$target"
	    else
	        rm -f -- "$target.part"
	        echoerr " Failed to download $url"
	        return 1
	    fi
	}

	echoerr "Getting info from crossref for each DOI"
	while IFS="" read -r doi; do
	    # url encode because /'s
	    url_doi=$(url_encode "$doi")
	    # output filenames
	    xref_json="doi-$url_doi.json"
	    xref_bib="doi-$url_doi.bib"

	    if [ -f "$xref_json" ] && [ -f "$xref_bib" ]; then
	        echoerr " $doi already processed, skipping"
	        continue
	    fi

	    echoerr " Processing $doi"
	    download_to "https://api.crossref.org/v1/works/$url_doi" "$xref_json" || exit 1
	    # short pauses between requests (presumably to go easy on the API)
	    sleep 0.25
	    download_to "https://search.crossref.org/citation?format=bibtex&doi=$url_doi" "$xref_bib" || exit 1
	    sleep 0.75
	done < works.txt


	# build markdown publication file
	#
	# For every DOI in works.txt, read the cached Crossref files
	# (doi-<encoded doi>.json / .bib) and write one markdown list item to
	# publications.md.
	#
	# Repairs relative to the previous text of this section: real "|" pipes
	# instead of backslash-escaped ones (which handed a literal "|" to grep
	# and made the jq programs invalid), sed patterns that strip everything
	# outside the braces, and the "**" markers that actually bold the
	# matching author.

	# Assemble one markdown citation from already-extracted fields.
	# Arguments: $1 authors  $2 title  $3 journal  $4 volume  $5 pages
	#            $6 doi      $7 url-encoded doi    $8 year
	# Empty fields are left out together with their separator. With every
	# field present the result is:
	#   AUTHORS, _TITLE_, JOURNAL VOLUME, PAGES [DOI](https://doi.org/URLDOI) (YEAR).
	# Outputs: the citation on stdout, with every "--" shown as an en dash.
	function format_publication {
	    local authors="$1" title="$2" journal="$3" volume="$4" pages="$5"
	    local doi="$6" url_doi="$7" year="$8"
	    local line="$authors"
	    local venue="$journal"

	    if [ -n "$title" ]; then line+="${line:+, }_${title}_"; fi
	    if [ -n "$volume" ]; then venue+="${venue:+ }$volume"; fi
	    if [ -n "$venue" ]; then line+="${line:+, }$venue"; fi
	    line+="${line:+, }"
	    if [ -n "$pages" ]; then line+="$pages "; fi
	    line+="[$doi](https://doi.org/$url_doi)"
	    if [ -n "$year" ]; then line+=" ($year)"; fi
	    line+="."

	    printf '%s\n' "${line//--/–}"
	}

	echoerr "Building publications.md"
	pub_idx=0
	while IFS="" read -r doi; do
	    echoerr " Processing $doi"
	    pub_idx=$(( pub_idx + 1 ))
	    url_doi=$(url_encode "$doi")
	    xref_json="doi-$url_doi.json"
	    xref_bib="doi-$url_doi.bib"

	    authors=$(
	        jq -r --arg name "$match_name" '
	            # "Given Names" -> "G. N."
	            def abbreviate:
	                split(" ") | map(select(length > 0) | .[:1] + ".") | join(" ");

	            (.message.author // []) | map(
	                (
	                    if .given then
	                        "\(.given | abbreviate) \(.family)"
	                    else
	                        # entry without a given name (presumably a group
	                        # author): fall back to whatever name is present
	                        (.family // .name // empty)
	                    end
	                ) |
	                # bold name in author list if it matches
	                if . == $name then
	                    "**\(.)**"
	                else
	                    .
	                end
	            ) | join(", ")' "$xref_json"
	    )
	    # "// empty" keeps a missing field from being printed as the text "null"
	    title=$(jq -r '.message.title[0] // empty' "$xref_json")
	    journal=$(jq -r '.message | .["container-title"][0] // empty' "$xref_json")
	    # keep digits only (the set "[0-9]" would also keep the brackets)
	    year=$(grep 'year = ' "$xref_bib" | tr -cd '0-9')
	    # text between the last "{" and the following "}" of the matching line
	    volume=$(grep 'volume = ' "$xref_bib" | sed -e 's/^.*{//' -e 's/}.*$//')
	    pages=$(grep 'pages = ' "$xref_bib" | sed -e 's/^.*{//' -e 's/}.*$//')

	    markdown=$(format_publication "$authors" "$title" "$journal" "$volume" \
	        "$pages" "$doi" "$url_doi" "$year")
	    if [ "$output_numbered_list" = true ]; then
	        printf '%s\n' "$pub_idx. $markdown"
	    else
	        printf '%s\n' "* $markdown"
	    fi
	done < works.txt > publications.md
	echoerr "Done"