Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Pipeline of .bashrc functions to deal with PubMed on the command line.
function cutf (){ cut -d $'\t' -f "$@"; }
function striptoalpha (){ for thisword in $(echo "$@" | tr -dc "[A-Z][a-z]\n" | tr [A-Z] [a-z]); do echo $thisword; done; }
function pubmed (){ esearch -db pubmed -query "$@" | efetch -format docsum | xtract -pattern DocumentSummary -present Author -and Title -element Id -first "Author/Name" -element Title; }
function pubmeddocsum (){ esearch -db pubmed -query "$@" | efetch -format docsum; }
function pubmedextractdoi (){ pubmeddocsum "$@" | xtract -pattern DocumentSummary -element Id -first "Author/Name" -element Title SortPubDate -block ArticleId -match "IdType:doi" -element Value | awk '{split($0,a,"\t"); split(a[4],b,"/"); print a[1]"\t"a[2]"\t"a[3]"\t"a[5]"\t"b[1]}'; }
function pubmeddoi (){ pubmedextractdoi "$@" | cutf 4; }
function pubmeddoimulti (){
xtracted=$(pubmedextractdoi "$@")
if [[ $(echo "$xtracted" | cutf 4) == '' ]]
then
xtractedpmid=$(echo "$xtracted" | cutf 1)
pmid2doirestful "$xtractedpmid"
else
echo "$xtracted" | cutf 4
fi
}
function pmid2doi (){ curl -s www.pmid2doi.org/rest/json/doi/"$@" | awk '{split($0,a,",\"doi\":\"|\"}"); print a[2]}'; }
function pmid2doimulti (){
curleddoi=$(pmid2doi "$@")
if [[ $curleddoi == '' ]]
then
pmid2doincbi "$@"
else
echo "$curleddoi"
fi
}
function pmid2doincbi (){
xtracteddoi=$(pubmedextractdoi "$@")
if [[ $xtracteddoi == '' ]]
then
echo "DOI NA"
else
echo "$xtracteddoi"
fi
}
function AddPubTableDOIsSimple () {
old_IFS=$IFS
IFS=$'\n'
for line in $(cat "$@"); do
AddPubDOI "$line"
done
IFS=$old_IFS
}
# NCBI rate throttling showed up when calling AddPubDOI in parallel, so a row that comes back as "DOI NA" gets
# a second attempt. Each output row is also written to STDERR: this function is meant to be run with STDOUT
# redirected to a file (which hides STDOUT), so STDERR is where you can watch progress through the lines, as in:
# AddPubTableDOIs table.tsv > outputfile.tsv
# Don't redirect the output over the input file unless it is under version control.
function AddPubTableDOIs () {
old_IFS=$IFS
IFS=$'\n'
for line in $(cat "$@"); do
DOIresp=$(AddPubDOI "$line" 2>/dev/null)
if [[ $DOIresp =~ 'DOI NA' ]]; then
# try again in case it's just NCBI rate throttling, but just the once
DOIresp2=$(AddPubDOI "$line" 2>/dev/null)
if [[ $(echo "$DOIresp2" | awk 'BEGIN{FS="\t"};{print NF}' | uniq | wc -l) == '1' ]]; then
echo "$DOIresp2"
>&2 echo "$DOIresp"
else
DOIinput=$(echo "$line" | cutf 1-3)
echo -e "$DOIinput\tDOI NA: Parse error"
>&2 echo "$DOIinput\tDOI NA: Parse error"
fi
else
if [[ $(echo "$DOIresp" | awk 'BEGIN{FS="\t"};{print NF}' | uniq | wc -l) == '1' ]]; then
echo "$DOIresp"
>&2 echo "$DOIresp"
else
DOIinput=$(echo "$line" | cutf 1-3)
echo -e "$DOIinput\tDOI NA: Parse error"
>&2 echo "$DOIinput\tDOI NA: Parse error"
fi
fi
done
IFS=$old_IFS
}
function AddPubDOI (){
if [[ $(echo "$@" | cutf 4) != '' ]]; then
echo "$@"
continue
fi
printf "$(echo "$@" | cutf 1-3)\t"
thistitle=$(echo "$@" | cutf 3)
if [[ $thistitle != 'Title' ]]; then
thisauthor=$(echo "$@" | cutf 1)
thisyear=$(echo "$@" | cutf 2)
round1=$(pubmeddoimulti "$thistitle AND $thisauthor [AUTHOR]")
round1hits=$(echo "$round1" | wc -l)
if [[ "$round1hits" -gt '1' ]]; then
round2=$(pubmeddoimulti "$thistitle AND $thisauthor [AUTHOR] AND ("$thisyear"[Date - Publication] : "$thisyear"[Date - Publication])")
round2hits=$(echo "$round2" | wc -l)
if [[ "$round2hits" -gt '1' ]]; then
round3=$(
xtracted=$(pubmedextractdoi "$@")
xtractedtitles=$(echo "$xtracted" | cutf 3 | tr -dc "[A-Z][a-z]\n")
alphatitles=$(striptoalpha "$xtractedtitles")
thistitlealpha=$(striptoalpha "$thistitle")
presearchIFS=$IFS
IFS=$'\n'
titlecounter="1"
for searchtitle in $(echo "$alphatitles"); do
(( titlecounter++ ))
if [[ "$searchtitle" == *"$thistitlealpha"* ]]; then
echo "$xtracted" | sed $titlecounter'q;d' | cutf 4
fi
done
IFS=$presearchIFS
)
round3hits=$(echo "$round3" | wc -l)
if [[ "$round3hits" -gt '1' ]]; then
echo "ERROR multiple DOIs after 3 attempts to reduce - "$round3
else
echo $round3
fi
else
echo $round2
fi
else
echo $round1
fi
fi
}
function pmid2doirestful (){
curleddoi=$(pmid2doi "$@")
if [[ $curleddoi == '' ]]
then
echo "DOI NA"
else
echo "$curleddoi"
fi
}
function mmrlit { cat ~/Dropbox/Y3/MMR/Essay/literature_table.tsv; }
function mmrlitedit { vim ~/Dropbox/Y3/MMR/Essay/literature_table.tsv; }
function mmrlitgrep (){ grep -i "$@" ~/Dropbox/Y3/MMR/Essay/literature_table_with_DOIs.tsv; }
function mmrlitdoi (){ mmrlitgrep "$@" | cut -d $'\t' -f 4 | tr -d '\n' | xclip -sel p; clipconfirm; }
function mmrlitdoicite (){ mmrlitgrep "$@" | cut -d $'\t' -f 4 | awk '{print "`r citet(\""$0"\")`"}' | tr -d '\n' | xclip -sel p; clipconfirm; }
# As well as processing a tab-separated file passed to AddPubTableDOIs, AddPubDOI can be used to make a DOI
# search box when tied to a keyboard shortcut (one that opens a terminal and tells it to start a bash shell with commands).
# NB: `xclip -sel p` saves to the clipboard, but the clipboard is cleared when that window closes.
# - Installing the parcellite clipboard manager and ensuring it monitors the new
#   clipboard entry ensures retention after the bash child process exits.
# Using a terminal profile with larger text called "Bigcommands", I tie the keyboard shortcut to:
# gnome-terminal -e "bash -c \"source /home/louis/.bashrc; read pmstr; getdoispaced "$pmstr" | tr -d '\n' | xclip -sel p; clipconfirm; parcellite -p > /dev/null; read dummyvar;\"" --geometry 80x2 --title="Search for a DOI:" --window-with-profile="Bigcommands"
# Optional extra: check if the spacebar was pressed at the end. If so, open the article webpage
# gnome-terminal -e "bash -c \"source /home/louis/.bashrc; read pmstr; getdoispaced "$pmstr" | tr -d '\n' | xclip -sel p; clipconfirm; parcellite -p > /dev/null; read -d'' -s -n1; if [[ $REPLY = ' ' ]]; then google-chrome "http://dx.doi.org/"$(xclip -o) > /dev/null 2>&1; fi;\"" --geometry 80x2 --title="Search for a DOI:" --window-with-profile="Bigcommands"
function cuts (){ cut -d ' ' -f "$@"; }
function getdoispaced (){
if [[ $(echo "$@" | cuts 2) =~ [0-2][0-9]{3} ]]; then
tabseppub=$(echo -e "$(echo $@ | cuts 1-2 | tr ' ' '\t')\t$(echo $@ | cuts 3-)")
AddPubDOI "$tabseppub" | cutf 4
fi
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment