Skip to content

Instantly share code, notes, and snippets.

@lmmx
Last active August 29, 2015 14:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lmmx/3c9406c4ec2c42b82158 to your computer and use it in GitHub Desktop.
Save lmmx/3c9406c4ec2c42b82158 to your computer and use it in GitHub Desktop.
Pipeline of .bashrc functions to deal with Pubmed on the command line.
# cutf LIST [FILE...]
# Print the tab-delimited field(s) selected by LIST; every argument is handed
# on to `cut -f`, so input comes from FILE(s) or, without any, from stdin.
cutf() {
  local -r delimiter=$'\t'
  cut -d "${delimiter}" -f "$@"
}
# striptoalpha TEXT...
# Reduce TEXT to lower-case ASCII letters: the arguments are joined with
# spaces, every character other than A-Z, a-z and newline is deleted, the rest
# is lower-cased, and lines that end up empty are dropped. Output is one
# squashed word per remaining input line.
striptoalpha() {
  # Join the arguments with a single space regardless of the caller's IFS.
  local IFS=' '
  # The tr sets are plain, quoted ranges: a bracketed set such as "[A-Z]"
  # would make tr keep literal '[' and ']' characters, and an unquoted one
  # could be glob-expanded by the shell before tr ever sees it.
  printf '%s\n' "$*" \
    | tr -dc 'A-Za-z\n' \
    | tr 'A-Z' 'a-z' \
    | sed '/^$/d'
}
# pubmed QUERY
# Run QUERY against the pubmed database with esearch, fetch the document
# summaries with efetch, and have xtract print, for each DocumentSummary that
# passes its `-present Author -and Title` condition, the Id, the first
# Author/Name and the Title.
pubmed() {
  esearch -db pubmed -query "$@" \
    | efetch -format docsum \
    | xtract -pattern DocumentSummary \
        -present Author -and Title \
        -element Id \
        -first "Author/Name" \
        -element Title
}
# pubmeddocsum QUERY
# Run QUERY against the pubmed database with esearch and print the matching
# document summaries as returned by `efetch -format docsum`.
pubmeddocsum() {
  esearch -db pubmed -query "$@" \
    | efetch -format docsum
}
# pubmedextractdoi QUERY
# Print one tab-separated row per document summary returned for QUERY:
#   Id, first Author/Name, Title, DOI, first "/"-separated part of SortPubDate
# xtract emits SortPubDate as column 4 and the doi ArticleId value as column 5;
# the awk step swaps them and keeps only the leading part of the date, so the
# DOI column is empty when xtract produced no doi value for a record.
pubmedextractdoi() {
  pubmeddocsum "$@" \
    | xtract -pattern DocumentSummary \
        -element Id \
        -first "Author/Name" \
        -element Title SortPubDate \
        -block ArticleId -match "IdType:doi" -element Value \
    | awk -F '\t' -v OFS='\t' '{
        split($4, date_parts, "/")
        print $1, $2, $3, $5, date_parts[1]
      }'
}
# pubmeddoi QUERY
# Print only the DOI column (field 4) of the rows pubmedextractdoi produces
# for QUERY, one line per record.
pubmeddoi() {
  pubmedextractdoi "$@" \
    | cutf 4
}
# pubmeddoimulti QUERY
# Print the DOI of every PubMed record matching QUERY, one per line.
# The DOI column of pubmedextractdoi is used when any record has one. When the
# whole column is empty, each PMID is looked up individually through
# pmid2doirestful, so the output still has one line per record. When the
# search returned no record at all, "DOI NA" is printed without a lookup.
pubmeddoimulti() {
  local records pmid
  local found=0
  records=$(pubmedextractdoi "$@")

  if [[ -n $(printf '%s\n' "${records}" | cutf 4) ]]; then
    printf '%s\n' "${records}" | cutf 4
    return
  fi

  # One REST call per PMID: several PMIDs joined by newlines are not a valid
  # argument for a lookup that builds a URL from a single id.
  while IFS= read -r pmid; do
    [[ -n ${pmid} ]] || continue
    found=1
    # stdin is detached so the lookup cannot consume the remaining PMIDs.
    pmid2doirestful "${pmid}" </dev/null
  done < <(printf '%s\n' "${records}" | cutf 1)

  if (( ! found )); then
    echo "DOI NA"
  fi
}
# pmid2doi PMID
# Fetch www.pmid2doi.org/rest/json/doi/PMID with curl and print, for each line
# of the response, the text between `,"doi":"` and the next `"}` (an empty line
# when the response line contains no such field).
pmid2doi() {
  curl -s www.pmid2doi.org/rest/json/doi/"$@" \
    | awk -F ',"doi":"|"}' '{ print $2 }'
}
# pmid2doimulti PMID
# Print the DOI for PMID: the pmid2doi REST lookup is tried first, and
# pmid2doincbi is used as the fallback when it returns nothing.
pmid2doimulti() {
  # Kept local so the lookup result does not leak into the calling shell.
  local doi
  doi=$(pmid2doi "$@")
  if [[ -n ${doi} ]]; then
    printf '%s\n' "${doi}"
  else
    pmid2doincbi "$@"
  fi
}
# pmid2doincbi PMID
# Look PMID up through pubmedextractdoi and print the DOI column (field 4) of
# the result, one DOI per line. Prints "DOI NA" when the search returns no
# record or none of the records carries a DOI.
pmid2doincbi() {
  local dois
  # Only the DOI column is wanted, not the whole tab-separated record; blank
  # entries (records without a DOI) are dropped before testing for emptiness.
  dois=$(pubmedextractdoi "$@" | cutf 4 | sed '/^$/d')
  if [[ -z ${dois} ]]; then
    echo "DOI NA"
  else
    printf '%s\n' "${dois}"
  fi
}
# AddPubTableDOIsSimple [FILE...]
# Call AddPubDOI once for every non-empty line of FILE (or of stdin when no
# FILE is given), passing the line as a single argument. Output is whatever
# AddPubDOI prints.
AddPubTableDOIsSimple() {
  # AddPubDOI has always been run with newline-only word splitting from here;
  # a local IFS keeps that and is restored automatically on return.
  local IFS=$'\n'
  local line
  # Lines are read verbatim on fd 3: nothing is glob-expanded, and stdin stays
  # untouched for the commands AddPubDOI runs.
  while IFS= read -r -u 3 line || [[ -n ${line} ]]; do
    [[ -n ${line} ]] || continue
    AddPubDOI "${line}"
  done 3< <(cat "$@")
}
# AddPubDOI ran into NCBI rate throttling when called in parallel, so a row that comes back as "DOI NA" is retried once.
# Each result is also copied to STDERR: STDOUT is normally redirected to a file (and so is not visible),
# which leaves STDERR to show progress through the lines, as in:
# AddPubTableDOIs table.tsv > outputfile.tsv
# Overwriting the input file is not recommended unless it is under version control.
# AddPubTableDOIs [FILE...]
# Run AddPubDOI on every non-empty line of FILE (or stdin when no FILE is
# given) and print one result per line on stdout, mirrored on stderr so that
# progress stays visible while stdout is redirected to a file.
#   * A response containing "DOI NA" is retried once.
#   * A response whose lines do not all have the same number of tab-separated
#     fields is replaced by the first three input columns followed by
#     "DOI NA: Parse error".
# AddPubDOI's own stderr is discarded.
AddPubTableDOIs() {
  # AddPubDOI has always been run with newline-only word splitting from here;
  # a local IFS keeps that and is restored automatically on return.
  local IFS=$'\n'
  local line response
  # Lines are read verbatim on fd 3: nothing is glob-expanded, and stdin stays
  # untouched for the commands AddPubDOI runs.
  while IFS= read -r -u 3 line || [[ -n ${line} ]]; do
    [[ -n ${line} ]] || continue

    response=$(AddPubDOI "${line}" 2>/dev/null)
    if [[ ${response} == *'DOI NA'* ]]; then
      # Try again in case it is just NCBI rate throttling, but only once.
      response=$(AddPubDOI "${line}" 2>/dev/null)
    fi

    # Every line of the response must have the same number of fields.
    if ! printf '%s\n' "${response}" \
      | awk -F '\t' 'NR == 1 { n = NF } NF != n { bad = 1 } END { exit bad + 0 }'; then
      response="$(printf '%s\n' "${line}" | cutf 1-3)"$'\t''DOI NA: Parse error'
    fi

    # The same text goes to both streams, so the progress display always
    # matches what was written to the output.
    printf '%s\n' "${response}"
    printf '%s\n' "${response}" >&2
  done 3< <(cat "$@")
}
# AddPubDOI ROW
# ROW is one tab-separated table line: author, year, title and an optional
# 4th (DOI) column. Prints ROW completed with a DOI column.
#   * A row whose 4th field is already filled (or that contains no tab at all,
#     which `cut` passes through whole) is printed unchanged.
#   * The header row (title column equal to "Title") is printed with an empty
#     4th column.
#   * Otherwise the DOI is searched in up to three rounds, each one used only
#     when the previous one returned more than one line:
#       1. title AND author
#       2. title AND author AND publication year
#       3. records found for the raw row whose letters-only title contains the
#          letters-only form of this row's title
#     If round 3 still yields several DOIs, the column holds an
#     "ERROR multiple DOIs ..." message listing them.
AddPubDOI() {
  # Join all arguments with single spaces, independent of the caller's IFS.
  local row=${1-} extra_arg
  for extra_arg in "${@:2}"; do
    row+=" ${extra_arg}"
  done

  if [[ -n $(printf '%s\n' "${row}" | cutf 4) ]]; then
    printf '%s\n' "${row}"
    # `return`, not `continue`: there is no loop in this function, and bash
    # 4.4+ no longer lets `continue` act on a loop in the caller.
    return 0
  fi

  # The row is data, never a printf format string (titles may contain '%').
  printf '%s\t' "$(printf '%s\n' "${row}" | cutf 1-3)"

  local title
  title=$(printf '%s\n' "${row}" | cutf 3)
  if [[ ${title} == 'Title' ]]; then
    # Header row: nothing to look up, but the line still has to be terminated.
    printf '\n'
    return 0
  fi

  local author year dois
  author=$(printf '%s\n' "${row}" | cutf 1)
  year=$(printf '%s\n' "${row}" | cutf 2)

  # Round 1: title and author.
  dois=$(pubmeddoimulti "${title} AND ${author} [AUTHOR]")
  if (( $(wc -l <<<"${dois}") <= 1 )); then
    printf '%s\n' "${dois}"
    return 0
  fi

  # Round 2: additionally restrict to the publication year.
  dois=$(pubmeddoimulti "${title} AND ${author} [AUTHOR] AND (${year}[Date - Publication] : ${year}[Date - Publication])")
  if (( $(wc -l <<<"${dois}") <= 1 )); then
    printf '%s\n' "${dois}"
    return 0
  fi

  # Round 3: compare letters-only titles record by record, so the DOI is
  # always taken from the very record whose title matched.
  local records wanted record candidate
  local matches=''
  records=$(pubmedextractdoi "$@")
  wanted=$(striptoalpha "${title}")
  while IFS= read -r record; do
    [[ -n ${record} ]] || continue
    candidate=$(striptoalpha "$(printf '%s\n' "${record}" | cutf 3)")
    if [[ ${candidate} == *"${wanted}"* ]]; then
      matches+="$(printf '%s\n' "${record}" | cutf 4)"$'\n'
    fi
  done <<<"${records}"
  matches=${matches%$'\n'}

  if (( $(wc -l <<<"${matches}") > 1 )); then
    printf 'ERROR multiple DOIs after 3 attempts to reduce - %s\n' "${matches//$'\n'/ }"
  else
    printf '%s\n' "${matches}"
  fi
}
# pmid2doirestful PMID
# Print the DOI that pmid2doi returns for PMID, or "DOI NA" when it returns
# nothing.
pmid2doirestful() {
  # Kept local so the lookup result does not leak into the calling shell.
  local doi
  doi=$(pmid2doi "$@")
  if [[ -z ${doi} ]]; then
    echo "DOI NA"
  else
    printf '%s\n' "${doi}"
  fi
}
# mmrlit
# Print the MMR essay literature table to stdout.
mmrlit() {
  local table
  table=~/Dropbox/Y3/MMR/Essay/literature_table.tsv
  cat "${table}"
}
# mmrlitedit
# Open the MMR essay literature table in vim.
mmrlitedit() {
  local table
  table=~/Dropbox/Y3/MMR/Essay/literature_table.tsv
  vim "${table}"
}
# mmrlitgrep PATTERN
# Case-insensitively grep the DOI-annotated literature table; all arguments
# are passed to grep ahead of the table path.
mmrlitgrep() {
  local table
  table=~/Dropbox/Y3/MMR/Essay/literature_table_with_DOIs.tsv
  grep -i "$@" "${table}"
}
# mmrlitdoi PATTERN
# Take the 4th tab-separated column of the literature-table rows matching
# PATTERN, strip the newlines, pipe the result to `xclip -sel p`, and then
# run clipconfirm.
mmrlitdoi() {
  mmrlitgrep "$@" \
    | cut -d $'\t' -f 4 \
    | tr -d '\n' \
    | xclip -sel p
  clipconfirm
}
# mmrlitdoicite PATTERN
# Like mmrlitdoi, but each DOI is wrapped as `r citet("DOI")` (including the
# backticks) before the newline-stripped text is piped to `xclip -sel p`;
# clipconfirm runs afterwards.
mmrlitdoicite() {
  mmrlitgrep "$@" \
    | cut -d $'\t' -f 4 \
    | awk -v quote='"' '{ print "`r citet(" quote $0 quote ")`" }' \
    | tr -d '\n' \
    | xclip -sel p
  clipconfirm
}
# As well as processing a tab-separated file if passed to AddPubTableDOIs, the AddPubDOI can be used to make a DOI
# search box if tied to a keyboard shortcut (creating a terminal, instructing it to start a bash shell with commands)
# NB: `xclip -sel p` saved to clipboard, but the clipboard was cleared upon closing this window.
# - Installing parcellite clipboard manager and ensuring it monitors the new
# - clipboard entry ensures retention after the bash child process exits
# Using a terminal profile with larger text called "Bigcommands", I tie the keyboard shortcut to:
# gnome-terminal -e "bash -c \"source /home/louis/.bashrc; read pmstr; getdoispaced "$pmstr" | tr -d '\n' | xclip -sel p; clipconfirm; parcellite -p > /dev/null; read dummyvar;\"" --geometry 80x2 --title="Search for a DOI:" --window-with-profile="Bigcommands"
# Optional extra: check if the spacebar was pressed at the end. If so, open the article webpage
# gnome-terminal -e "bash -c \"source /home/louis/.bashrc; read pmstr; getdoispaced "$pmstr" | tr -d '\n' | xclip -sel p; clipconfirm; parcellite -p > /dev/null; read -d'' -s -n1; if [[ $REPLY = ' ' ]]; then google-chrome "http://dx.doi.org/"$(xclip -o) > /dev/null 2>&1; fi;\"" --geometry 80x2 --title="Search for a DOI:" --window-with-profile="Bigcommands"
# cuts LIST [FILE...]
# Print the space-delimited field(s) selected by LIST; every argument is
# handed on to `cut -f`.
cuts() {
  local -r delimiter=' '
  cut -d "${delimiter}" -f "$@"
}
# getdoispaced AUTHOR YEAR TITLE...
# Accepts a space-separated "author year title words" string (as one argument
# or several) and prints the DOI that AddPubDOI finds for it. Runs of
# whitespace count as a single separator. Nothing is printed unless the second
# word contains a year-like run matching [0-2][0-9]{3}.
getdoispaced() {
  # Join the arguments with single spaces, independent of the caller's IFS.
  local joined='' piece
  for piece in "$@"; do
    joined+="${joined:+ }${piece}"
  done

  # Split on whitespace without glob expansion or backslash interpretation.
  # read returns non-zero at end of input with -d '', hence the `|| true`.
  local -a words=()
  IFS=$' \t\n' read -r -d '' -a words <<<"${joined}" || true

  [[ ${words[1]-} =~ [0-2][0-9]{3} ]] || return 0

  local title=''
  for piece in "${words[@]:2}"; do
    title+="${title:+ }${piece}"
  done

  AddPubDOI "${words[0]}"$'\t'"${words[1]}"$'\t'"${title}" | cutf 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment