Last active
August 29, 2015 14:10
-
-
Save lmmx/3c9406c4ec2c42b82158 to your computer and use it in GitHub Desktop.
Pipeline of .bashrc functions to deal with Pubmed on the command line.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# cutf FIELDS... — select tab-separated fields from stdin.
# All arguments are handed to `cut -f` unchanged (e.g. `cutf 4`, `cutf 1-3`).
function cutf () {
  cut -d $'\t' -f "$@"
}
# striptoalpha TEXT... — reduce text to lower-case ASCII letters, line by line.
# The arguments are joined with single spaces; every character other than
# A-Z, a-z and newline is deleted, the remainder is lower-cased, and lines
# that end up empty are dropped. Output: one cleaned line per non-empty line.
function striptoalpha () {
  # Join with a space regardless of the caller's IFS (callers set IFS=$'\n').
  local IFS=' '
  # NB: the tr sets are written without brackets on purpose: in tr a set such
  # as "[A-Z]" also contains the literal '[' and ']' characters, which would
  # then survive the strip.
  printf '%s\n' "$*" \
    | tr -dc 'A-Za-z\n' \
    | tr 'A-Z' 'a-z' \
    | awk 'length($0) > 0'
}
# pubmed QUERY — search PubMed and print a short summary of each hit.
# Runs esearch | efetch (docsum) | xtract; for every DocumentSummary that has
# both an Author and a Title, xtract is asked for the Id, the first
# Author/Name and the Title.
function pubmed () {
  esearch -db pubmed -query "$@" \
    | efetch -format docsum \
    | xtract -pattern DocumentSummary -present Author -and Title \
        -element Id -first "Author/Name" -element Title
}
# pubmeddocsum QUERY — search PubMed and print the raw document summaries
# (esearch piped into `efetch -format docsum`).
function pubmeddocsum () {
  esearch -db pubmed -query "$@" \
    | efetch -format docsum
}
# pubmedextractdoi QUERY — one tab-separated row per PubMed hit.
# xtract is asked for: Id, first Author/Name, Title, SortPubDate and the
# ArticleId value whose IdType is doi. The awk step then reorders each row to
#   column 1-3: the first three xtract columns (Id, author, title)
#   column 4:   the fifth xtract column (the DOI value; empty when absent)
#   column 5:   the part of the fourth xtract column before the first "/"
#               (the year, given a YYYY/MM/DD style SortPubDate)
function pubmedextractdoi () {
  pubmeddocsum "$@" \
    | xtract -pattern DocumentSummary -element Id -first "Author/Name" \
        -element Title SortPubDate -block ArticleId -match "IdType:doi" -element Value \
    | awk 'BEGIN { FS = OFS = "\t" } { split($4, date, "/"); print $1, $2, $3, $5, date[1] }'
}
# pubmeddoi QUERY — print only the DOI column (column 4) of pubmedextractdoi.
function pubmeddoi () {
  pubmedextractdoi "$@" | cutf 4
}
# pubmeddoimulti QUERY — DOI lookup with a fallback.
# Prints the DOI column of pubmedextractdoi for QUERY. When that column is
# entirely empty, the PMID column (column 1) is handed to pmid2doirestful
# instead and its output is printed.
function pubmeddoimulti () {
  # Kept local so repeated calls do not leak state into the interactive shell.
  local rows pmids
  rows=$(pubmedextractdoi "$@")
  if [[ -z $(printf '%s\n' "$rows" | cutf 4) ]]; then
    pmids=$(printf '%s\n' "$rows" | cutf 1)
    pmid2doirestful "$pmids"
  else
    printf '%s\n' "$rows" | cutf 4
  fi
}
# pmid2doi PMID — ask the www.pmid2doi.org REST endpoint for a DOI.
# The response line is split on `,"doi":"` and `"}` and the second piece is
# printed, i.e. the value of the "doi" key when it is the last key of the
# JSON object. Prints an empty line when the response has no such key.
function pmid2doi () {
  curl -s www.pmid2doi.org/rest/json/doi/"$@" \
    | awk '{split($0,a,",\"doi\":\"|\"}"); print a[2]}'
}
# pmid2doimulti PMID — DOI for a PMID, trying two sources.
# Prints the pmid2doi answer when it is non-empty; otherwise defers to
# pmid2doincbi with the same arguments.
function pmid2doimulti () {
  local doi
  doi=$(pmid2doi "$@")
  if [[ -n $doi ]]; then
    printf '%s\n' "$doi"
  else
    pmid2doincbi "$@"
  fi
}
# pmid2doincbi PMID — DOI for a PMID via the PubMed record itself.
# Prints the DOI column (column 4) of pubmedextractdoi, or the literal
# "DOI NA" when there is no hit or the hit carries no DOI.
function pmid2doincbi () {
  local doi
  # Only the DOI column is wanted: pubmedextractdoi prints a whole
  # Id/author/title/DOI/year row, which is non-empty even without a DOI and
  # would otherwise hide the "DOI NA" case.
  doi=$(pubmedextractdoi "$@" | cutf 4)
  if [[ -z $doi ]]; then
    echo "DOI NA"
  else
    printf '%s\n' "$doi"
  fi
}
# AddPubTableDOIsSimple [FILE...] — run AddPubDOI on every non-empty line of
# the given file(s) (stdin when no file is given, as with cat).
function AddPubTableDOIsSimple () {
  # AddPubDOI keeps running with IFS=$'\n' as before; being local, the
  # caller's IFS (including an unset one) is restored automatically on return.
  local IFS=$'\n'
  local -a rows
  local table line
  table=$(cat "$@")
  # Read lines into an array rather than word-splitting $(cat ...): unquoted
  # splitting would also glob-expand lines containing *, ? or [ ].
  mapfile -t rows <<< "$table"
  for line in "${rows[@]}"; do
    [[ -n $line ]] || continue   # blank lines are skipped
    AddPubDOI "$line"
  done
}
# I ran into NCBI rate throttling while calling AddPubDOI in parallel, so a second attempt is made for any "DOI NA" response.
# Each result is also written to STDERR: this function is meant to be run with STDOUT redirected to a file,
# so STDERR is what lets you see progress through the lines, as in:
# AddPubTableDOIs table.tsv > outputfile.tsv
# Overwriting the input table with the output is not recommended unless the file is under version control.
# Helper for AddPubTableDOIs: succeeds when every line of $1 has the same
# number of tab-separated fields.
function _AddPubTableDOIs_uniform () {
  printf '%s\n' "$1" \
    | awk 'BEGIN { FS = "\t" } NR == 1 { want = NF } NF != want { bad = 1 } END { exit (bad ? 1 : 0) }'
}

# AddPubTableDOIs [FILE...] — run AddPubDOI on every non-empty line of the
# given file(s) (stdin when none is given) and print one result per line.
#   * A response containing "DOI NA" is retried exactly once.
#   * A response whose lines do not all have the same number of tab-separated
#     fields is replaced by "<columns 1-3 of the input>\tDOI NA: Parse error".
#   * Every line written to stdout is mirrored on stderr, so progress stays
#     visible while stdout is redirected to a file.
function AddPubTableDOIs () {
  # AddPubDOI keeps running with IFS=$'\n' as before; being local, the
  # caller's IFS is restored automatically on return.
  local IFS=$'\n'
  local -a rows
  local table line resp stub
  table=$(cat "$@")
  # An array avoids the glob expansion that word-splitting $(cat ...) causes
  # on lines containing *, ? or [ ].
  mapfile -t rows <<< "$table"
  for line in "${rows[@]}"; do
    [[ -n $line ]] || continue   # blank lines are skipped
    resp=$(AddPubDOI "$line" 2>/dev/null)
    if [[ $resp =~ 'DOI NA' ]]; then
      # try again in case it's just NCBI rate throttling, but just the once
      resp=$(AddPubDOI "$line" 2>/dev/null)
    fi
    if ! _AddPubTableDOIs_uniform "$resp"; then
      stub=$(printf '%s\n' "$line" | cutf 1-3)
      resp="${stub}"$'\t'"DOI NA: Parse error"
    fi
    printf '%s\n' "$resp"
    printf '%s\n' "$resp" >&2
  done
}
# AddPubDOI ROW — complete one tab-separated literature-table row with a DOI.
# ROW columns as read here: 1 = author, 2 = year, 3 = title, 4 = DOI.
#   * A row that already has a 4th column is printed unchanged.
#   * Otherwise columns 1-3 are printed followed by a tab and the lookup
#     result. The header row (title column equal to "Title") gets no lookup.
# Lookup, narrowing only while more than one line comes back:
#   round 1: pubmeddoimulti "<title> AND <author> [AUTHOR]"
#   round 2: the same query restricted to the publication year
#   round 3: pubmedextractdoi rows whose letters-only title contains the
#            letters-only wanted title
# If round 3 still yields several lines an "ERROR multiple DOIs ..." message
# is printed in place of the DOI.
function AddPubDOI () {
  local row author year title query round1 round2 round3

  # Join the arguments with single spaces, independent of the caller's IFS.
  printf -v row '%s ' "$@"
  row=${row% }

  if [[ -n $(printf '%s\n' "$row" | cutf 4) ]]; then
    printf '%s\n' "$row"
    # `return`, not `continue`: this is a function body, not a loop body.
    return
  fi

  # The row text is data, never a printf format (titles may contain '%').
  printf '%s\t' "$(printf '%s\n' "$row" | cutf 1-3)"

  title=$(printf '%s\n' "$row" | cutf 3)
  if [[ $title == 'Title' ]]; then
    # Header row: terminate the line so the next row starts on its own line.
    printf '\n'
    return
  fi

  author=$(printf '%s\n' "$row" | cutf 1)
  year=$(printf '%s\n' "$row" | cutf 2)
  query="$title AND $author [AUTHOR]"

  round1=$(pubmeddoimulti "$query")
  if (( $(printf '%s\n' "$round1" | wc -l) <= 1 )); then
    printf '%s\n' "$round1"
    return
  fi

  round2=$(pubmeddoimulti "$query AND (${year}[Date - Publication] : ${year}[Date - Publication])")
  if (( $(printf '%s\n' "$round2" | wc -l) <= 1 )); then
    printf '%s\n' "$round2"
    return
  fi

  round3=$(
    # NOTE(review): this searches with the raw input row rather than the
    # round 1/2 query, as the original did; confirm that is intended.
    candidates=$(pubmedextractdoi "$@")
    wanted=$(striptoalpha "$title")
    # Walk the candidate rows themselves so the printed DOI always belongs to
    # the row whose title matched.
    while IFS= read -r candidate; do
      candidatetitle=$(printf '%s\n' "$candidate" | cutf 3)
      if [[ "$(striptoalpha "$candidatetitle")" == *"$wanted"* ]]; then
        printf '%s\n' "$candidate" | cutf 4
      fi
    done <<< "$candidates"
  )
  if (( $(printf '%s\n' "$round3" | wc -l) > 1 )); then
    printf 'ERROR multiple DOIs after 3 attempts to reduce - %s\n' "${round3//$'\n'/ }"
  else
    printf '%s\n' "$round3"
  fi
}
# pmid2doirestful PMID — DOI for a PMID via pmid2doi only.
# Prints the pmid2doi answer, or the literal "DOI NA" when it is empty.
function pmid2doirestful () {
  local doi
  doi=$(pmid2doi "$@")
  if [[ -z $doi ]]; then
    echo "DOI NA"
  else
    printf '%s\n' "$doi"
  fi
}
# mmrlit — print the literature table.
function mmrlit {
  cat ~/Dropbox/Y3/MMR/Essay/literature_table.tsv
}
# mmrlitedit — open the literature table in vim.
function mmrlitedit {
  vim ~/Dropbox/Y3/MMR/Essay/literature_table.tsv
}
# mmrlitgrep [GREP-OPTIONS] PATTERN — case-insensitive grep over the
# literature table that includes DOIs. All arguments go to grep unchanged.
function mmrlitgrep () {
  grep -i "$@" ~/Dropbox/Y3/MMR/Essay/literature_table_with_DOIs.tsv
}
# mmrlitdoi PATTERN — copy the DOI(s) of matching table rows.
# Column 4 of every mmrlitgrep match is taken, newlines are removed, and the
# result is piped to `xclip -sel p`; clipconfirm is run afterwards.
function mmrlitdoi () {
  mmrlitgrep "$@" \
    | cut -d $'\t' -f 4 \
    | tr -d '\n' \
    | xclip -sel p
  clipconfirm
}
# mmrlitdoicite PATTERN — copy an inline R citation for matching table rows.
# Column 4 of every mmrlitgrep match is wrapped as `r citet("<value>")`,
# newlines are removed, and the result is piped to `xclip -sel p`;
# clipconfirm is run afterwards.
function mmrlitdoicite () {
  mmrlitgrep "$@" \
    | cut -f 4 -d $'\t' \
    | awk '{print "`r citet(\""$0"\")`"}' \
    | tr -d '\n' \
    | xclip -sel p
  clipconfirm
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# As well as processing a tab-separated file if passed to AddPubTableDOIs, the AddPubDOI can be used to make a DOI | |
# search box if tied to a keyboard shortcut (creating a terminal, instructing it to start a bash shell with commands) | |
# NB: `xclip -sel p` saved to clipboard, but the clipboard was cleared upon closing this window. | |
# - Installing parcellite clipboard manager and ensuring it monitors the new | |
# - clipboard entry ensures retention after the bash child process exits | |
# Using a terminal profile with larger text called "Bigcommands", I tie the keyboard shortcut to: | |
# gnome-terminal -e "bash -c \"source /home/louis/.bashrc; read pmstr; getdoispaced "$pmstr" | tr -d '\n' | xclip -sel p; clipconfirm; parcellite -p > /dev/null; read dummyvar;\"" --geometry 80x2 --title="Search for a DOI:" --window-with-profile="Bigcommands" | |
# Optional extra: check if the spacebar was pressed at the end. If so, open the article webpage | |
# gnome-terminal -e "bash -c \"source /home/louis/.bashrc; read pmstr; getdoispaced "$pmstr" | tr -d '\n' | xclip -sel p; clipconfirm; parcellite -p > /dev/null; read -d'' -s -n1; if [[ $REPLY = ' ' ]]; then google-chrome "http://dx.doi.org/"$(xclip -o) > /dev/null 2>&1; fi;\"" --geometry 80x2 --title="Search for a DOI:" --window-with-profile="Bigcommands" | |
# cuts FIELDS... — select space-separated fields from stdin.
# All arguments are handed to `cut -f` unchanged (e.g. `cuts 2`, `cuts 3-`).
function cuts () {
  cut -d ' ' -f "$@"
}
# getdoispaced "AUTHOR YEAR TITLE WORDS..." — DOI lookup from a
# space-separated search string.
# Nothing is printed unless the second space-separated word contains a
# four-digit run matching [0-2][0-9]{3}. Otherwise the first two words and the
# remainder are turned into a three-column tab-separated row, passed to
# AddPubDOI, and column 4 of its output (the DOI) is printed.
function getdoispaced () {
  local joined collapsed lead rest
  local -a words

  # Join the arguments with single spaces, independent of the caller's IFS.
  printf -v joined '%s ' "$@"
  joined=${joined% }

  if [[ $(printf '%s\n' "$joined" | cuts 2) =~ [0-2][0-9]{3} ]]; then
    # Collapse runs of whitespace by splitting into an array; unlike an
    # unquoted `echo $@` this never glob-expands *, ? or [ ] in the title.
    # read returns non-zero at end of input, which is expected here.
    IFS=$' \t\n' read -r -d '' -a words <<< "$joined"
    printf -v collapsed '%s ' "${words[@]}"
    collapsed=${collapsed% }

    lead=$(printf '%s\n' "$collapsed" | cuts 1-2 | tr ' ' '\t')
    rest=$(printf '%s\n' "$collapsed" | cuts 3-)
    # The row is assembled with a literal tab rather than `echo -e`, so
    # backslashes typed in the search text are left alone.
    AddPubDOI "${lead}"$'\t'"${rest}" | cutf 4
  fi
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment