Skip to content

Instantly share code, notes, and snippets.

@bshillingford
Last active July 9, 2020 10:09
Show Gist options
  • Save bshillingford/4d7a9c1fa3ee4c01f0ff to your computer and use it in GitHub Desktop.
Save bshillingford/4d7a9c1fa3ee4c01f0ff to your computer and use it in GitHub Desktop.
useful arxiv tools for bash
function arxiv-authors() {
if [ $# -ne 1 ]; then
echo "Usage: arxiv-authors arxiv_url_or_id"
echo "Prints authors' names, one per line."
else
if [[ $1 == http* ]]; then
url=$1
else
url=http://arxiv.org/abs/$1
fi
curl -s $(echo $url | sed -e 's/.pdf$//; s#/pdf/#/abs/#') | grep '<a href="/find/.*/au' | perl -pe 's/^.*?<a href=".*?">(.*?)<\/a>.*$/\1/'
fi
}
# Print the title of an arXiv paper as a single line.
#
# Arguments: $1 - arXiv identifier (e.g. 1234.5678) or an http(s) URL of the
#                 paper's abstract or PDF page.
# Outputs:   on stdout, the text of the page's lines containing "title>" with
#            the leading "<title>[id] " and the closing "</title>" removed,
#            whitespace collapsed to single spaces and trimmed.
# Returns:   1 when not called with exactly one argument, 0 otherwise.
function arxiv-title() {
  if [ $# -ne 1 ]; then
    echo "Usage: arxiv-title arxiv_url_or_id"
    echo "Prints title as a one-line string."
    return 1
  fi

  # Keep working variables out of the caller's shell.
  local url title
  if [[ $1 == http* ]]; then
    url=$1
  else
    url=http://arxiv.org/abs/$1
  fi
  # Point a PDF link at the abstract page: drop a literal ".pdf" suffix and
  # turn the /pdf/ path component into /abs/.
  url=$(printf '%s\n' "$url" | sed -e 's/\.pdf$//; s#/pdf/#/abs/#')

  # -L makes curl follow HTTP redirects instead of returning the redirect
  # response body.  Newlines are deleted (matching lines are concatenated),
  # tabs become spaces and runs of spaces are squeezed to one.
  title=$(
    curl -sL "$url" \
      | grep 'title>' \
      | sed -e 's/<title>\[[^]]*\] //; s/<\/title>//' \
      | tr -d '\n' \
      | tr '\t' ' ' \
      | tr -s ' '
  )
  # After squeezing, at most one space can remain at either end.
  title=${title# }
  title=${title% }

  # printf with a quoted argument: the title is never glob-expanded (a "*"
  # in a title must not turn into a directory listing) and a title starting
  # with "-n" or "-e" is not taken as an option.
  printf '%s\n' "$title"
}
function arxiv-latest-id() {
if [ $# -ne 1 ]; then
echo "Usage: arxiv-latest-id arxiv_url_or_id"
echo "Prints latest arxiv version, e.g.: XXXX.YYYYYv3"
else
if [[ $1 == http* ]]; then
url=$1
else
url=http://arxiv.org/abs/$1
fi
echo $(curl -s $(echo $url | sed -e 's/.pdf$//; s#/pdf/#/abs/#') | grep 'accesskey="f"' | perl -pe 's/^.*<a href="\/pdf\/([^"]+)" .*$/\1/')
fi
}
# Download the PDF of an arXiv paper and write it to stdout.
#
# Arguments: $1 - arXiv identifier (e.g. 1234.5678) or an http(s) URL of the
#                 paper's abstract or PDF page.
# Outputs:   the body returned by curl on stdout; the resolved download URL
#            (and curl's own progress output) on stderr.
# Returns:   1 when not called with exactly one argument; otherwise curl's
#            exit status.
function arxiv-dl() {
  if [ $# -ne 1 ]; then
    echo "Usage: arxiv-dl arxiv_url_or_id"
    echo "Downloads arxiv pdf to stdout."
    return 1
  fi

  # Keep the working URL out of the caller's shell.
  local url
  if [[ $1 == http* ]]; then
    url=$1
  else
    url=http://arxiv.org/pdf/$1
  fi
  # Make sure the URL carries a ".pdf" extension ...
  if [[ $url != *.pdf ]]; then
    url=${url}.pdf
  fi
  # ... and that an abstract-page URL points at the /pdf/ path instead.
  url=$(printf '%s\n' "$url" | sed -e 's#/abs/#/pdf/#')

  # Diagnostics go to the shell's stderr descriptor so they never mix with
  # the PDF bytes on stdout.
  echo "Download PDF URL: $url" >&2

  # -L makes curl follow HTTP redirects so the final document, not the
  # redirect response, is written out.  A browser-like User-Agent is sent, as
  # in the original request.  The URL is quoted to prevent word splitting and
  # globbing.
  curl -L -A 'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0' "$url"
}
# Download the PDF of an arXiv paper into a descriptively named file in the
# current directory.
#
# The file name is "<title>__<authors>__<id>.pdf", built from the output of
# arxiv-title, arxiv-authors and arxiv-latest-id for the same argument.
#
# Arguments: $1 - arXiv identifier or URL, passed unchanged to the helpers.
# Outputs:   the chosen file name on stderr; the PDF is written to that file.
# Returns:   1 when not called with exactly one argument; otherwise the exit
#            status of arxiv-dl.
function arxiv-dl-file() {
  if [ $# -ne 1 ]; then
    echo "Usage: arxiv-dl-file arxiv_url_or_id"
    echo "Downloads arxiv pdf to a file in the current directory."
    echo "Filename: title_with_no_spaces__Author1Author2__1111.1234v3.pdf"
    return 1
  fi

  # Keep working variables out of the caller's shell.
  local title latest_id authors filename

  # Cleaned-up title: lowercased, every run of non-alphanumerics replaced by
  # one "_".  The character classes are quoted so the shell cannot expand
  # them as glob patterns against files in the current directory.
  title=$(arxiv-title "$1" | tr '[:upper:]' '[:lower:]' | tr -cs '[:alnum:]' '_')
  # The title's trailing newline (and any leading punctuation) also turns
  # into "_"; strip it so the separator stays exactly "__".
  title=${title#_}
  title=${title%_}

  # Lowercased ID, spaces replaced by "_".
  latest_id=$(arxiv-latest-id "$1" | tr '[:upper:]' '[:lower:]' | tr -s ' ' '_')
  # An ID containing "/" (e.g. hep-th/9901001v1) would otherwise be treated
  # as a directory path by the output redirection.
  latest_id=${latest_id//\//_}

  # Last word of each author line, concatenated without separators.
  authors=$(arxiv-authors "$1" | sed -e 's/^.* //' | tr -d '\n')

  filename="${title}__${authors}__${latest_id}.pdf"
  echo "Downloading to $filename..." >&2
  arxiv-dl "$1" > "$filename"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment