Skip to content

Instantly share code, notes, and snippets.

@apetresc
Created August 9, 2010 19:49
Show Gist options
  • Save apetresc/515978 to your computer and use it in GitHub Desktop.
Save apetresc/515978 to your computer and use it in GitHub Desktop.
#! /usr/bin/python
## arXiv script version 0.2
## Copyright 2008 Tom Brown
## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 3 of the
## License, or (at your option) any later version.
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
## See http://www.stringwiki.org/wiki/ArXiv_script for more usage
## instructions
'''arXiv script
Usage:
python arxiv.py reference [ -htabcjdps ] [ --help ]
"reference" must be a standard arXiv reference, e.g. hep-th/9711200, 0705.0303.
Options:
-h, --help
displays this help message
-t
displays the title
-a
displays the author(s)
-b
displays the aBstract
-c
displays the comments
-j
displays the journal reference
-d
downloads the PDF
-p
downloads the PS
-s
downloads the source file
'''
__version__ = "0.2"
__author__ = "Tom Brown"
__copyright__ = "Copyright 2008 Tom Brown, GNU GPL 3"
import sys, os, getopt, re, urllib,gzip
def findRefType(ref):
ref = ref.replace('arxiv:','')
if re.search(r'^[a-zA-Z\-]+/\d{7}$',ref):
type = 'old-style eprint'
elif re.search(r'^\d{7}$',ref):
type = 'old-style eprint'
ref = 'hep-th/' + ref
elif re.search('^\d{4}\.\d{4}$',ref):
type = 'new-style eprint'
else:
type = 'not arXiv'
return type, ref
def downloadPDF(ref,type,downloadPath):
downloadPath = os.path.expanduser(downloadPath)
if type == 'old-style eprint':
urllib.urlretrieve('http://arxiv.org/pdf/' + ref, downloadPath + ref.replace('/','-') + '.pdf')
elif type == 'new-style eprint':
urllib.urlretrieve('http://arxiv.org/pdf/' + ref, downloadPath + ref + '.pdf')
def downloadPS(ref,type,downloadPath):
downloadPath = os.path.expanduser(downloadPath)
filename = downloadPath + ref.replace('/','-')
urllib.urlretrieve('http://arxiv.org/ps/' + ref, filename)
gzipFile = gzip.GzipFile(filename)
psFile = open(filename + ".ps","w")
psFile.write(gzipFile.read())
psFile.close()
gzipFile.close()
os.remove(filename)
def downloadSource(ref,type,downloadPath):
downloadPath = os.path.expanduser(downloadPath)
filename = downloadPath + ref.replace('/','-')
urllib.urlretrieve('http://arxiv.org/e-print/' + ref, filename + ".dum")
gzipFile = gzip.GzipFile(filename + ".dum")
sourceFile = open(filename,"w")
sourceFile.write(gzipFile.read())
sourceFile.close()
gzipFile.close()
os.remove(filename + ".dum")
def getTitle(html):
title = html[html.find(">Title:</span>")+15:]
title = title[:title.find("</h1>")]
return title
def getAuthors(html):
authors = html[html.find(">Authors:</span>"):]
authors = authors[authors.find("\">")+2:]
authors = authors[:authors.find("</div>")]
authors = re.sub('<[^>]*>','',authors)
authors = authors.replace("\n","")
return authors
def getAbstract(html):
abstract = html[html.find("Abstract:</span>")+17:]
abstract = abstract[:abstract.find("</blockquote>")-1]
return abstract
def getComments(html):
if "comments" not in html:
return "no comments"
else:
comments = html[html.find("comments\">")+10:]
comments = comments[:comments.find("</td>")]
return comments
def getJref(html):
if "jref" not in html:
return "no journal reference"
else:
jref = html[html.find("jref\">")+6:]
jref = jref[:jref.find("</td>")]
return jref
if __name__ == "__main__":
authorOpt = 0
titleOpt = 0
abstractOpt = 0
commentsOpt = 0
jrefOpt = 0
pdfOpt = 0
psOpt = 0
sourceOpt = 0
try:
options, arguments = getopt.gnu_getopt(sys.argv[1:],
'hatbcjdpsv', ['help'])
except getopt.error:
print 'error: you tried to use an unknown option or the argument for an option that requires it was missing; try \'arxiv.py -h\' for more information'
sys.exit(0)
for o,a in options:
if o in ('-h','--help'):
print __doc__
sys.exit(0)
elif o == '-a':
authorOpt = 1
elif o == '-t':
titleOpt = 1
elif o == '-b':
abstractOpt = 1
elif o == '-c':
commentsOpt = 1
elif o == '-j':
jrefOpt = 1
elif o == '-d':
pdfOpt = 1
elif o == '-p':
psOpt = 1
elif o == '-s':
sourceOpt = 1
if len(options) == 0:
authorOpt = 1
titleOpt = 1
abstractOpt = 1
commentsOpt = 1
jrefOpt = 1
if len(arguments) != 1:
print 'you didn\'t specify an arXiv reference; try \'arxiv.py -h\' for more information'
sys.exit(0)
else:
ref=arguments[0]
type, ref = findRefType(ref)
if type=="not arXiv":
print "type not of arXiv form"
sys.exit(0)
if authorOpt+titleOpt+abstractOpt+commentsOpt+jrefOpt > 0:
htmlObject = urllib.urlopen('http://arxiv.org/abs/' + ref)
html = htmlObject.read()
if titleOpt:
title = getTitle(html)
print title
if authorOpt:
authors = getAuthors(html)
print authors
if abstractOpt:
abstract = getAbstract(html)
print abstract
if commentsOpt:
comments = getComments(html)
print comments
if jrefOpt:
jref = getJref(html)
print jref
if pdfOpt:
downloadPDF(ref,type,"")
if psOpt:
downloadPS(ref,type,"")
if sourceOpt:
downloadSource(ref,type,"")
@apetresc
Copy link
Author

apetresc commented Aug 9, 2010

Just in case the original source goes down: http://www.stringwiki.org/wiki/ArXiv_script

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment