Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Inspired by the Perl version, but not quite the same. Use it to download your BibTeX file and attachments on a regular basis. NB: it only downloads a PDF if it is not already present, so technically it makes a minimum of only 2 calls to CUL (the login and the download of the BibTeX file; obviously a lot more if downloading the PDFs). If there is more than one attachment, it will only …
#!/usr/bin/env python
# Contact: Will Wade willwa.de
# Date: April 2013
# Needs mechanize and pybtex
#
# NB: Little error checking going on in this script
# TO-DO: Check last-download-date of bibtex file later than last-modified date on CUL. ? possible
#
# With thanks to https://pypi.python.org/pypi/citeulike_api/0.1.3dev for the login part
import os
import time

import mechanize
from pybtex.database.input import bibtex
# settings
# CiteULike account name; also used to build the export URL and the name of
# the local "<user>.bib" file.
cUser = 'willwade'
# NOTE(review): plaintext credential stored in the source file — consider
# loading it from outside the script before sharing or committing it.
cPass = 'imnotstupid'
# Directory that receives "<user>.bib" and the downloaded attachments; keep
# the trailing slash.
localDir = '/Users/willwade/Dropbox/Papers/'
class CulError(Exception):
    """Raised when CiteULike rejects the login or answers unexpectedly."""
class CiteULikeReader(object):
    """Download a CiteULike user's BibTeX export and its PDF attachments.

    Constructing an instance logs in immediately; afterwards call
    ``getBibText()`` and then ``downloadPDFS()``.
    """

    # Minimum number of seconds enforced between two rate-limited requests.
    MIN_API_WAIT = 5
    # BibTeX field holding the first attachment of a reference.
    ATTACHMENT_FIELD = "citeulike-attachment-1"
    # Position of the file name once the attachment path is split on '/'.
    ATTACHMENT_NAME_SEGMENT = 7

    def __init__(self, user, password, localDir=''):
        """Store the settings and log in to CiteULike.

        user      -- CiteULike user name
        password  -- CiteULike password
        localDir  -- directory that receives the .bib file and attachments

        Raises CulError when the login is rejected or not understood.
        """
        self.cUser = user
        self.cPass = password
        self.loggedin = False
        self.getPDFs = True
        self.cites = ''
        self.localDir = localDir
        # Pretend the last request happened long enough ago that the first
        # rate-limited call does not have to sleep.
        self.last_api_access = time.time() - self.MIN_API_WAIT
        self.loginToCiteULike()

    def wait_for_api_limit(self, min_wait=0):
        """Sleep until at least max(min_wait, MIN_API_WAIT) seconds have
        passed since the previous rate-limited request, then record now as
        the time of the latest request."""
        min_wait = max(min_wait, self.MIN_API_WAIT)
        elapsed_time = time.time() - self.last_api_access
        if elapsed_time < min_wait:
            time.sleep(min_wait - elapsed_time)
        self.last_api_access = time.time()

    def _local_path(self, file_name):
        """Return file_name located inside self.localDir."""
        return os.path.join(self.localDir, file_name)

    @staticmethod
    def _parse_attachment(attachment):
        """Split an attachment field into (download_url, file_name).

        The download path is the second ';'-separated item of the field and
        the file name is one fixed '/'-separated segment of that path.
        Returns None when the field does not have that shape.
        """
        parts = attachment.split(';')
        if len(parts) < 2:
            return None
        path = parts[1].strip()
        segments = path.split('/')
        index = CiteULikeReader.ATTACHMENT_NAME_SEGMENT
        if len(segments) <= index or not segments[index]:
            return None
        return 'http://www.citeulike.org' + path, segments[index]

    def loginToCiteULike(self):
        """
        Handle login. This should populate our cookie jar.

        Sets self.loggedin (and the legacy alias self.logged_in) to True only
        when CiteULike redirects to its front page. Raises CulError on a
        failed or unrecognised login response, and re-raises any HTTP error
        that is not a 302 redirect.
        """
        self.browser = mechanize.Browser()
        self.browser.set_handle_robots(False)
        self.browser.addheaders = [
            ("User-agent", 'willwade/willwade@gmail.com citeusyncpy/1.0'),
        ]
        self.browser.open('http://www.citeulike.org/login?from=/')
        self.browser.select_form(name='frm')
        self.browser["username"] = self.cUser
        self.browser["password"] = self.cPass
        # Not logged in until the server confirms it below.
        self.loggedin = False
        self.wait_for_api_limit()
        try:
            # handle redirects manually to avoid connection flakiness
            self.browser.set_handle_redirect(False)
            self.browser.submit()
            # NOTE(review): a response without a redirect leaves loggedin
            # False; presumably the login form was served again — confirm.
        except mechanize.HTTPError as e:
            # This may not work for gold users. See
            # http://www.citeulike.org/groupforum/2949?highlight=41927#msg_41927
            # for ideas.. feel free to write
            if e.getcode() != 302:
                raise
            next_page = e.info().get('Location')
            if next_page == 'http://www.citeulike.org/':
                # success
                self.loggedin = True
                # Older spelling of the flag, kept for backward compatibility.
                self.logged_in = True
            elif next_page and 'status=login-failed' in next_page:
                raise CulError('Login Failed')
            else:
                # Covers unknown targets and a missing Location header.
                err = CulError('Unknown login response')
                err.data = e
                raise err
        finally:
            self.browser.set_handle_redirect(True)

    def getBibText(self):
        """Download the user's BibTeX export to "<localDir>/<user>.bib"."""
        self.wait_for_api_limit()
        self.browser.retrieve('http://www.citeulike.org/bibtex/user/'+self.cUser+'?do_username_prefix=0&key_type=4&incl_amazon=0&clean_urls=1&smart_wrap=0&export_attachment_names=t&fieldmap=posted-at:date-added', self._local_path(self.cUser+'.bib'))

    def downloadPDFS(self):
        """Download the first attachment of every reference in the local
        .bib file, skipping references without an attachment and files that
        already exist in localDir."""
        # open a bibtex file
        parser = bibtex.Parser()
        bibdata = parser.parse_file(self._local_path(self.cUser+'.bib'))
        # loop through the individual references
        for bib_id in bibdata.entries:
            fields = bibdata.entries[bib_id].fields
            try:
                attachment = fields[self.ATTACHMENT_FIELD]
            except KeyError:
                # field may not exist for a reference
                continue
            target = self._parse_attachment(attachment)
            if target is None:
                # Malformed attachment field: skip rather than abort the run.
                continue
            filedl, file_name = target
            destination = self._local_path(file_name)
            if os.path.isfile(destination):
                continue
            # Doesn't exist. Download it
            self.browser.retrieve(filedl, destination)
            self.wait_for_api_limit()
# Script driver: constructing the reader performs the login, then the BibTeX
# export is fetched and finally the attachments listed in it are downloaded.
cureader = CiteULikeReader(cUser, cPass, localDir)
cureader.getBibText()
cureader.downloadPDFS()
@simonjudge
Copy link

simonjudge commented Oct 30, 2013

Awesome! (Just need to carefully check that importing to Mendeley is OK.)

Had to change line 103 to: (filename, headers) = self.browser.retrieve(filedl,localDir+'/'+file_name)
To cope with rubbish Windows... (-;

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment