Skip to content

Instantly share code, notes, and snippets.

@willwade
Last active December 21, 2015 09:59
Show Gist options
  • Save willwade/6288982 to your computer and use it in GitHub Desktop.
Save willwade/6288982 to your computer and use it in GitHub Desktop.
This takes one local BibTeX file, which may have PDF attachments linked to it, and then checks another CiteULike user's account for any PDFs that may be missing. NB: I would ideally like to make use of the JSON API for CiteULike, but it's a little hazy on uploading. NB2: I needed to patch my httplib with this: http://bugs.python.org/issue11898
#!/usr/bin/env python
# Contact: Will Wade willwa.de
# Date: April 2013
# Needs mechanize and pybtex
#
# Given a CiteULike user name and password, plus a local BibTeX file
# that links to attachments, upload any attachments that are missing
# from the user's CiteULike library.
import mechanize
import time
from pybtex.database.input import bibtex
import os, urllib
import simplejson as json
# settings
# CiteULike credentials used for the login.
cUser = 'testuser'
cPass = 'testpass'
# Directory holding the local PDFs; the local BibTeX file lives inside it.
localpapers = '/Users/willwade/Dropbox/Papers/'
localbib = localpapers + 'willwade.bib'
# Directory the downloaded CiteULike BibTeX export is written to.
tempdir = '/Users/willwade/Desktop/'
class CulError(Exception):
    """Error raised when CiteULike answers in a way this script cannot accept.

    Raised, for example, when a login attempt fails or is answered with an
    unrecognised redirect.  Raisers may attach the underlying HTTP error as
    a ``data`` attribute.
    """
    pass
class CiteULikeReader(object):
    """Upload locally held PDF attachments that a CiteULike library lacks.

    Constructing an instance parses the local BibTeX file and logs in to
    CiteULike through a ``mechanize`` browser.  ``getBibText`` then
    downloads the user's CiteULike library as BibTeX, and
    ``parseUserBibTex`` walks that download and uploads a local PDF for
    every entry that has no attachment on CiteULike but does have one
    recorded under the same key in the local BibTeX file.
    """

    # Minimum number of seconds kept between two rate-limited requests.
    MIN_API_WAIT = 5

    def __init__(self, user, password, localbib='', localpapers='.', tempdir='/tmp/'):
        """Parse the local BibTeX file and log in to CiteULike.

        user        -- CiteULike user name.
        password    -- CiteULike password.
        localbib    -- path of the local BibTeX file carrying attachment links.
        localpapers -- directory that holds the local PDF files.
        tempdir     -- directory the downloaded BibTeX export is written to.

        Raises CulError if the login is rejected or answered unexpectedly.
        """
        self.cUser = user
        self.cPass = password
        self.loggedin = False
        self.cites = ''
        self.localbib = localbib
        self.localpapers = localpapers
        self.tempdir = tempdir
        # Pretend the last access was long enough ago that the very first
        # request does not have to wait.
        self.last_api_access = time.time() - self.MIN_API_WAIT
        checkparser = bibtex.Parser()
        self.checkbibdata = checkparser.parse_file(localbib)
        self.loginToCiteULike()

    def wait_for_api_limit(self, min_wait=0):
        """Block until the minimum gap since the last recorded access is over.

        The gap is ``max(min_wait, MIN_API_WAIT)`` seconds.  Afterwards the
        current time is stored as the new last access time.
        """
        min_wait = max(min_wait, self.MIN_API_WAIT)
        remaining = min_wait - (time.time() - self.last_api_access)
        if remaining > 0:
            time.sleep(remaining)
        self.last_api_access = time.time()

    def _bibPath(self):
        """Return the path of the BibTeX file downloaded from CiteULike."""
        # os.path.join works whether or not tempdir ends with a separator.
        return os.path.join(self.tempdir, self.cUser + '.bib')

    def loginToCiteULike(self):
        """
        Handle login. This should populate our cookie jar.

        ``self.loggedin`` (and its historical alias ``self.logged_in``) is
        set to True only when CiteULike redirects to its front page.
        Raises CulError when the redirect signals a failed login or goes
        anywhere else; any HTTP error other than a 302 is re-raised as is.
        """
        self.browser = mechanize.Browser()
        self.browser.set_handle_robots(False)
        self.browser.addheaders = [
            ("User-agent", 'willwade/willwade@gmail.com citeusyncpy/1.0'),
        ]
        self.browser.open('http://www.citeulike.org/login?from=/')
        self.browser.select_form(name='frm')
        self.browser["username"] = self.cUser
        self.browser["password"] = self.cPass
        # Not logged in until the redirect below proves otherwise.
        self.loggedin = False
        self.wait_for_api_limit()
        try:
            # handle redirects manually to avoid connection flakiness
            self.browser.set_handle_redirect(False)
            self.browser.submit()
        except mechanize.HTTPError as e:
            # This may not work for non-gold users. See
            # http://www.citeulike.org/groupforum/2949?highlight=41927#msg_41927
            # for ideas.. feel free to write
            if e.getcode() != 302:
                raise
            # A redirect without a Location header is treated as unknown.
            next_page = e.info().getheader('Location') or ''
            if next_page == 'http://www.citeulike.org/':
                # success
                self.loggedin = True
                self.logged_in = True  # kept for code reading the old name
            elif 'status=login-failed' in next_page:
                raise CulError('Login Failed')
            else:
                err = CulError('Unknown login response')
                err.data = e
                raise err
        finally:
            self.browser.set_handle_redirect(True)

    def loginToCiteULikeJSON(self):
        """
        Handle login. This should populate our cookie jar. NB: I'd like this to work

        Experimental: posts the credentials as one JSON document to the
        ``login.json`` endpoint and prints the response object.
        NOTE(review): the response is not inspected, so ``self.loggedin``
        is set without confirmation that the login succeeded.
        """
        self.browser = mechanize.Browser()
        self.browser.set_handle_robots(False)
        self.browser.addheaders = [
            ("User-agent", 'willwade/willwade@gmail.com citeuulpy/1.0'),
        ]
        # Encode once; encoding the already encoded string again would send
        # a quoted JSON string instead of an object.
        data = json.dumps({'username': self.cUser, 'password': self.cPass})
        res = self.browser.open('http://www.citeulike.org/login.json', data=data)
        print(res)
        self.loggedin = True
        self.wait_for_api_limit()

    def uploadFileToCitationJSON(self, artid, file):
        """Experimental upload of ``file`` to article ``artid`` via the JSON endpoint.

        Prints and returns the response body.
        NOTE(review): json.dumps() cannot serialise a file object, so the
        request below raises TypeError until the payload is encoded in a
        form the endpoint accepts - the expected wire format needs to be
        confirmed against CiteULike.
        """
        with open(file, 'rb') as pdf:
            data = {
                'username': self.cUser,
                'article_id': artid,
                'file': pdf,
            }
            # addheaders must be a list of (name, value) pairs.
            self.browser.addheaders = [('Content-type', 'multipart/form-data')]
            self.browser.open('http://www.citeulike.org/personal_pdf_upload.json',
                              json.dumps(data))
        self.wait_for_api_limit()
        body = self.browser.response().get_data()
        print(body)
        return body

    def getBibText(self):
        """Download the user's CiteULike library as BibTeX into ``tempdir``.

        The export is requested with attachment names included and is
        saved as ``<tempdir>/<user>.bib``.
        """
        url = ('http://www.citeulike.org/bibtex/user/' + self.cUser +
               '?do_username_prefix=0&key_type=4&incl_amazon=0&clean_urls=1'
               '&smart_wrap=0&export_attachment_names=t'
               '&fieldmap=posted-at:date-added')
        # Respect the same request spacing as the other CiteULike calls.
        self.wait_for_api_limit()
        self.browser.retrieve(url, self._bibPath())

    def doesCiteExist(self, citation):
        """Return the attachment file name recorded locally for ``citation``.

        Looks ``citation`` up in the local BibTeX data.  The second
        ';'-separated part of its ``citeulike-attachment-1`` field is taken
        as a '/'-separated path, and the last path component is returned.
        Returns False when the key is unknown, has no attachment field, or
        the field does not have that shape.
        """
        entries = self.checkbibdata.entries
        if citation not in entries:
            return False
        fields = entries[citation].fields
        if "citeulike-attachment-1" not in fields:
            return False
        parts = fields["citeulike-attachment-1"].split(';')
        if len(parts) < 2:
            return False
        # Take the last component rather than a fixed index so the result
        # does not depend on how deep the recorded path is.
        file_name = parts[1].strip().split('/')[-1]
        return file_name or False

    def uploadFileToCitationMech(self, artid, file):
        """Upload ``file`` to article ``artid`` through the article page's form.

        Opens the user's article page, fills the ``fileupload_frm`` form
        with the file and submits it, then applies the request spacing.
        Raises CulError (with the HTTP error in ``data``) if the submit
        fails.
        """
        print('so want to upload %s  to  %s' % (file, artid))
        self.browser.open('http://www.citeulike.org/user/%s/article/%s'
                          % (self.cUser, artid))
        self.browser.select_form(name="fileupload_frm")
        # Binary mode: a PDF is not text.  The handle stays open until the
        # form has been submitted because it is handed to the form as is.
        with open(file, 'rb') as pdf:
            self.browser.form.add_file(pdf, 'application/pdf',
                                       filename=os.path.basename(file),
                                       name='file')
            try:
                self.browser.submit()
            except mechanize.HTTPError as e:
                print('error')
                print(e.getcode())
                print(e.read())
                err = CulError('Upload failed')
                err.data = e
                raise err
        self.wait_for_api_limit()

    def uploadFileToCitation(self, artid, file):
        """Experimental upload of ``file`` to article ``artid`` as a form post.

        Prints the request fields and the response body, and returns the
        response body.
        NOTE(review): the file content is sent as a url-encoded field; the
        endpoint may require a multipart upload instead - confirm against
        CiteULike before relying on this method.
        """
        print('so want to upload %s  to  %s' % (file, artid))
        fields = {
            'username': self.cUser,
            'article_id': artid,
            'file': file,
            'check': 'v2',
        }
        print(fields)
        with open(file, 'rb') as pdf:
            # Send the file's bytes, not the repr of the file object.
            payload = dict(fields, file=pdf.read())
        data = urllib.urlencode(payload)
        self.browser.open('http://www.citeulike.org/personal_pdf_upload', data)
        body = self.browser.response().get_data()
        print(body)
        return body

    def parseUserBibTex(self):
        """Upload local PDFs for downloaded entries that have no attachment.

        Reads the BibTeX file written by ``getBibText``.  Entries that
        already carry ``citeulike-attachment-1`` are reported and skipped.
        For the others, the local BibTeX data is asked for an attachment
        name; if that file exists in ``localpapers`` it is uploaded to the
        entry's ``citeulike-article-id``.
        """
        bib_path = self._bibPath()
        print(bib_path)
        bibdata = bibtex.Parser().parse_file(bib_path)
        # loop through the individual references
        for bib_id in bibdata.entries:
            fields = bibdata.entries[bib_id].fields
            # does the entry already have an attachment? If so move on.
            if "citeulike-attachment-1" in fields:
                print('%s ...exists' % bib_id)
                continue
            file_name = self.doesCiteExist(bib_id)
            if not file_name:
                continue
            # os.path.join also works when localpapers has no trailing slash.
            local_path = os.path.join(self.localpapers, file_name)
            print(local_path)
            if os.path.isfile(local_path):
                print('exists & ready to upload')
                self.uploadFileToCitationMech(fields['citeulike-article-id'],
                                              local_path)
# Run the sync with the settings above: constructing the reader parses the
# local BibTeX file and logs in, then the user's CiteULike library is
# downloaded and walked for entries that still need an attachment.
cureader = CiteULikeReader(
    user=cUser,
    password=cPass,
    localbib=localbib,
    localpapers=localpapers,
    tempdir=tempdir,
)
cureader.getBibText()
cureader.parseUserBibTex()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment