willwade/citeuupload.py

## citeuupload.py
#!/usr/bin/env python
# Contact: Will Wade willwa.de
# Date: April 2013
# Needs mechanize, pybtex and simplejson
#
# Given a CiteULike user name and password, and a local BibTeX file whose
# entries link to attachments, upload those attachments to the user's
# CiteULike library.

import mechanize
import time
from pybtex.database.input import bibtex
import os, urllib
import simplejson as json

# settings
# Each value can be overridden through the environment so that real
# credentials and machine-specific paths do not have to be written into the
# source; the fallbacks are the original hard-coded values.
cUser = os.environ.get('CITEULIKE_USER', 'testuser')
cPass = os.environ.get('CITEULIKE_PASS', 'testpass')
# Local BibTeX file whose entries carry the attachment links.
localbib = os.environ.get('CITEULIKE_LOCALBIB',
                          '/Users/willwade/Dropbox/Papers/willwade.bib')
# Directory holding the attachment files named in that BibTeX file.
localpapers = os.environ.get('CITEULIKE_LOCALPAPERS',
                             '/Users/willwade/Dropbox/Papers/')
# Directory the downloaded copy of the online library is written to.
tempdir = os.environ.get('CITEULIKE_TEMPDIR', '/Users/willwade/Desktop/')

class CulError(Exception):
    """Raised when CiteULike rejects a login or answers it unexpectedly."""

class CiteULikeReader(object):
    """Upload locally stored attachments to a user's CiteULike library.

    Construction parses the local BibTeX file and logs a mechanize browser
    session in to CiteULike.  ``getBibText`` then downloads the user's online
    library as BibTeX, and ``parseUserBibTex`` uploads a local file for every
    online entry that has no attachment yet but whose citation key has one
    recorded in the local BibTeX file.
    """

    # Minimum number of seconds kept between two throttled requests.
    MIN_API_WAIT = 5

    # BibTeX field that records an entry's first attachment.
    ATTACHMENT_FIELD = 'citeulike-attachment-1'

    def __init__(self, user, password, localbib='', localpapers='.', tempdir='/tmp/'):
        """Parse the local BibTeX file and log in to CiteULike.

        user, password -- CiteULike credentials.
        localbib       -- path of the local BibTeX file to read attachment
                          names from.
        localpapers    -- directory holding the local attachment files.
        tempdir        -- directory the downloaded library is written to.

        Raises CulError when the login is rejected or not understood.
        """
        self.cUser = user
        self.cPass = password
        self._set_logged_in(False)
        self.cites = ''
        self.localbib = localbib
        self.localpapers = localpapers
        self.tempdir = tempdir
        # Back-date the last access so the first request is not delayed.
        self.last_api_access = time.time() - self.MIN_API_WAIT
        checkparser = bibtex.Parser()
        self.checkbibdata = checkparser.parse_file(localbib)
        self.loginToCiteULike()

    def _set_logged_in(self, flag):
        """Record the login state under both attribute spellings in use."""
        # The original code wrote `loggedin` in some places and `logged_in`
        # in others; both are kept in step so either can be read.
        self.loggedin = flag
        self.logged_in = flag

    def _downloaded_bib_path(self):
        """Return the path of the downloaded copy of the user's library."""
        return os.path.join(self.tempdir, self.cUser + '.bib')

    def wait_for_api_limit(self, min_wait=0):
        """Sleep until enough time has passed since the last recorded access.

        min_wait -- requested gap in seconds; never less than MIN_API_WAIT
                    is applied.

        The current time is recorded as the new last access on return.
        """
        min_wait = max(min_wait, self.MIN_API_WAIT)
        elapsed_time = time.time() - self.last_api_access
        if elapsed_time < min_wait:
            time.sleep(min_wait - elapsed_time)
        self.last_api_access = time.time()

    def loginToCiteULike(self):
        """Log in through the CiteULike web form, populating the cookie jar.

        Redirect handling is switched off for the submit so that the 302
        answer can be inspected; it is switched back on afterwards whatever
        the outcome.

        Raises CulError('Login Failed') when the redirect target carries
        ``status=login-failed`` and CulError('Unknown login response') (with
        the HTTP error attached as ``.data``) for any other unexpected
        target.  HTTP errors other than 302 are re-raised unchanged.
        """
        # Do not claim to be logged in until the server has confirmed it.
        self._set_logged_in(False)
        self.browser = mechanize.Browser()
        self.browser.set_handle_robots(False)
        self.browser.addheaders = [
            ("User-agent", 'willwade/willwade@gmail.com citeusyncpy/1.0'),
        ]
        self.browser.open('http://www.citeulike.org/login?from=/')
        self.browser.select_form(name='frm')
        self.browser["username"] = self.cUser
        self.browser["password"] = self.cPass

        self.wait_for_api_limit()

        try:
            # handle redirects manually to avoid connection flakiness
            self.browser.set_handle_redirect(False)
            self.browser.submit()
            # NOTE(review): reaching this point means no redirect was
            # issued, so success could not be confirmed and the login flags
            # stay False.
        except mechanize.HTTPError as e:
            # This may not work for non-gold users. See
            # http://www.citeulike.org/groupforum/2949?highlight=41927#msg_41927
            # for ideas.. feel free to write
            if e.getcode() != 302:
                raise
            # A missing Location header is treated as an unknown response
            # instead of crashing on None.
            next_page = e.info().getheader('Location') or ''
            if next_page == 'http://www.citeulike.org/':
                self._set_logged_in(True)
            elif 'status=login-failed' in next_page:
                raise CulError('Login Failed')
            else:
                err = CulError('Unknown login response')
                err.data = e
                raise err
        finally:
            self.browser.set_handle_redirect(True)

    def loginToCiteULikeJSON(self):
        """Experimental login against the JSON endpoint.

        NB: the original author notes this is not known to work, and nothing
        in this file calls it.
        """
        self.browser = mechanize.Browser()
        self.browser.set_handle_robots(False)
        self.browser.addheaders = [
            ("User-agent", 'willwade/willwade@gmail.com citeuulpy/1.0'),
        ]
        # Encode once; the original encoded the already-encoded string again,
        # which sent a JSON string literal instead of a JSON object.
        payload = json.dumps({'username': self.cUser, 'password': self.cPass})
        res = self.browser.open('http://www.citeulike.org/login.json', data=payload)
        print(res)
        self._set_logged_in(True)
        self.wait_for_api_limit()

    def uploadFileToCitationJSON(self, artid, file):
        """Experimental: post an attachment to the JSON upload endpoint.

        artid -- CiteULike article id.
        file  -- path of the local file to attach.

        Returns the response body once a request succeeds.

        NOTE(review): the payload still carries the open file object, which
        json.dumps cannot serialise, so this raises TypeError before any
        request is made.  The body format the endpoint expects is not visible
        from this file -- confirm it before relying on this method.  Nothing
        in this file calls it; parseUserBibTex uses uploadFileToCitationMech.
        """
        with open(file, 'rb') as handle:
            data = {'username': self.cUser,
                    'article_id': artid,
                    'file': handle,
                    }
            # Serialise before touching the browser so that a failure leaves
            # its headers as they were.
            body = json.dumps(data)
            # addheaders is a list of (name, value) pairs, not a dict.
            self.browser.addheaders = [('Content-type', 'multipart/form-data')]
            self.browser.open('http://www.citeulike.org/personal_pdf_upload.json', body)
        self.wait_for_api_limit()
        reply = self.browser.response().get_data()
        print(reply)
        return reply

    def getBibText(self):
        """Download the user's online library as BibTeX into ``tempdir``.

        The file is written to ``<tempdir>/<user>.bib``, which is where
        parseUserBibTex reads it from.
        """
        url = ('http://www.citeulike.org/bibtex/user/' + self.cUser +
               '?do_username_prefix=0&key_type=4&incl_amazon=0&clean_urls=1'
               '&smart_wrap=0&export_attachment_names=t'
               '&fieldmap=posted-at:date-added')
        self.browser.retrieve(url, self._downloaded_bib_path())

    def doesCiteExist(self, citation):
        """Return the attachment file name recorded locally for `citation`.

        The local BibTeX entry whose key equals `citation` is looked up.  Its
        ``citeulike-attachment-1`` field is expected to hold ';'-separated
        parts, the second being a '/'-separated path; the last component of
        that path is the file name.

        Returns the file name, or False when there is no such entry, the
        entry has no attachment field, or the field does not have that shape.
        """
        for bib_id in self.checkbibdata.entries:
            if bib_id != citation:
                continue
            fields = self.checkbibdata.entries[bib_id].fields
            if self.ATTACHMENT_FIELD not in fields:
                continue
            parts = fields[self.ATTACHMENT_FIELD].split(';')
            if len(parts) < 2:
                # Malformed value: there is no path part to take a name from.
                return False
            # Take the last path component whatever the depth of the path;
            # the original indexed a fixed position and failed on any other
            # layout.
            file_name = parts[1].strip().rsplit('/', 1)[-1]
            return file_name or False

        return False

    def uploadFileToCitationMech(self, artid, file):
        """Upload `file` to article `artid` through the article page's form.

        artid -- CiteULike article id; it is concatenated into the page URL,
                 so it must be a string.
        file  -- path of the local file to attach.

        On an HTTP error the status code and response body are printed and
        the process exits with status 1.
        """
        print('so want to upload %s  to  %s' % (file, artid))
        # Use the instance's user, not the module-level setting.
        self.browser.open('http://www.citeulike.org/user/' + self.cUser + '/article/' + artid)
        self.browser.select_form(name="fileupload_frm")
        try:
            # Binary mode keeps the PDF bytes intact, and the handle is
            # closed as soon as the form has been submitted.
            with open(file, 'rb') as handle:
                self.browser.form.add_file(handle, 'application/pdf',
                                           filename=os.path.basename(file),
                                           name='file')
                self.browser.submit()
        except mechanize.HTTPError as e:
            print('error')
            print(e.getcode())
            # The error object carries the response body; no response was
            # ever assigned when submit() raised.
            print(e.read())
            raise SystemExit(1)
        self.wait_for_api_limit()

    def uploadFileToCitation(self, artid, file):
        """Experimental: post an attachment as a url-encoded form.

        artid -- CiteULike article id.
        file  -- path of the local file to attach.

        The process exits after the server's reply has been printed; nothing
        in this file calls this method.

        NOTE(review): urlencode stringifies the file object, so its repr
        rather than its contents is sent -- confirm the upload format before
        relying on this method.
        """
        print('so want to upload %s  to  %s' % (file, artid))
        with open(file, 'rb') as handle:
            data = urllib.urlencode({'username': self.cUser,
                                     'article_id': artid,
                                     'file': handle,
                                     'check': 'v2',
                                     })
        print({'username': self.cUser,
               'article_id': artid,
               'file': file,
               'check': 'v2',
               })
        self.browser.open('http://www.citeulike.org/personal_pdf_upload', data)
        print(self.browser.response().get_data())
        exit()

    def parseUserBibTex(self):
        """Upload a local attachment for every online entry that lacks one.

        Reads the BibTeX file written by getBibText.  Entries that already
        have an attachment are reported and skipped; for the others the
        attachment name recorded in the local BibTeX file is looked up and,
        if that file exists under ``localpapers``, uploaded.
        """
        bib_path = self._downloaded_bib_path()
        print(bib_path)
        bibdata = bibtex.Parser().parse_file(bib_path)
        for bib_id in bibdata.entries:
            fields = bibdata.entries[bib_id].fields
            if self.ATTACHMENT_FIELD in fields:
                print('%s ...exists' % bib_id)
                continue
            file_name = self.doesCiteExist(bib_id)
            if not file_name:
                continue
            # os.path.join copes with `localpapers` given without a trailing
            # separator, including the default '.'.
            local_path = os.path.join(self.localpapers, file_name)
            print(local_path)
            if os.path.isfile(local_path):
                print('exists & ready to upload')
                self.uploadFileToCitationMech(fields['citeulike-article-id'], local_path)

# Run the sync only when executed as a script, so that importing this module
# does not log in to CiteULike or upload anything as a side effect.
if __name__ == '__main__':
    # Log in, download the online library, then upload missing attachments.
    cureader = CiteULikeReader(cUser, cPass, localbib, localpapers, tempdir)
    cureader.getBibText()
    cureader.parseUserBibTex()
	#!/usr/bin/env python
	# Contact: Will Wade willwa.de
	# Date: April 2013
	# Needs mechanize, pybtex and simplejson
	#
	# Given a CiteULike user name and password, and a local BibTeX file whose
	# entries link to attachments, upload those attachments to the user's
	# CiteULike library.

	import mechanize
	import time
	from pybtex.database.input import bibtex
	import os, urllib
	import simplejson as json

	# settings
	# Each value can be overridden through the environment so that real
	# credentials and machine-specific paths do not have to be written into the
	# source; the fallbacks are the original hard-coded values.
	cUser = os.environ.get('CITEULIKE_USER', 'testuser')
	cPass = os.environ.get('CITEULIKE_PASS', 'testpass')
	# Local BibTeX file whose entries carry the attachment links.
	localbib = os.environ.get('CITEULIKE_LOCALBIB',
	                          '/Users/willwade/Dropbox/Papers/willwade.bib')
	# Directory holding the attachment files named in that BibTeX file.
	localpapers = os.environ.get('CITEULIKE_LOCALPAPERS',
	                             '/Users/willwade/Dropbox/Papers/')
	# Directory the downloaded copy of the online library is written to.
	tempdir = os.environ.get('CITEULIKE_TEMPDIR', '/Users/willwade/Desktop/')

	class CulError(Exception):
	    """Raised when CiteULike rejects a login or answers it unexpectedly."""

	class CiteULikeReader(object):
	    """Upload locally stored attachments to a user's CiteULike library.

	    Construction parses the local BibTeX file and logs a mechanize browser
	    session in to CiteULike.  ``getBibText`` then downloads the user's online
	    library as BibTeX, and ``parseUserBibTex`` uploads a local file for every
	    online entry that has no attachment yet but whose citation key has one
	    recorded in the local BibTeX file.
	    """

	    # Minimum number of seconds kept between two throttled requests.
	    MIN_API_WAIT = 5

	    # BibTeX field that records an entry's first attachment.
	    ATTACHMENT_FIELD = 'citeulike-attachment-1'

	    def __init__(self, user, password, localbib='', localpapers='.', tempdir='/tmp/'):
	        """Parse the local BibTeX file and log in to CiteULike.

	        user, password -- CiteULike credentials.
	        localbib       -- path of the local BibTeX file to read attachment
	                          names from.
	        localpapers    -- directory holding the local attachment files.
	        tempdir        -- directory the downloaded library is written to.

	        Raises CulError when the login is rejected or not understood.
	        """
	        self.cUser = user
	        self.cPass = password
	        self._set_logged_in(False)
	        self.cites = ''
	        self.localbib = localbib
	        self.localpapers = localpapers
	        self.tempdir = tempdir
	        # Back-date the last access so the first request is not delayed.
	        self.last_api_access = time.time() - self.MIN_API_WAIT
	        checkparser = bibtex.Parser()
	        self.checkbibdata = checkparser.parse_file(localbib)
	        self.loginToCiteULike()

	    def _set_logged_in(self, flag):
	        """Record the login state under both attribute spellings in use."""
	        # The original code wrote `loggedin` in some places and `logged_in`
	        # in others; both are kept in step so either can be read.
	        self.loggedin = flag
	        self.logged_in = flag

	    def _downloaded_bib_path(self):
	        """Return the path of the downloaded copy of the user's library."""
	        return os.path.join(self.tempdir, self.cUser + '.bib')

	    def wait_for_api_limit(self, min_wait=0):
	        """Sleep until enough time has passed since the last recorded access.

	        min_wait -- requested gap in seconds; never less than MIN_API_WAIT
	                    is applied.

	        The current time is recorded as the new last access on return.
	        """
	        min_wait = max(min_wait, self.MIN_API_WAIT)
	        elapsed_time = time.time() - self.last_api_access
	        if elapsed_time < min_wait:
	            time.sleep(min_wait - elapsed_time)
	        self.last_api_access = time.time()

	    def loginToCiteULike(self):
	        """Log in through the CiteULike web form, populating the cookie jar.

	        Redirect handling is switched off for the submit so that the 302
	        answer can be inspected; it is switched back on afterwards whatever
	        the outcome.

	        Raises CulError('Login Failed') when the redirect target carries
	        ``status=login-failed`` and CulError('Unknown login response') (with
	        the HTTP error attached as ``.data``) for any other unexpected
	        target.  HTTP errors other than 302 are re-raised unchanged.
	        """
	        # Do not claim to be logged in until the server has confirmed it.
	        self._set_logged_in(False)
	        self.browser = mechanize.Browser()
	        self.browser.set_handle_robots(False)
	        self.browser.addheaders = [
	            ("User-agent", 'willwade/willwade@gmail.com citeusyncpy/1.0'),
	        ]
	        self.browser.open('http://www.citeulike.org/login?from=/')
	        self.browser.select_form(name='frm')
	        self.browser["username"] = self.cUser
	        self.browser["password"] = self.cPass

	        self.wait_for_api_limit()

	        try:
	            # handle redirects manually to avoid connection flakiness
	            self.browser.set_handle_redirect(False)
	            self.browser.submit()
	            # NOTE(review): reaching this point means no redirect was
	            # issued, so success could not be confirmed and the login flags
	            # stay False.
	        except mechanize.HTTPError as e:
	            # This may not work for non-gold users. See
	            # http://www.citeulike.org/groupforum/2949?highlight=41927#msg_41927
	            # for ideas.. feel free to write
	            if e.getcode() != 302:
	                raise
	            # A missing Location header is treated as an unknown response
	            # instead of crashing on None.
	            next_page = e.info().getheader('Location') or ''
	            if next_page == 'http://www.citeulike.org/':
	                self._set_logged_in(True)
	            elif 'status=login-failed' in next_page:
	                raise CulError('Login Failed')
	            else:
	                err = CulError('Unknown login response')
	                err.data = e
	                raise err
	        finally:
	            self.browser.set_handle_redirect(True)

	    def loginToCiteULikeJSON(self):
	        """Experimental login against the JSON endpoint.

	        NB: the original author notes this is not known to work, and nothing
	        in this file calls it.
	        """
	        self.browser = mechanize.Browser()
	        self.browser.set_handle_robots(False)
	        self.browser.addheaders = [
	            ("User-agent", 'willwade/willwade@gmail.com citeuulpy/1.0'),
	        ]
	        # Encode once; the original encoded the already-encoded string again,
	        # which sent a JSON string literal instead of a JSON object.
	        payload = json.dumps({'username': self.cUser, 'password': self.cPass})
	        res = self.browser.open('http://www.citeulike.org/login.json', data=payload)
	        print(res)
	        self._set_logged_in(True)
	        self.wait_for_api_limit()

	    def uploadFileToCitationJSON(self, artid, file):
	        """Experimental: post an attachment to the JSON upload endpoint.

	        artid -- CiteULike article id.
	        file  -- path of the local file to attach.

	        Returns the response body once a request succeeds.

	        NOTE(review): the payload still carries the open file object, which
	        json.dumps cannot serialise, so this raises TypeError before any
	        request is made.  The body format the endpoint expects is not visible
	        from this file -- confirm it before relying on this method.  Nothing
	        in this file calls it; parseUserBibTex uses uploadFileToCitationMech.
	        """
	        with open(file, 'rb') as handle:
	            data = {'username': self.cUser,
	                    'article_id': artid,
	                    'file': handle,
	                    }
	            # Serialise before touching the browser so that a failure leaves
	            # its headers as they were.
	            body = json.dumps(data)
	            # addheaders is a list of (name, value) pairs, not a dict.
	            self.browser.addheaders = [('Content-type', 'multipart/form-data')]
	            self.browser.open('http://www.citeulike.org/personal_pdf_upload.json', body)
	        self.wait_for_api_limit()
	        reply = self.browser.response().get_data()
	        print(reply)
	        return reply

	    def getBibText(self):
	        """Download the user's online library as BibTeX into ``tempdir``.

	        The file is written to ``<tempdir>/<user>.bib``, which is where
	        parseUserBibTex reads it from.
	        """
	        url = ('http://www.citeulike.org/bibtex/user/' + self.cUser +
	               '?do_username_prefix=0&key_type=4&incl_amazon=0&clean_urls=1'
	               '&smart_wrap=0&export_attachment_names=t'
	               '&fieldmap=posted-at:date-added')
	        self.browser.retrieve(url, self._downloaded_bib_path())

	    def doesCiteExist(self, citation):
	        """Return the attachment file name recorded locally for `citation`.

	        The local BibTeX entry whose key equals `citation` is looked up.  Its
	        ``citeulike-attachment-1`` field is expected to hold ';'-separated
	        parts, the second being a '/'-separated path; the last component of
	        that path is the file name.

	        Returns the file name, or False when there is no such entry, the
	        entry has no attachment field, or the field does not have that shape.
	        """
	        for bib_id in self.checkbibdata.entries:
	            if bib_id != citation:
	                continue
	            fields = self.checkbibdata.entries[bib_id].fields
	            if self.ATTACHMENT_FIELD not in fields:
	                continue
	            parts = fields[self.ATTACHMENT_FIELD].split(';')
	            if len(parts) < 2:
	                # Malformed value: there is no path part to take a name from.
	                return False
	            # Take the last path component whatever the depth of the path;
	            # the original indexed a fixed position and failed on any other
	            # layout.
	            file_name = parts[1].strip().rsplit('/', 1)[-1]
	            return file_name or False

	        return False

	    def uploadFileToCitationMech(self, artid, file):
	        """Upload `file` to article `artid` through the article page's form.

	        artid -- CiteULike article id; it is concatenated into the page URL,
	                 so it must be a string.
	        file  -- path of the local file to attach.

	        On an HTTP error the status code and response body are printed and
	        the process exits with status 1.
	        """
	        print('so want to upload %s  to  %s' % (file, artid))
	        # Use the instance's user, not the module-level setting.
	        self.browser.open('http://www.citeulike.org/user/' + self.cUser + '/article/' + artid)
	        self.browser.select_form(name="fileupload_frm")
	        try:
	            # Binary mode keeps the PDF bytes intact, and the handle is
	            # closed as soon as the form has been submitted.
	            with open(file, 'rb') as handle:
	                self.browser.form.add_file(handle, 'application/pdf',
	                                           filename=os.path.basename(file),
	                                           name='file')
	                self.browser.submit()
	        except mechanize.HTTPError as e:
	            print('error')
	            print(e.getcode())
	            # The error object carries the response body; no response was
	            # ever assigned when submit() raised.
	            print(e.read())
	            raise SystemExit(1)
	        self.wait_for_api_limit()

	    def uploadFileToCitation(self, artid, file):
	        """Experimental: post an attachment as a url-encoded form.

	        artid -- CiteULike article id.
	        file  -- path of the local file to attach.

	        The process exits after the server's reply has been printed; nothing
	        in this file calls this method.

	        NOTE(review): urlencode stringifies the file object, so its repr
	        rather than its contents is sent -- confirm the upload format before
	        relying on this method.
	        """
	        print('so want to upload %s  to  %s' % (file, artid))
	        with open(file, 'rb') as handle:
	            data = urllib.urlencode({'username': self.cUser,
	                                     'article_id': artid,
	                                     'file': handle,
	                                     'check': 'v2',
	                                     })
	        print({'username': self.cUser,
	               'article_id': artid,
	               'file': file,
	               'check': 'v2',
	               })
	        self.browser.open('http://www.citeulike.org/personal_pdf_upload', data)
	        print(self.browser.response().get_data())
	        exit()

	    def parseUserBibTex(self):
	        """Upload a local attachment for every online entry that lacks one.

	        Reads the BibTeX file written by getBibText.  Entries that already
	        have an attachment are reported and skipped; for the others the
	        attachment name recorded in the local BibTeX file is looked up and,
	        if that file exists under ``localpapers``, uploaded.
	        """
	        bib_path = self._downloaded_bib_path()
	        print(bib_path)
	        bibdata = bibtex.Parser().parse_file(bib_path)
	        for bib_id in bibdata.entries:
	            fields = bibdata.entries[bib_id].fields
	            if self.ATTACHMENT_FIELD in fields:
	                print('%s ...exists' % bib_id)
	                continue
	            file_name = self.doesCiteExist(bib_id)
	            if not file_name:
	                continue
	            # os.path.join copes with `localpapers` given without a trailing
	            # separator, including the default '.'.
	            local_path = os.path.join(self.localpapers, file_name)
	            print(local_path)
	            if os.path.isfile(local_path):
	                print('exists & ready to upload')
	                self.uploadFileToCitationMech(fields['citeulike-article-id'], local_path)

	# Run the sync only when executed as a script, so that importing this module
	# does not log in to CiteULike or upload anything as a side effect.
	if __name__ == '__main__':
	    # Log in, download the online library, then upload missing attachments.
	    cureader = CiteULikeReader(cUser, cPass, localbib, localpapers, tempdir)
	    cureader.getBibText()
	    cureader.parseUserBibTex()