Skip to content

Instantly share code, notes, and snippets.

@crosslife
Last active December 17, 2015 04:39
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save crosslife/5551993 to your computer and use it in GitHub Desktop.
Save crosslife/5551993 to your computer and use it in GitHub Desktop.
简单的校园网IEEE下载模块,输入论文链接中的ID自动尝试登录下载(只在校园网有效)
'''
Created on 2013-5-10
@author: crosslife
'''
from urllib import urlopen
import urllib2
import cookielib
import re
import time
import thread
import threading
# Module-level list, initially empty. Nothing in the visible source reads or
# writes it.
# NOTE(review): presumably a leftover from an earlier version - confirm
# before removing.
NAMELIST=[]
def getAbstract(paper_id):
    """Fetch the title and abstract of an IEEE Xplore paper.

    Requests the ``abs_all.jsp`` page for ``paper_id`` and extracts the
    first ``<h1>`` title and the first paragraph that follows the
    "Abstract" heading.

    Args:
        paper_id: arnumber of the paper; it is passed through ``str()``
            when the URL is built.

    Returns:
        ``"<title>###<abstract>"`` on success, or the sentinel string
        ``"empty###empty"`` when the page could not be fetched or when
        either field was not found in it.
    """
    requrl = "http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=" + str(paper_id)
    try:
        paperPage = urlopen(requrl)
        try:
            paperText = paperPage.read()
        finally:
            # Always release the connection, even if read() fails.
            paperPage.close()
    except Exception:
        # Best effort: any fetch failure is reported through the sentinel
        # return value below instead of propagating.
        paperText = ""
    titles = re.findall(r'<h1>\s*(.+?)\s*</h1>', paperText, re.IGNORECASE)
    abstracts = re.findall(r'<h2>Abstract</h2>\s*</a>\s*<p>(.+?)</p>',
                           paperText, re.IGNORECASE)
    if not titles or not abstracts:  # network exception or no match in page
        return "empty###empty"
    return titles[0] + "###" + abstracts[0]
# Check for authentication; on success, save the cookies to cookiefile.
def authCheck(cookiefile):
    """Check whether IEEE Xplore hands out its auth cookie to this host.

    Opens the conferences page with a cookie-aware opener and looks for a
    cookie named ``xploreCookies`` among the cookies received. When it is
    found, the whole jar is written to ``cookiefile``.

    Args:
        cookiefile: path of the Mozilla-format cookie file to write.

    Returns:
        True if the cookie was present and the jar was saved; False if the
        request failed or the cookie was absent.
    """
    cookies = cookielib.MozillaCookieJar(cookiefile)
    cookiehand = urllib2.HTTPCookieProcessor(cookies)
    opener = urllib2.build_opener(cookiehand)
    opener.addheaders = [
        ("User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11"),
        ("Host", "ieeexplore.ieee.org"),
    ]
    try:
        response = opener.open("http://ieeexplore.ieee.org/xpl/conferences.jsp")
    except Exception:
        # Deliberately not a bare ``except:``: KeyboardInterrupt and
        # SystemExit must propagate so a caller that retries in a loop can
        # still be interrupted.
        print("network error")
        return False
    # Only the cookies set by the response are needed; release the
    # connection right away.
    response.close()
    for item in cookies:
        if item.name == "xploreCookies":
            print("auth success!")
            cookies.save(cookiefile, ignore_discard=True, ignore_expires=True)
            return True
    return False
def downWithCookies(cookiefile, paper_id):
    """Download the PDF of a paper using cookies saved in ``cookiefile``.

    First tries to fetch the paper's abstract (retrying up to five times
    when the sentinel "empty###empty" comes back), then loads the cookie
    file, requests the ``stamp.jsp`` page, extracts the first
    ``<frame src="http...">`` link from it and saves the content behind
    that link as ``<paper_id>.pdf`` in the current directory.

    Args:
        cookiefile: path of a Mozilla-format cookie file to load.
        paper_id: arnumber of the paper; must be convertible with ``int()``
            for the progress messages.

    Returns:
        True when the PDF was written; False when the stamp page contains
        no frame link (reported as "cookies outdated").

    Exceptions raised while loading the cookie file, during the network
    requests or while writing the file are not caught here.
    """
    # The abstract itself is not used below; the fetch only drives the
    # retry/diagnostic messages.
    abstract = getAbstract(paper_id)
    if abstract == "empty###empty":  # mostly caused by network error
        print("None Abstract :%d ,retrying..." % int(paper_id))
        retries = 0
        while abstract == "empty###empty" and retries < 5:
            abstract = getAbstract(paper_id)
            retries += 1
        # Report failure only if the last retry failed as well; a success
        # on the final attempt must not be reported as an error.
        if abstract == "empty###empty":
            print("cann't get the abstract of %d" % int(paper_id))  # mostly caused by no abstract
    cookies = cookielib.MozillaCookieJar(cookiefile)
    cookies.load(cookiefile, ignore_discard=True, ignore_expires=True)
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookies))
    page = opener.open("http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=" + str(paper_id))
    try:
        pagetext = page.read()
    finally:
        page.close()
    matchGroup = re.findall(r'<frame src="(http.*?)"', pagetext, re.IGNORECASE)
    if not matchGroup:
        # No frame link in the stamp page.
        print("cookies outdated")
        return False
    pdfpage = opener.open(matchGroup[0])
    try:
        pdftext = pdfpage.read()
    finally:
        pdfpage.close()
    # ``with`` guarantees the file is closed even if the write fails.
    with open(str(paper_id) + ".pdf", "wb") as f:
        f.write(pdftext)
    return True
def timingDown(seconds, paper_id):
    """Wait ``seconds``, log in, then download the paper ``paper_id``.

    After the initial delay, ``authCheck("cookies.txt")`` is called
    repeatedly, sleeping 5 seconds after each failed attempt, until it
    succeeds; the loop has no upper bound. The download is then attempted
    once with ``downWithCookies``.

    Returns:
        True if the download succeeded, False otherwise.
    """
    time.sleep(seconds)
    print("Starting download...")
    attempt = 1
    logged_in = False
    while not logged_in:
        print("trying to login %d" % attempt)
        logged_in = authCheck("cookies.txt") == True
        if not logged_in:
            attempt += 1
            time.sleep(5)
    print("auth success!")
    downloaded = downWithCookies("cookies.txt", paper_id)
    if not downloaded:
        return False
    print("download success!")
    return True
def threadDown(seconds, paper_id):
    """Start ``timingDown(seconds, paper_id)`` on a new thread.

    Returns None; the started thread is not tracked.
    """
    worker_args = (seconds, paper_id)
    thread.start_new_thread(timingDown, worker_args)
def test():
    """Prompt for a paper id on stdin and download it without delay."""
    entered_id = raw_input("Input the id of the paper:")
    timingDown(0, entered_id)
# Run the interactive download only when executed as a script.
if __name__ == '__main__':
    test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment