Created October 10, 2015 13:32
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
</project>

<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="jdk" jdkName="Python 3.4.3 (E:\soft\Python3.4\python.exe)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="DaemonCodeAnalyzer">
    <disable_hints />
  </component>
  <component name="DependencyValidationManager">
    <option name="SKIP_IMPORT_STATEMENTS" value="false" />
  </component>
  <component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
  <component name="ProjectLevelVcsManager" settingsEditedManually="false">
    <OptionsSetting value="true" id="Add" />
    <OptionsSetting value="true" id="Remove" />
    <OptionsSetting value="true" id="Checkout" />
    <OptionsSetting value="true" id="Update" />
    <OptionsSetting value="true" id="Status" />
    <OptionsSetting value="true" id="Edit" />
    <ConfirmationsSetting value="0" id="Add" />
    <ConfirmationsSetting value="0" id="Remove" />
  </component>
  <component name="ProjectModuleManager">
    <modules />
  </component>
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.4.3 (E:\soft\Python3.4\python.exe)" project-jdk-type="Python SDK" />
  <component name="RunManager">
    <list size="0" />
  </component>
</project>

<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/GDUFS_service.iml" filepath="$PROJECT_DIR$/.idea/GDUFS_service.iml" />
    </modules>
  </component>
</project>

<component name="DependencyValidationManager">
  <state>
    <option name="SKIP_IMPORT_STATEMENTS" value="false" />
  </state>
</component>

<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="" vcs="" />
  </component>
</project>

<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ChangeListManager">
    <option name="TRACKING_ENABLED" value="true" />
    <option name="SHOW_DIALOG" value="false" />
    <option name="HIGHLIGHT_CONFLICTS" value="true" />
    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
    <option name="LAST_RESOLUTION" value="IGNORE" />
  </component>
  <component name="ChangesViewManager" flattened_view="true" show_ignored="false" />
  <component name="CreatePatchCommitExecutor">
    <option name="PATCH_PATH" value="" />
  </component>
  <component name="DaemonCodeAnalyzer">
    <disable_hints />
  </component>
  <component name="ProjectLevelVcsManager" settingsEditedManually="false">
    <OptionsSetting value="true" id="Add" />
    <OptionsSetting value="true" id="Remove" />
    <OptionsSetting value="true" id="Checkout" />
    <OptionsSetting value="true" id="Update" />
    <OptionsSetting value="true" id="Status" />
    <OptionsSetting value="true" id="Edit" />
    <ConfirmationsSetting value="0" id="Add" />
    <ConfirmationsSetting value="0" id="Remove" />
  </component>
  <component name="RunManager">
    <list size="0" />
  </component>
  <component name="ShelveChangesManager" show_recycled="false" />
  <component name="TaskManager">
    <task active="true" id="Default" summary="Default task">
      <option name="number" value="Default" />
    </task>
    <servers />
  </component>
  <component name="VcsContentAnnotationSettings">
    <option name="myLimit" value="2678400000" />
  </component>
  <component name="VcsManagerConfiguration">
    <option name="myTodoPanelSettings">
      <TodoPanelSettings />
    </option>
  </component>
  <component name="XDebuggerManager">
    <breakpoint-manager />
    <watches-manager />
  </component>
</project>

<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
  <name>GDUFS_service</name>
  <comment></comment>
  <projects>
  </projects>
  <buildSpec>
    <buildCommand>
      <name>org.python.pydev.PyDevBuilder</name>
      <arguments>
      </arguments>
    </buildCommand>
  </buildSpec>
  <natures>
    <nature>org.python.pydev.pythonNature</nature>
  </natures>
</projectDescription>

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?eclipse-pydev version="1.0"?><pydev_project>
  <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
    <path>/${PROJECT_DIR_NAME}/src</path>
  </pydev_pathproperty>
  <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 3.0</pydev_property>
  <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
</pydev_project>

eclipse.preferences.version=1
encoding//src/service/test.py=utf8

# Logs in to the GDUFS unified authentication portal and returns the session cookies.
from http.cookiejar import LWPCookieJar
from urllib import request, parse

CHARSET = 'utf-8'
FORM_URL = 'http://auth.gdufs.edu.cn/pkmslogin.form'


def login(username, password):
    cookie = LWPCookieJar()
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    post_data = {'username': username, 'password': password, 'login-form-type': 'pwd'}
    post_data = parse.urlencode(post_data).encode(CHARSET)
    req = request.Request(FORM_URL, post_data)
    try:
        opener.open(req).read().decode('utf-8')
    except:
        print("%s's login failed" % username)
        return None
    else:
        print("%s's login was successful" % username)
        return cookie


# Sample serialized cookies (Set-Cookie3 / LWP format), used by the other crawlers' __main__ test blocks.
COOKIE = '''Set-Cookie3: PD-ID="ufQROxGk5NVGDy+BS6EAHpsVgymWEnvJuPhMZ4hVF7pP1UN7y7vjbVntDDJwvP6WJt1G8yODUTHMAwp+yrPofhWu3nqttxSTcvSIrDUL2oq7P09V/5LDB8EIrteOCok7Qhaf8gkbLdxodCK9U7hn/2LNEViFu4eGZQAG/TS58z70XgvPQpXIf51TjkfIdSWr01pglZPoN5/RO3Q44DyOtNb1b5rCdGYxK4FG5uP1EkxHlzT81iGu5tnsc+hQoLn/NMfqKldBLjo="; path="/"; domain=".gdufs.edu.cn"; path_spec; domain_dot; discard; version=0
Set-Cookie3: PD-H-SESSION-ID="4_+a+HtbfLW1yUTtpyJ2szvdVcVHQ0Eio17aahObfwiF5FXp7t"; path="/"; domain="auth.gdufs.edu.cn"; path_spec; discard; version=0
'''

if __name__ == '__main__':
    print(login('20131003502', '').as_lwp_str(True, True))

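The cookie jar returned by login() is what ties the modules together: the GDUFS_service layer further down serializes it with as_lwp_str() and hands the Set-Cookie3 text back to the caller, and util.cookie_from_str() rebuilds a jar from that text for the crawler modules. A minimal round-trip sketch (the student number and password are placeholders, not real credentials):

# hypothetical usage, assuming the auth server is reachable
jar = login('20131003502', 'password')           # LWPCookieJar on success, None on failure
if jar is not None:
    cookie_str = jar.as_lwp_str(True, True)      # serialize, keeping session (discard) cookies
    # ... later, possibly in another process:
    from util import cookie_from_str
    restored = cookie_from_str(cookie_str)       # jar usable by the crawler modules again
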
'''
Created on 2015-08-18
@author: wan
'''
from bs4 import BeautifulSoup as Soup
from spyne import Unicode
from spyne.model.complex import ComplexModel
from urllib import request

from crawler import CHARSET


class CETScore(ComplexModel):
    year = Unicode
    term = Unicode
    examination_name = Unicode
    examinee_number = Unicode
    date = Unicode
    score = Unicode
    listening_score = Unicode
    reading_score = Unicode
    writing_score = Unicode
    comprehensive_score = Unicode

    def __init__(self, year, term, examination_name, examinee_number, date, score,
                 listening_score, reading_score, writing_score, comprehensive_score):
        (self.year, self.term, self.examination_name, self.examinee_number, self.date, self.score,
         self.listening_score, self.reading_score, self.writing_score, self.comprehensive_score) \
            = (year, term, examination_name, examinee_number, date, score,
               listening_score, reading_score, writing_score, comprehensive_score)


def crawl(cookie, student_number):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    soup = Soup(opener.open('http://jw.gdufs.edu.cn/xsdjkscx.aspx?student_number=%s' % student_number),
                from_encoding=CHARSET)
    table = soup.table
    del table.attrs
    table.tr.decompose()  # drop the header row
    CET_score_list = []
    for item in table.find_all('tr'):
        CET_score_list.append(CETScore(item.contents[1].text, item.contents[2].text, item.contents[3].text,
                                       item.contents[4].text, item.contents[5].text, item.contents[6].text,
                                       item.contents[7].text, item.contents[8].text, item.contents[9].text,
                                       item.contents[10].text))
    return CET_score_list


if __name__ == '__main__':
    from crawler import COOKIE
    from util import cookie_from_str
    cookie = cookie_from_str(COOKIE)
    print(crawl(cookie, '20131003502'))

'''
Created on 2015-08-18
@author: wan
'''
from bs4 import BeautifulSoup as Soup
import re
from spyne import Unicode, Integer
from spyne.model.complex import ComplexModel
from urllib import request, parse

from crawler import CHARSET

# Maps the Chinese weekday character (Monday..Sunday) to an index 0..6.
__DATE_DICT = {'一': 0, '二': 1, '三': 2, '四': 3, '五': 4, '六': 5, '日': 6}


class Course(ComplexModel):
    course_name = Unicode
    teacher_name = Unicode
    place = Unicode
    start_time = Integer
    numb = Integer

    def __init__(self, course_name, teacher_name, place, start_time, numb):
        self.course_name, self.teacher_name, self.place, self.start_time, self.numb = \
            course_name, teacher_name, place, start_time, numb


def crawl(cookie, student_number, year, term):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    url = 'http://jw.gdufs.edu.cn/xskbcx.aspx?student_number=%s' % student_number
    soup = Soup(opener.open(url), from_encoding=CHARSET)
    view_state = soup.find('input', {'name': '__VIEWSTATE'})['value']
    table = soup.table
    selected_list = table.find_all('option', {'selected': True})
    if len(selected_list) == 2 and year == selected_list[0]['value'] and term == selected_list[1]['value']:
        # The requested year/term is already selected; the timetable is on this page.
        table = soup.find_all('table')[1]
    else:
        # Post back with the requested year/term to fetch the matching timetable.
        post_data = {'__EVENTTARGET': '', 'xnd': year, 'xqd': term, '__VIEWSTATE': view_state}
        post_data = parse.urlencode(post_data).encode(CHARSET)
        req = request.Request(url, post_data)
        soup = Soup(opener.open(req), from_encoding=CHARSET)
        table = soup.find_all('table')[1]
    course_table = [[], [], [], [], [], [], []]  # one list of courses per weekday
    table.tr.decompose()  # drop the header row
    for td in table.find_all('td', {'align': 'Center'}):
        if re.match('\\s+', td.text):
            continue
        content = [item for item in re.split('<.+?>', str(td)) if item]
        course_name = content[0]
        teacher_name = content[2]
        place = content[3]
        # Cell text like "周X第m,n节": group(1) is the weekday, group(2) the period list.
        matcher = re.search('周(.*)第(.*)节', content[1])
        t = matcher.group(2).split(',')
        start_time = t[0]
        numb = len(t)
        course = Course(course_name, teacher_name, place, start_time, numb)
        course_table[__DATE_DICT[matcher.group(1)]].append(course)
    return course_table


if __name__ == '__main__':
    from crawler import COOKIE
    from util import cookie_from_str
    cookie = cookie_from_str(COOKIE)
    print(crawl(cookie, '20131003502', '2015-2016', '1'))

'''
Created on 2015-09-02
@author: wan
'''
import re
from spyne import Unicode
from spyne.model.complex import ComplexModel
from bs4 import BeautifulSoup as Soup
from urllib import request

from crawler import CHARSET

INFORMATION_URL = 'http://auth.gdufs.edu.cn'


class Information(ComplexModel):

    def __init__(self, name, photo_url, identity, academy):
        self.name = name
        self.photo_url = photo_url
        self.identity = identity
        self.academy = academy

    name = Unicode
    photo_url = Unicode
    identity = Unicode
    academy = Unicode


def crawl(cookie):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    soup = Soup(opener.open(INFORMATION_URL), from_encoding=CHARSET)
    td = soup.find('td', class_='portletBody')
    text = td.text
    try:
        # The name is the run of CJK characters before the first comma.
        name = re.search('([\u4e00-\u9fa5]+),', text).group(1)
    except:
        name = ''
    photo_url = 'http://auth.gdufs.edu.cn%s' % td.img['src']
    # "身份" = identity/role, "院系" = school/department.
    identity = re.search('身份:[\\s\\S]*?([\u4e00-\u9fa5]+)', text).group(1)
    try:
        academy = re.search('院系:[\\s\\S]*?([\u4e00-\u9fa5]+)', text).group(1)
    except:
        academy = ''
    return Information(name, photo_url, identity, academy)


if __name__ == '__main__':
    from crawler import COOKIE
    from util import cookie_from_str
    cookie = cookie_from_str(COOKIE)
    print(crawl(cookie))

'''
Created on 2015-09-01
@author: wan
'''
from spyne.model.complex import ComplexModel
from bs4 import BeautifulSoup as Soup
from urllib.request import urlopen
import re
from spyne import Unicode

from crawler import CHARSET

NEWS = 'gwxw'          # campus news column
ANNOUNCEMENT = 'tzgg'  # notices / announcements column


class News(ComplexModel):
    date = Unicode
    title = Unicode
    url = Unicode

    def __init__(self, date, title, url):
        self.date, self.title, self.url = date, title, url


def crawl(aim, page=1):
    if aim not in [NEWS, ANNOUNCEMENT]:
        return []
    url = 'http://www.gdufs.edu.cn/%s.htm' % aim
    soup = Soup(urlopen(url), from_encoding=CHARSET)
    if page != 1:
        try:
            # The pager cell text contains "current/total"; older pages live at <aim>/<n>.htm.
            td = soup.find('td', id=re.compile('fanye.+'))
            max_page = eval(re.search('\\d+/(\\d+)', td.text).group(1)) + 1
        except:
            max_page = 1
        if max_page <= page:
            return []
        else:
            url = 'http://www.gdufs.edu.cn/%s/%u.htm' % (aim, max_page - page)
            soup = Soup(urlopen(url), from_encoding=CHARSET)
    news_list = []
    for li in soup.find('div', {'class': 'm_content'}).find_all('li'):
        date = li.contents[1].text
        title = li.contents[3].text
        url = 'http://www.gdufs.edu.cn/%s' % li.contents[3].a['href'].replace('../', '')
        news_list.append(News(date, title, url))
    return news_list


if __name__ == '__main__':
    for item in crawl(NEWS, 1):
        print(item)

'''
Created on 2015-09-01
@author: wan
'''
from bs4 import BeautifulSoup as Soup
from spyne.model.complex import ComplexModel
from urllib import request, parse
from spyne import Unicode

from crawler import CHARSET


class Score(ComplexModel):

    def __init__(self, course_id, course_name, type1, type2, credit, grade_point, normal_performance,
                 midterm_exam_score, final_exam_score, experiment_score, score, minor_mark,
                 make_up_exam_achievement, rebuild_achievement, academy_name, remark, rebuild_mark):
        self.course_id = course_id
        self.course_name = course_name
        self.type1 = type1
        self.type2 = type2
        self.credit = credit
        self.grade_point = grade_point
        self.normal_performance = normal_performance
        self.midterm_exam_score = midterm_exam_score
        self.final_exam_score = final_exam_score
        self.experiment_score = experiment_score
        self.score = score
        self.minor_mark = minor_mark
        self.make_up_exam_achievement = make_up_exam_achievement
        self.rebuild_achievement = rebuild_achievement
        self.academy_name = academy_name
        self.remark = remark
        self.rebuild_mark = rebuild_mark

    course_id = Unicode
    course_name = Unicode
    type1 = Unicode
    type2 = Unicode
    credit = Unicode
    grade_point = Unicode
    normal_performance = Unicode
    midterm_exam_score = Unicode
    final_exam_score = Unicode
    experiment_score = Unicode
    score = Unicode
    minor_mark = Unicode
    make_up_exam_achievement = Unicode
    rebuild_achievement = Unicode
    academy_name = Unicode
    remark = Unicode
    rebuild_mark = Unicode


def crawl(cookie, student_number, year, term):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    url = 'http://jw.gdufs.edu.cn/xscj_gc.aspx?student_number=%s' % student_number
    soup = Soup(opener.open(url), from_encoding=CHARSET)
    view_state = soup.find('input', {'name': '__VIEWSTATE'})['value']
    # 'BUTTON1' carries the Chinese label of the "query by term" button on the score page.
    post_data = {'BUTTON1': '按学期查询', 'ddlXN': year, 'ddlXQ': term, '__VIEWSTATE': view_state}
    post_data = parse.urlencode(post_data).encode(CHARSET)
    req = request.Request(url, post_data)
    soup = Soup(opener.open(req), from_encoding=CHARSET)
    table = soup.table
    table.tr.decompose()  # drop the header row
    score_list = []
    for item in table.find_all('tr'):
        course_id = item.contents[3].text
        course_name = item.contents[4].text
        type1 = item.contents[5].text
        type2 = item.contents[6].text
        credit = item.contents[7].text
        grade_point = item.contents[8].text
        normal_performance = item.contents[9].text
        midterm_exam_score = item.contents[10].text
        final_exam_score = item.contents[11].text
        experiment_score = item.contents[12].text
        score = item.contents[13].text
        minor_mark = item.contents[14].text
        make_up_exam_achievement = item.contents[15].text
        rebuild_achievement = item.contents[16].text
        academy_name = item.contents[17].text
        remark = item.contents[18].text
        rebuild_mark = item.contents[19].text
        score_list.append(Score(course_id, course_name, type1, type2, credit, grade_point, normal_performance,
                                midterm_exam_score, final_exam_score, experiment_score, score, minor_mark,
                                make_up_exam_achievement, rebuild_achievement, academy_name, remark, rebuild_mark))
    return score_list


if __name__ == '__main__':
    from crawler import COOKIE
    from util import cookie_from_str
    cookie = cookie_from_str(COOKIE)
    for item in crawl(cookie, '20131003502', '2014-2015', '1'):
        print(item)

'''
Created on 2015-09-01
@author: wan
'''
from bs4 import BeautifulSoup as Soup
from spyne.model.complex import ComplexModel
from urllib.request import urlopen
from spyne import Unicode

from crawler import CHARSET

# 7-day forecast page on weather.com.cn used by this crawler.
WEATHER_URL = 'http://www.weather.com.cn/weather/101280102.shtml'


class Weather(ComplexModel):
    week = Unicode
    date = Unicode
    weather = Unicode
    highest_temperature = Unicode
    lowest_temperature = Unicode
    wind = Unicode

    def __init__(self, week, date, weather, highest_temperature, lowest_temperature, wind):
        self.week, self.date, self.weather, self.highest_temperature, self.lowest_temperature, self.wind = \
            week, date, weather, highest_temperature, lowest_temperature, wind


def crawl():
    soup = Soup(urlopen(WEATHER_URL), from_encoding=CHARSET)
    div = soup.find('div', id='7d')  # the seven-day forecast container
    weather_list = []
    for li in div.find_all('li'):
        week = li.find('h1').text
        date = li.find('h2').text
        weather = li.find('p', class_='wea').text
        highest_temperature = li.find('p', class_='tem tem1').text.strip()
        lowest_temperature = li.find('p', class_='tem tem2').text.strip()
        wind = li.find('p', class_='win').text.strip()
        weather_list.append(Weather(week, date, weather, highest_temperature, lowest_temperature, wind))
    return weather_list


if __name__ == '__main__':
    for item in crawl():
        print(item)

'''
Created on 2015-09-02
@author: wan
'''
from spyne import Integer, Unicode, Float
from spyne.model.complex import ComplexModel
from bs4 import BeautifulSoup as Soup
from urllib import request, parse
import re

from crawler import CHARSET

# Card state as shown on the campus-card site: "正常" (normal) -> 0, "冻结" (frozen) -> 1.
STATE_DICT = {'正常': 0, '冻结': 1}

YKT_TRY_URL = 'http://ykt.gdufs.edu.cn/gdufsPortalHome.action'
YKT_INFORMATION_URL = 'http://ykt.gdufs.edu.cn/accountcardUser.action'
YKT_ACCOUNT_URL = 'http://ykt.gdufs.edu.cn/accounthisTrjn.action'
YKT_TODAY_ACCOUNT_URL = 'http://ykt.gdufs.edu.cn/accounttodatTrjnObject.action'
YKT_HISTORY_ACCOUNT_URL = 'http://ykt.gdufs.edu.cn/accountconsubBrows.action'

# Transaction-type filter values; the keys are the Chinese labels used by the site
# (e.g. "查询全部" = query all, "存款" = deposit, "取款" = withdrawal, "消费" = purchase).
ACTION_DICT = {'查询全部': 'all', '存款': '13', '取款': '14', '消费': '15', '转帐': '16', '补助': '17', '扣款': '18',
               '电子账户交费': '76', '电子账户退费': '77', '电子账户存款': '90', '电子账户取款': '91', '电子账户转出': '92',
               '电子账户转入': '93', '电子账户消费': '94', '电子账户银行转帐': '95', '电子账户补助': '96', '电子账户扣款': '97',
               '电子账户商户退款': '98'}


def _sub_system_name_make_up(name):
    # The site truncates long merchant names with "..": restore the common ones.
    name = name.replace('食..', '食堂')
    name = name.replace('凉..', '凉茶坊')
    name = name.replace('交..', '交流中心')
    name = name.replace('..', '')
    return name


YKT_RECHARGE_URL = 'http://ykt.gdufs.edu.cn/gzwywmYhzzIndex.action'
YKT_RECHARGE_URL2 = 'http://ykt.gdufs.edu.cn/gzwywmYhzz.action'
YKT_MODIFY_PASSWORD_URL = 'http://ykt.gdufs.edu.cn/accountcpwd.action'
YKT_MODIFY_PASSWORD_URL2 = 'http://ykt.gdufs.edu.cn/accountDocpwd.action'
YKT_PASSWORD_PHOTO_URL = 'http://ykt.gdufs.edu.cn/getpasswdPhoto.action'
YKT_REPORT_LOSS_URL = 'http://ykt.gdufs.edu.cn/accountloss.action'
YKT_REPORT_LOSS_URL2 = 'http://ykt.gdufs.edu.cn/accountDoLoss.action'


class YKTInformation(ComplexModel):
    state = Integer
    balance = Float
    transition_balance = Float

    def __init__(self, state, balance, transition_balance):
        self.state = state
        self.balance = balance
        self.transition_balance = transition_balance


class Account(ComplexModel):

    def __init__(self, time, transaction_type, sub_system_name, electronic_account, trading_volume, balance, state):
        self.time = time
        self.transaction_type = transaction_type
        self.sub_system_name = sub_system_name
        self.electronic_account = electronic_account
        self.trading_volume = trading_volume
        self.balance = balance
        self.state = state

    time = Unicode
    transaction_type = Unicode
    sub_system_name = Unicode
    electronic_account = Unicode
    trading_volume = Float
    balance = Float
    state = Unicode


def _match_from_tr(account_list, trs):
    # Each data row of the transaction table becomes one Account record.
    for tr in trs:
        tds = tr.find_all('td')
        time = tds[0].text
        transaction_type = tds[1].text
        sub_system_name = _sub_system_name_make_up(tds[2].text)
        electronic_account = tds[3].text
        trading_volume = eval(tds[4].text)
        balance = eval(tds[5].text)
        state = tds[7].text
        account_list.append(Account(time, transaction_type, sub_system_name, electronic_account,
                                    trading_volume, balance, state))


def crawl_today_account(cookie, account=None, action=None):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    opener.open(YKT_TRY_URL)
    if not account or not action:
        # Default to the first account and transaction type offered by the query form.
        soup = Soup(opener.open(YKT_ACCOUNT_URL), from_encoding=CHARSET)
        selects = soup.find_all('select')
        if not account:
            account = selects[0].option['value']
        if not action:
            action = selects[1].option['value']
    post_data = {'account': account, 'inputObject': action, 'Submit': '+%C8%B7+%B6%A8+'}
    post_data = parse.urlencode(post_data).encode(CHARSET)
    req = request.Request(YKT_TODAY_ACCOUNT_URL, post_data)
    soup = Soup(opener.open(req), from_encoding=CHARSET)
    table = soup.find('table', id='tables')
    trs = table.find_all('tr')
    # The last row holds the pager text "共N页" (N pages in total).
    pages = eval(re.search('共(\\d+)页', trs[-1].text).group(1))
    if pages == 0:
        return []
    account_list = []
    _match_from_tr(account_list, trs[1:-1])
    page = 1
    while True:
        page += 1
        if page > pages:
            break
        post_data = {'account': account, 'inputObject': action, 'pageVo.pageNum': page}
        post_data = parse.urlencode(post_data).encode(CHARSET)
        req = request.Request(YKT_TODAY_ACCOUNT_URL, post_data)
        soup = Soup(opener.open(req), from_encoding=CHARSET)
        table = soup.find('table', id='tables')
        trs = table.find_all('tr')
        _match_from_tr(account_list, trs[1:-1])
    return account_list


def crawl_ykt_information(cookie):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    opener.open(YKT_TRY_URL)
    soup = Soup(opener.open(YKT_INFORMATION_URL), from_encoding=CHARSET)
    table = soup.table.table
    state = STATE_DICT[table.find('div', text='卡 状 态:').findNext('td').text.strip()]
    balance_str = table.find('div', text='余 额:').findNext('td').text.strip()
    balance_list = re.findall('(\\d+\\.\\d+)元', balance_str)
    balance = eval(balance_list[0])
    transition_balance = eval(balance_list[1]) + eval(balance_list[2])
    return YKTInformation(state, balance, transition_balance)


def crawl_history_account(cookie, start_day, end_day, account=None, action=None):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    opener.open(YKT_TRY_URL)
    soup = Soup(opener.open(YKT_ACCOUNT_URL), from_encoding=CHARSET)
    url = 'http://ykt.gdufs.edu.cn%s' % soup.form['action']
    if not account or not action:
        selects = soup.find_all('select')
        if not account:
            account = selects[0].option['value']
        if not action:
            action = selects[1].option['value']
    post_data = {'account': account, 'inputObject': action, 'Submit': '+%C8%B7+%B6%A8+'}
    post_data = parse.urlencode(post_data).encode(CHARSET)
    req = request.Request(url, post_data)
    soup = Soup(opener.open(req), from_encoding=CHARSET)
    url = 'http://ykt.gdufs.edu.cn%s' % soup.form['action']
    post_data = {'inputStartDate': start_day, 'inputEndDate': end_day}
    post_data = parse.urlencode(post_data).encode(CHARSET)
    req = request.Request(url, post_data)
    soup = Soup(opener.open(req), from_encoding=CHARSET)
    url = 'http://ykt.gdufs.edu.cn/accounthisTrjn.action%s' % soup.form['action']
    soup = Soup(opener.open(url), from_encoding=CHARSET)
    table = soup.find('table', id='tables')
    trs = table.find_all('tr')
    pages = eval(re.search('共(\\d+)页', trs[-1].text).group(1))
    if pages == 0:
        return []
    account_list = []
    _match_from_tr(account_list, trs[1:-1])
    page = 1
    while True:
        page += 1
        if page > pages:
            break
        post_data = {'inputStartDate': start_day, 'inputEndDate': end_day, 'pageNum': page}
        post_data = parse.urlencode(post_data).encode(CHARSET)
        req = request.Request(YKT_HISTORY_ACCOUNT_URL, post_data)
        soup = Soup(opener.open(req), from_encoding=CHARSET)
        table = soup.find('table', id='tables')
        trs = table.find_all('tr')
        _match_from_tr(account_list, trs[1:-1])
    return account_list


def recharge(cookie, money, password):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    opener.open(YKT_TRY_URL)
    soup = Soup(opener.open(YKT_RECHARGE_URL), from_encoding=CHARSET)
    form = soup.form
    post_data = {'area': str(money), 'newpasswd': password}
    post_data['bankAcc'] = form.find('input', {'name': 'bankAcc'})['value']
    post_data['account'] = form.find('input', {'name': 'account'})['value']
    post_data['passwd'] = form.find('input', {'name': 'passwd'})['value']
    post_data['id'] = form.find('input', {'name': 'id'})['value']
    post_data = parse.urlencode(post_data).encode(CHARSET)
    req = request.Request(YKT_RECHARGE_URL2, post_data)
    html = opener.open(req).read().decode('gbk')
    # The result page contains "成功" ("success") when the transfer goes through.
    return '成功' in html


def get_password_image(cookie):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    opener.open(YKT_TRY_URL)
    return opener.open(YKT_PASSWORD_PHOTO_URL).read()


def modify_password(cookie, old_password, new_password):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    opener.open(YKT_TRY_URL)
    soup = Soup(opener.open(YKT_MODIFY_PASSWORD_URL), from_encoding=CHARSET)
    account = soup.find('select').option['value']
    post_data = {'account': account, 'passwd': old_password, 'newpasswd': new_password, 'newpasswd2': new_password}
    post_data = parse.urlencode(post_data).encode(CHARSET)
    req = request.Request(YKT_MODIFY_PASSWORD_URL2, post_data)
    html = opener.open(req).read().decode('gbk')
    return '成功' in html


def report_loss(cookie, password):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    opener.open(YKT_TRY_URL)
    soup = Soup(opener.open(YKT_REPORT_LOSS_URL), from_encoding=CHARSET)
    account = soup.find('select').option['value']
    post_data = {'account': account, 'passwd': password}
    post_data = parse.urlencode(post_data).encode(CHARSET)
    req = request.Request(YKT_REPORT_LOSS_URL2, post_data)
    html = opener.open(req).read().decode('gbk')
    return '成功' in html


if __name__ == '__main__':
    from crawler import COOKIE
    from util import cookie_from_str
    cookie = cookie_from_str(COOKIE)
    print(crawl_ykt_information(cookie))

from spyne import Application, rpc, ServiceBase, Iterable, Integer, Unicode, Byte, Boolean
from spyne.model.complex import ComplexModel
from spyne.protocol.soap import Soap11
from spyne.server.wsgi import WsgiApplication

from crawler import CET_score_crawler, course_crawler, score_crawler, news_crawler, weather_crawler, \
    information_crawler, ykt_crawler
import crawler
from util import cookie_from_str


class GDUFSService(ServiceBase):

    @rpc(Unicode, Unicode, _returns=Unicode)
    def login(self, username, password):
        # Returns the serialized cookie jar (Set-Cookie3 text) on success, '' on failure.
        cookie = crawler.login(username, password)
        if cookie:
            return cookie.as_lwp_str()
        else:
            return ''

    @rpc(Unicode, Unicode, _returns=Iterable(CET_score_crawler.CETScore))
    def crawl_CET_score(self, cookie, student_number):
        return CET_score_crawler.crawl(cookie_from_str(cookie), student_number)

    @rpc(Unicode, Unicode, Unicode, Unicode, _returns=Iterable(Iterable(course_crawler.Course)))
    def crawl_course(self, cookie, student_number, year, term):
        return course_crawler.crawl(cookie_from_str(cookie), student_number, year, term)

    @rpc(Unicode, Unicode, Unicode, Unicode, _returns=Iterable(score_crawler.Score))
    def crawl_score(self, cookie, student_number, year, term):
        return score_crawler.crawl(cookie_from_str(cookie), student_number, year, term)

    @rpc(Integer, _returns=Iterable(news_crawler.News))
    def crawl_news(self, page):
        return news_crawler.crawl(news_crawler.NEWS, page)

    @rpc(Integer, _returns=Iterable(news_crawler.News))
    def crawl_announcement(self, page):
        return news_crawler.crawl(news_crawler.ANNOUNCEMENT, page)

    @rpc(_returns=Iterable(weather_crawler.Weather))
    def crawl_weather(self):
        return weather_crawler.crawl()

    @rpc(Unicode, _returns=information_crawler.Information)
    def crawl_information(self, cookie):
        return information_crawler.crawl(cookie_from_str(cookie))

    @rpc(Unicode, _returns=ykt_crawler.YKTInformation)
    def crawl_ykt_information(self, cookie):
        return ykt_crawler.crawl_ykt_information(cookie_from_str(cookie))

    @rpc(Unicode, _returns=Iterable(ykt_crawler.Account))
    def crawl_today_account(self, cookie):
        return ykt_crawler.crawl_today_account(cookie_from_str(cookie))

    @rpc(Unicode, Unicode, Unicode, _returns=Iterable(ykt_crawler.Account))
    def crawl_history_account(self, cookie, start_day, end_day):
        return ykt_crawler.crawl_history_account(cookie_from_str(cookie), start_day, end_day)

    @rpc(Unicode, Integer, Unicode, _returns=Boolean)
    def recharge(self, cookie, money, password):
        return ykt_crawler.recharge(cookie_from_str(cookie), money, password)

    @rpc(Unicode, _returns=Iterable(Byte))
    def get_password_image(self, cookie):
        return ykt_crawler.get_password_image(cookie_from_str(cookie))

    @rpc(Unicode, Unicode, Unicode, _returns=Boolean)
    def modify_password(self, cookie, old_password, new_password):
        return ykt_crawler.modify_password(cookie_from_str(cookie), old_password, new_password)

    @rpc(Unicode, Unicode, _returns=Boolean)
    def report_loss(self, cookie, password):
        return ykt_crawler.report_loss(cookie_from_str(cookie), password)


if __name__ == '__main__':
    import logging
    from wsgiref.simple_server import make_server

    logging.basicConfig(level=logging.DEBUG)
    logging.getLogger('spyne.protocol.xml').setLevel(logging.DEBUG)
    logging.info("listening to http://127.0.0.1:8000")
    logging.info("wsdl is at: http://localhost:8000/?wsdl")

    application = Application([GDUFSService], 'gdufs.service',
                              in_protocol=Soap11(),
                              out_protocol=Soap11())
    # server = make_server('192.168.202.225', 8000, WsgiApplication(application))
    server = make_server('192.168.1.85', 8000, WsgiApplication(application))
    server.serve_forever()

from suds.client import Client
from crawler import COOKIE

c = Client('http://192.168.202.225:8000/?wsdl')
print(c)
'''
with open('temp.png', 'wb') as file:
    file.write(bs)
'''

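The client above only prints the generated proxy; with suds, individual operations are invoked through c.service. A quick sketch of a call sequence (the student number and password are placeholders, and it assumes the GDUFS_service server above is running at that address):

# hypothetical usage of the SOAP operations exposed by GDUFSService
cookie_str = c.service.login('20131003502', 'password')   # serialized cookie jar, or '' on failure
if cookie_str:
    print(c.service.crawl_information(cookie_str))
    for day in c.service.crawl_weather():
        print(day)
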
HEADER = "Set-Cookie3:"
BOOLEAN_ATTRS = ("port_spec", "path_spec", "domain_dot",
                 "secure", "discard")
VALUE_ATTRS = ("version",
               "port", "path", "domain",
               "expires",
               "comment", "commenturl")


def cookie_from_str(cookie_str):
    '''Rebuild an LWPCookieJar from the Set-Cookie3 text produced by LWPCookieJar.as_lwp_str();
    the parsing follows the same steps as the stdlib LWP cookie loader.'''
    from http.cookiejar import split_header_words, LWPCookieJar, LoadError, Cookie, iso2time
    import time
    cookie_str = cookie_str.split('\n')
    cookie = LWPCookieJar()
    index = 0
    while 1:
        line = cookie_str[index]
        index += 1
        if line == "":
            break  # as_lwp_str() output ends with a blank line
        if not line.startswith(HEADER):
            continue
        line = line[len(HEADER):].strip()
        for data in split_header_words([line]):
            name, value = data[0]
            standard = {}
            rest = {}
            for k in BOOLEAN_ATTRS:
                standard[k] = False
            for k, v in data[1:]:
                if k is not None:
                    lc = k.lower()
                else:
                    lc = None
                # Attribute names are case-insensitive; unknown ones go into `rest`.
                if (lc in VALUE_ATTRS) or (lc in BOOLEAN_ATTRS):
                    k = lc
                if k in BOOLEAN_ATTRS:
                    if v is None:
                        v = True
                    standard[k] = v
                elif k in VALUE_ATTRS:
                    standard[k] = v
                else:
                    rest[k] = v
            h = standard.get
            expires = h("expires")
            discard = h("discard")
            if expires is not None:
                expires = iso2time(expires)
            if expires is None:
                discard = True
            domain = h("domain")
            domain_specified = domain.startswith(".")
            c = Cookie(h("version"), name, value,
                       h("port"), h("port_spec"),
                       domain, domain_specified, h("domain_dot"),
                       h("path"), h("path_spec"),
                       h("secure"),
                       expires,
                       discard,
                       h("comment"),
                       h("commenturl"),
                       rest)
            cookie.set_cookie(c)
    return cookie

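A small self-check for cookie_from_str, reusing the sample COOKIE string from the login module. Note that the input must end with a blank line or trailing newline (which as_lwp_str() and the COOKIE sample both provide), otherwise the parsing loop runs off the end of the input:

# sketch: round-trip the sample cookies through the parser
from crawler import COOKIE
jar = cookie_from_str(COOKIE)
print(jar.as_lwp_str(True, True))
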