@yinzishao
Created October 10, 2015 13:32
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="jdk" jdkName="Python 3.4.3 (E:\soft\Python3.4\python.exe)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="DaemonCodeAnalyzer">
    <disable_hints />
  </component>
  <component name="DependencyValidationManager">
    <option name="SKIP_IMPORT_STATEMENTS" value="false" />
  </component>
  <component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
  <component name="ProjectLevelVcsManager" settingsEditedManually="false">
    <OptionsSetting value="true" id="Add" />
    <OptionsSetting value="true" id="Remove" />
    <OptionsSetting value="true" id="Checkout" />
    <OptionsSetting value="true" id="Update" />
    <OptionsSetting value="true" id="Status" />
    <OptionsSetting value="true" id="Edit" />
    <ConfirmationsSetting value="0" id="Add" />
    <ConfirmationsSetting value="0" id="Remove" />
  </component>
  <component name="ProjectModuleManager">
    <modules />
  </component>
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.4.3 (E:\soft\Python3.4\python.exe)" project-jdk-type="Python SDK" />
  <component name="RunManager">
    <list size="0" />
  </component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/GDUFS_service.iml" filepath="$PROJECT_DIR$/.idea/GDUFS_service.iml" />
    </modules>
  </component>
</project>
<component name="DependencyValidationManager">
  <state>
    <option name="SKIP_IMPORT_STATEMENTS" value="false" />
  </state>
</component>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="" vcs="" />
  </component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ChangeListManager">
    <option name="TRACKING_ENABLED" value="true" />
    <option name="SHOW_DIALOG" value="false" />
    <option name="HIGHLIGHT_CONFLICTS" value="true" />
    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
    <option name="LAST_RESOLUTION" value="IGNORE" />
  </component>
  <component name="ChangesViewManager" flattened_view="true" show_ignored="false" />
  <component name="CreatePatchCommitExecutor">
    <option name="PATCH_PATH" value="" />
  </component>
  <component name="DaemonCodeAnalyzer">
    <disable_hints />
  </component>
  <component name="ProjectLevelVcsManager" settingsEditedManually="false">
    <OptionsSetting value="true" id="Add" />
    <OptionsSetting value="true" id="Remove" />
    <OptionsSetting value="true" id="Checkout" />
    <OptionsSetting value="true" id="Update" />
    <OptionsSetting value="true" id="Status" />
    <OptionsSetting value="true" id="Edit" />
    <ConfirmationsSetting value="0" id="Add" />
    <ConfirmationsSetting value="0" id="Remove" />
  </component>
  <component name="RunManager">
    <list size="0" />
  </component>
  <component name="ShelveChangesManager" show_recycled="false" />
  <component name="TaskManager">
    <task active="true" id="Default" summary="Default task">
      <option name="number" value="Default" />
    </task>
    <servers />
  </component>
  <component name="VcsContentAnnotationSettings">
    <option name="myLimit" value="2678400000" />
  </component>
  <component name="VcsManagerConfiguration">
    <option name="myTodoPanelSettings">
      <TodoPanelSettings />
    </option>
  </component>
  <component name="XDebuggerManager">
    <breakpoint-manager />
    <watches-manager />
  </component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
  <name>GDUFS_service</name>
  <comment></comment>
  <projects>
  </projects>
  <buildSpec>
    <buildCommand>
      <name>org.python.pydev.PyDevBuilder</name>
      <arguments>
      </arguments>
    </buildCommand>
  </buildSpec>
  <natures>
    <nature>org.python.pydev.pythonNature</nature>
  </natures>
</projectDescription>
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?eclipse-pydev version="1.0"?><pydev_project>
  <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
    <path>/${PROJECT_DIR_NAME}/src</path>
  </pydev_pathproperty>
  <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 3.0</pydev_property>
  <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
</pydev_project>
eclipse.preferences.version=1
encoding//src/service/test.py=utf8
from http.cookiejar import LWPCookieJar
from urllib import request, parse

CHARSET = 'utf-8'
FORM_URL = 'http://auth.gdufs.edu.cn/pkmslogin.form'


def login(username, password):
    cookie = LWPCookieJar()
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    post_data = {'username': username, 'password': password, 'login-form-type': 'pwd'}
    post_data = parse.urlencode(post_data).encode(CHARSET)
    req = request.Request(FORM_URL, post_data)
    try:
        opener.open(req).read().decode('utf-8')
    except Exception:
        print("%s's login failed" % username)
        return None
    else:
        print("%s's login was successful" % username)
        return cookie
COOKIE = '''Set-Cookie3: PD-ID="ufQROxGk5NVGDy+BS6EAHpsVgymWEnvJuPhMZ4hVF7pP1UN7y7vjbVntDDJwvP6WJt1G8yODUTHMAwp+yrPofhWu3nqttxSTcvSIrDUL2oq7P09V/5LDB8EIrteOCok7Qhaf8gkbLdxodCK9U7hn/2LNEViFu4eGZQAG/TS58z70XgvPQpXIf51TjkfIdSWr01pglZPoN5/RO3Q44DyOtNb1b5rCdGYxK4FG5uP1EkxHlzT81iGu5tnsc+hQoLn/NMfqKldBLjo="; path="/"; domain=".gdufs.edu.cn"; path_spec; domain_dot; discard; version=0
Set-Cookie3: PD-H-SESSION-ID="4_+a+HtbfLW1yUTtpyJ2szvdVcVHQ0Eio17aahObfwiF5FXp7t"; path="/"; domain="auth.gdufs.edu.cn"; path_spec; discard; version=0
'''
if __name__ == '__main__':
    jar = login('20131003502', '')
    if jar is not None:  # login() returns None on failure
        print(jar.as_lwp_str(True, True))
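    # Optional sketch: the jar can also be persisted with the stdlib's own
    # LWP file format; 'cookies.txt' is an assumed filename, not part of
    # the original flow.
    if jar is not None:
        jar.save('cookies.txt', ignore_discard=True, ignore_expires=True)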
'''
Created on 2015-08-18
@author: wan
'''
from bs4 import BeautifulSoup as Soup
from spyne import Unicode
from spyne.model.complex import ComplexModel
from urllib import request

from crawler import CHARSET


class CETScore(ComplexModel):
    year = Unicode
    term = Unicode
    examination_name = Unicode
    examinee_number = Unicode
    date = Unicode
    score = Unicode
    listening_score = Unicode
    reading_score = Unicode
    writing_score = Unicode
    comprehensive_score = Unicode

    def __init__(self, year, term, examination_name, examinee_number, date, score,
                 listening_score, reading_score, writing_score, comprehensive_score):
        (self.year, self.term, self.examination_name, self.examinee_number, self.date,
         self.score, self.listening_score, self.reading_score, self.writing_score,
         self.comprehensive_score) \
            = (year, term, examination_name, examinee_number, date, score,
               listening_score, reading_score, writing_score, comprehensive_score)


def crawl(cookie, student_number):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    soup = Soup(opener.open('http://jw.gdufs.edu.cn/xsdjkscx.aspx?student_number=%s' % student_number),
                from_encoding=CHARSET)
    table = soup.table
    del table.attrs       # drop the table's attributes
    table.tr.decompose()  # remove the header row
    CET_score_list = []
    for item in table.find_all('tr'):
        CET_score_list.append(CETScore(
            item.contents[1].text, item.contents[2].text, item.contents[3].text,
            item.contents[4].text, item.contents[5].text, item.contents[6].text,
            item.contents[7].text, item.contents[8].text, item.contents[9].text,
            item.contents[10].text))
    return CET_score_list


if __name__ == '__main__':
    from crawler import COOKIE
    from util import cookie_from_str
    cookie = cookie_from_str(COOKIE)
    print(crawl(cookie, '20131003502'))
'''
Created on 2015-08-18
@author: wan
'''
import re

from bs4 import BeautifulSoup as Soup
from spyne import Unicode, Integer
from spyne.model.complex import ComplexModel
from urllib import request, parse

from crawler import CHARSET

# Chinese weekday characters (Monday..Sunday) mapped to timetable column indexes.
__DATE_DICT = {'一': 0, '二': 1, '三': 2, '四': 3, '五': 4, '六': 5, '日': 6}


class Course(ComplexModel):
    course_name = Unicode
    teacher_name = Unicode
    place = Unicode
    start_time = Integer
    numb = Integer

    def __init__(self, course_name, teacher_name, place, start_time, numb):
        self.course_name, self.teacher_name, self.place, self.start_time, self.numb = \
            course_name, teacher_name, place, start_time, numb


def crawl(cookie, student_number, year, term):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    url = 'http://jw.gdufs.edu.cn/xskbcx.aspx?student_number=%s' % student_number
    soup = Soup(opener.open(url), from_encoding=CHARSET)
    view_state = soup.find('input', {'name': '__VIEWSTATE'})['value']
    table = soup.table
    selected_list = table.find_all('option', {'selected': True})
    if len(selected_list) == 2 and year == selected_list[0]['value'] and term == selected_list[1]['value']:
        # The requested year and term are already selected; the timetable is on this page.
        table = soup.find_all('table')[1]
    else:
        # Post the ASP.NET form back to switch to the requested year and term.
        post_data = {'__EVENTTARGET': '', 'xnd': year, 'xqd': term, '__VIEWSTATE': view_state}
        post_data = parse.urlencode(post_data).encode(CHARSET)
        req = request.Request(url, post_data)
        soup = Soup(opener.open(req), from_encoding=CHARSET)
        table = soup.find_all('table')[1]
    course_table = [[], [], [], [], [], [], []]
    table.tr.decompose()  # remove the header row
    for td in table.find_all('td', {'align': 'Center'}):
        if re.match('\\s+', td.text):
            continue  # empty timetable cell
        content = [item for item in re.split('<.+?>', str(td)) if item]
        course_name = content[0]
        teacher_name = content[2]
        place = content[3]
        # Cells read like '周三第3,4节': weekday after '周', period numbers after '第'.
        matcher = re.search('周(.*)第(.*)节', content[1])
        t = matcher.group(2).split(',')
        start_time = t[0]
        numb = len(t)
        course = Course(course_name, teacher_name, place, start_time, numb)
        course_table[__DATE_DICT[matcher.group(1)]].append(course)
    return course_table


if __name__ == '__main__':
    from crawler import COOKIE
    from util import cookie_from_str
    cookie = cookie_from_str(COOKIE)
    print(crawl(cookie, '20131003502', '2015-2016', '1'))
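    # Optional sketch (added): course_table is indexed Monday..Sunday (0..6),
    # so index 0 lists Monday's courses.
    for course in crawl(cookie, '20131003502', '2015-2016', '1')[0]:
        print(course.course_name, course.start_time, course.numb)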
'''
Created on 2015-09-02
@author: wan
'''
import re

from bs4 import BeautifulSoup as Soup
from spyne import Unicode
from spyne.model.complex import ComplexModel
from urllib import request

from crawler import CHARSET

INFORMATION_URL = 'http://auth.gdufs.edu.cn'


class Information(ComplexModel):
    def __init__(self, name, photo_url, identity, academy):
        self.name = name
        self.photo_url = photo_url
        self.identity = identity
        self.academy = academy

    name = Unicode
    photo_url = Unicode
    identity = Unicode
    academy = Unicode


def crawl(cookie):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    soup = Soup(opener.open(INFORMATION_URL), from_encoding=CHARSET)
    td = soup.find('td', class_='portletBody')
    text = td.text
    try:
        name = re.search('([\u4e00-\u9fa5]+),', text).group(1)
    except AttributeError:  # no match: re.search() returned None
        name = ''
    photo_url = 'http://auth.gdufs.edu.cn%s' % td.img['src']
    # '身份' means identity/role, '院系' means faculty/department.
    identity = re.search('身份:[\\s\\S]*?([\u4e00-\u9fa5]+)', text).group(1)
    try:
        academy = re.search('院系:[\\s\\S]*?([\u4e00-\u9fa5]+)', text).group(1)
    except AttributeError:
        academy = ''
    return Information(name, photo_url, identity, academy)


if __name__ == '__main__':
    from crawler import COOKIE
    from util import cookie_from_str
    cookie = cookie_from_str(COOKIE)
    print(crawl(cookie))
'''
Created on 2015-09-01
@author: wan
'''
import re

from bs4 import BeautifulSoup as Soup
from spyne import Unicode
from spyne.model.complex import ComplexModel
from urllib.request import urlopen

from crawler import CHARSET

NEWS = 'gwxw'          # campus news column
ANNOUNCEMENT = 'tzgg'  # notices and announcements column


class News(ComplexModel):
    date = Unicode
    title = Unicode
    url = Unicode

    def __init__(self, date, title, url):
        self.date, self.title, self.url = date, title, url


def crawl(aim, page=1):
    if aim not in [NEWS, ANNOUNCEMENT]:
        return []
    url = 'http://www.gdufs.edu.cn/%s.htm' % aim
    soup = Soup(urlopen(url), from_encoding=CHARSET)
    if page != 1:
        try:
            # The pager cell reads 'current/total'; take the total.
            td = soup.find('td', id=re.compile('fanye.+'))
            max_page = int(re.search('\\d+/(\\d+)', td.text).group(1)) + 1
        except AttributeError:  # pager not found or in an unexpected format
            max_page = 1
        if max_page <= page:
            return []
        else:
            # Older pages are numbered in reverse: page N lives at (max_page - N).htm.
            url = 'http://www.gdufs.edu.cn/%s/%u.htm' % (aim, max_page - page)
            soup = Soup(urlopen(url), from_encoding=CHARSET)
    news_list = []
    for li in soup.find('div', {'class': 'm_content'}).find_all('li'):
        date = li.contents[1].text
        title = li.contents[3].text
        url = 'http://www.gdufs.edu.cn/%s' % li.contents[3].a['href'].replace('../', '')
        news_list.append(News(date, title, url))
    return news_list


if __name__ == '__main__':
    for item in crawl(NEWS, 1):
        print(item)
'''
Created on 2015-09-01
@author: wan
'''
from bs4 import BeautifulSoup as Soup
from spyne import Unicode
from spyne.model.complex import ComplexModel
from urllib import request, parse

from crawler import CHARSET


class Score(ComplexModel):
    def __init__(self, course_id, course_name, type1, type2, credit, grade_point,
                 normal_performance, midterm_exam_score, final_exam_score,
                 experiment_score, score, minor_mark, make_up_exam_achievement,
                 rebuild_achievement, academy_name, remark, rebuild_mark):
        self.course_id = course_id
        self.course_name = course_name
        self.type1 = type1
        self.type2 = type2
        self.credit = credit
        self.grade_point = grade_point
        self.normal_performance = normal_performance
        self.midterm_exam_score = midterm_exam_score
        self.final_exam_score = final_exam_score
        self.experiment_score = experiment_score
        self.score = score
        self.minor_mark = minor_mark
        self.make_up_exam_achievement = make_up_exam_achievement
        self.rebuild_achievement = rebuild_achievement
        self.academy_name = academy_name
        self.remark = remark
        self.rebuild_mark = rebuild_mark

    course_id = Unicode
    course_name = Unicode
    type1 = Unicode
    type2 = Unicode
    credit = Unicode
    grade_point = Unicode
    normal_performance = Unicode
    midterm_exam_score = Unicode
    final_exam_score = Unicode
    experiment_score = Unicode
    score = Unicode
    minor_mark = Unicode
    make_up_exam_achievement = Unicode
    rebuild_achievement = Unicode
    academy_name = Unicode
    remark = Unicode
    rebuild_mark = Unicode


def crawl(cookie, student_number, year, term):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    url = 'http://jw.gdufs.edu.cn/xscj_gc.aspx?student_number=%s' % student_number
    soup = Soup(opener.open(url), from_encoding=CHARSET)
    view_state = soup.find('input', {'name': '__VIEWSTATE'})['value']
    # 'BUTTON1' carries the site's "query by term" button label ('按学期查询').
    post_data = {'BUTTON1': '按学期查询', 'ddlXN': year, 'ddlXQ': term, '__VIEWSTATE': view_state}
    post_data = parse.urlencode(post_data).encode(CHARSET)
    req = request.Request(url, post_data)
    soup = Soup(opener.open(req), from_encoding=CHARSET)
    table = soup.table
    table.tr.decompose()  # remove the header row
    score_list = []
    for item in table.find_all('tr'):
        course_id = item.contents[3].text
        course_name = item.contents[4].text
        type1 = item.contents[5].text
        type2 = item.contents[6].text
        credit = item.contents[7].text
        grade_point = item.contents[8].text
        normal_performance = item.contents[9].text
        midterm_exam_score = item.contents[10].text
        final_exam_score = item.contents[11].text
        experiment_score = item.contents[12].text
        score = item.contents[13].text
        minor_mark = item.contents[14].text
        make_up_exam_achievement = item.contents[15].text
        rebuild_achievement = item.contents[16].text
        academy_name = item.contents[17].text
        remark = item.contents[18].text
        rebuild_mark = item.contents[19].text
        score_list.append(Score(course_id, course_name, type1, type2, credit, grade_point,
                                normal_performance, midterm_exam_score, final_exam_score,
                                experiment_score, score, minor_mark, make_up_exam_achievement,
                                rebuild_achievement, academy_name, remark, rebuild_mark))
    return score_list


if __name__ == '__main__':
    from crawler import COOKIE
    from util import cookie_from_str
    cookie = cookie_from_str(COOKIE)
    for item in crawl(cookie, '20131003502', '2014-2015', '1'):
        print(item)
'''
Created on 2015-09-01
@author: wan
'''
from bs4 import BeautifulSoup as Soup
from spyne import Unicode
from spyne.model.complex import ComplexModel
from urllib.request import urlopen

from crawler import CHARSET

# 7-day forecast page on weather.com.cn (station id 101280102).
WEATHER_URL = 'http://www.weather.com.cn/weather/101280102.shtml'


class Weather(ComplexModel):
    week = Unicode
    date = Unicode
    weather = Unicode
    highest_temperature = Unicode
    lowest_temperature = Unicode
    wind = Unicode

    def __init__(self, week, date, weather, highest_temperature, lowest_temperature, wind):
        self.week, self.date, self.weather, self.highest_temperature, \
            self.lowest_temperature, self.wind = \
            week, date, weather, highest_temperature, lowest_temperature, wind


def crawl():
    soup = Soup(urlopen(WEATHER_URL), from_encoding=CHARSET)
    div = soup.find('div', id='7d')
    weather_list = []
    for li in div.find_all('li'):
        week = li.find('h1').text
        date = li.find('h2').text
        weather = li.find('p', class_='wea').text
        highest_temperature = li.find('p', class_='tem tem1').text.strip()
        lowest_temperature = li.find('p', class_='tem tem2').text.strip()
        wind = li.find('p', class_='win').text.strip()
        weather_list.append(Weather(week, date, weather, highest_temperature,
                                    lowest_temperature, wind))
    return weather_list


if __name__ == '__main__':
    for item in crawl():
        print(item)
'''
Created on 2015-09-02
@author: wan
'''
import re

from bs4 import BeautifulSoup as Soup
from spyne import Integer, Unicode, Float
from spyne.model.complex import ComplexModel
from urllib import request, parse

from crawler import CHARSET

# Card states on the campus-card ('一卡通') portal: '正常' = normal, '冻结' = frozen.
STATE_DICT = {'正常': 0, '冻结': 1}

YKT_TRY_URL = 'http://ykt.gdufs.edu.cn/gdufsPortalHome.action'
YKT_INFORMATION_URL = 'http://ykt.gdufs.edu.cn/accountcardUser.action'
YKT_ACCOUNT_URL = 'http://ykt.gdufs.edu.cn/accounthisTrjn.action'
YKT_TODAY_ACCOUNT_URL = 'http://ykt.gdufs.edu.cn/accounttodatTrjnObject.action'
YKT_HISTORY_ACCOUNT_URL = 'http://ykt.gdufs.edu.cn/accountconsubBrows.action'

# The portal's transaction-type filter values, keyed by their Chinese labels
# (query all, deposit, withdrawal, consumption, transfer, subsidy, deduction,
# plus the e-account ('电子账户') variants of the same).
ACTION_DICT = {'查询全部': 'all', '存款': '13', '取款': '14', '消费': '15', '转帐': '16', '补助': '17', '扣款': '18',
               '电子账户交费': '76', '电子账户退费': '77', '电子账户存款': '90', '电子账户取款': '91', '电子账户转出': '92',
               '电子账户转入': '93', '电子账户消费': '94', '电子账户银行转帐': '95', '电子账户补助': '96', '电子账户扣款': '97',
               '电子账户商户退款': '98'}


def _sub_system_name_make_up(name):
    # The portal truncates long merchant names with '..'; restore the known
    # ones ('食堂' canteen, '凉茶坊' herbal-tea shop, '交流中心' exchange centre).
    name = name.replace('食..', '食堂')
    name = name.replace('凉..', '凉茶坊')
    name = name.replace('交..', '交流中心')
    name = name.replace('..', '')
    return name


YKT_RECHARGE_URL = 'http://ykt.gdufs.edu.cn/gzwywmYhzzIndex.action'
YKT_RECHARGE_URL2 = 'http://ykt.gdufs.edu.cn/gzwywmYhzz.action'
YKT_MODIFY_PASSWORD_URL = 'http://ykt.gdufs.edu.cn/accountcpwd.action'
YKT_MODIFY_PASSWORD_URL2 = 'http://ykt.gdufs.edu.cn/accountDocpwd.action'
YKT_PASSWORD_PHOTO_URL = 'http://ykt.gdufs.edu.cn/getpasswdPhoto.action'
YKT_REPORT_LOSS_URL = 'http://ykt.gdufs.edu.cn/accountloss.action'
YKT_REPORT_LOSS_URL2 = 'http://ykt.gdufs.edu.cn/accountDoLoss.action'


class YKTInformation(ComplexModel):
    state = Integer
    balance = Float
    transition_balance = Float

    def __init__(self, state, balance, transition_balance):
        self.state = state
        self.balance = balance
        self.transition_balance = transition_balance


class Account(ComplexModel):
    def __init__(self, time, transaction_type, sub_system_name, electronic_account,
                 trading_volume, balance, state):
        self.time = time
        self.transaction_type = transaction_type
        self.sub_system_name = sub_system_name
        self.electronic_account = electronic_account
        self.trading_volume = trading_volume
        self.balance = balance
        self.state = state

    time = Unicode
    transaction_type = Unicode
    sub_system_name = Unicode
    electronic_account = Unicode
    trading_volume = Float
    balance = Float
    state = Unicode


def _match_from_tr(account_list, trs):
    for tr in trs:
        tds = tr.find_all('td')
        time = tds[0].text
        transaction_type = tds[1].text
        sub_system_name = _sub_system_name_make_up(tds[2].text)
        electronic_account = tds[3].text
        trading_volume = float(tds[4].text)
        balance = float(tds[5].text)
        state = tds[7].text
        account_list.append(Account(time, transaction_type, sub_system_name,
                                    electronic_account, trading_volume, balance, state))


def crawl_today_account(cookie, account=None, action=None):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    opener.open(YKT_TRY_URL)  # establish a portal session
    if not account or not action:
        # Default to the first card account and the first filter option.
        soup = Soup(opener.open(YKT_ACCOUNT_URL), from_encoding=CHARSET)
        selects = soup.find_all('select')
        if not account:
            account = selects[0].option['value']
        if not action:
            action = selects[1].option['value']
    # 'Submit' appears to be the pre-encoded GBK label of the '确定' (confirm) button.
    post_data = {'account': account, 'inputObject': action, 'Submit': '+%C8%B7+%B6%A8+'}
    post_data = parse.urlencode(post_data).encode(CHARSET)
    req = request.Request(YKT_TODAY_ACCOUNT_URL, post_data)
    soup = Soup(opener.open(req), from_encoding=CHARSET)
    table = soup.find('table', id='tables')
    trs = table.find_all('tr')
    # The last row reads '共N页' ("N pages in total").
    pages = int(re.search('共(\\d+)页', trs[-1].text).group(1))
    if pages == 0:
        return []
    account_list = []
    _match_from_tr(account_list, trs[1:-1])  # skip header and pager rows
    page = 1
    while True:
        page += 1
        if page > pages:
            break
        post_data = {'account': account, 'inputObject': action, 'pageVo.pageNum': page}
        post_data = parse.urlencode(post_data).encode(CHARSET)
        req = request.Request(YKT_TODAY_ACCOUNT_URL, post_data)
        soup = Soup(opener.open(req), from_encoding=CHARSET)
        table = soup.find('table', id='tables')
        trs = table.find_all('tr')
        _match_from_tr(account_list, trs[1:-1])
    return account_list


def crawl_ykt_information(cookie):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    opener.open(YKT_TRY_URL)
    soup = Soup(opener.open(YKT_INFORMATION_URL), from_encoding=CHARSET)
    table = soup.table.table
    # '卡状态' = card state, '余额' = balance; the labels are space-padded on the page.
    state = STATE_DICT[table.find('div', text='卡  状  态:').findNext('td').text.strip()]
    balance_str = table.find('div', text='余    额:').findNext('td').text.strip()
    balance_list = re.findall('(\\d+\\.\\d+)元', balance_str)
    balance = float(balance_list[0])
    transition_balance = float(balance_list[1]) + float(balance_list[2])
    return YKTInformation(state, balance, transition_balance)


def crawl_history_account(cookie, start_day, end_day, account=None, action=None):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    opener.open(YKT_TRY_URL)
    soup = Soup(opener.open(YKT_ACCOUNT_URL), from_encoding=CHARSET)
    url = 'http://ykt.gdufs.edu.cn%s' % soup.form['action']
    if not account or not action:
        selects = soup.find_all('select')
        if not account:
            account = selects[0].option['value']
        if not action:
            action = selects[1].option['value']
    post_data = {'account': account, 'inputObject': action, 'Submit': '+%C8%B7+%B6%A8+'}
    post_data = parse.urlencode(post_data).encode(CHARSET)
    req = request.Request(url, post_data)
    soup = Soup(opener.open(req), from_encoding=CHARSET)
    url = 'http://ykt.gdufs.edu.cn%s' % soup.form['action']
    post_data = {'inputStartDate': start_day, 'inputEndDate': end_day}
    post_data = parse.urlencode(post_data).encode(CHARSET)
    req = request.Request(url, post_data)
    soup = Soup(opener.open(req), from_encoding=CHARSET)
    url = 'http://ykt.gdufs.edu.cn/accounthisTrjn.action%s' % soup.form['action']
    soup = Soup(opener.open(url), from_encoding=CHARSET)
    table = soup.find('table', id='tables')
    trs = table.find_all('tr')
    pages = int(re.search('共(\\d+)页', trs[-1].text).group(1))
    if pages == 0:
        return []
    account_list = []
    _match_from_tr(account_list, trs[1:-1])
    page = 1
    while True:
        page += 1
        if page > pages:
            break
        post_data = {'inputStartDate': start_day, 'inputEndDate': end_day, 'pageNum': page}
        post_data = parse.urlencode(post_data).encode(CHARSET)
        req = request.Request(YKT_HISTORY_ACCOUNT_URL, post_data)
        soup = Soup(opener.open(req), from_encoding=CHARSET)
        table = soup.find('table', id='tables')
        trs = table.find_all('tr')
        _match_from_tr(account_list, trs[1:-1])
    return account_list


def recharge(cookie, money, password):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    opener.open(YKT_TRY_URL)
    soup = Soup(opener.open(YKT_RECHARGE_URL), from_encoding=CHARSET)
    form = soup.form
    post_data = {'area': str(money), 'newpasswd': password}
    post_data['bankAcc'] = form.find('input', {'name': 'bankAcc'})['value']
    post_data['account'] = form.find('input', {'name': 'account'})['value']
    post_data['passwd'] = form.find('input', {'name': 'passwd'})['value']
    post_data['id'] = form.find('input', {'name': 'id'})['value']
    post_data = parse.urlencode(post_data).encode(CHARSET)
    req = request.Request(YKT_RECHARGE_URL2, post_data)
    html = opener.open(req).read().decode('gbk')
    # The GBK result page contains '成功' ("success") when the operation went through.
    return '成功' in html


def get_password_image(cookie):
    # Return the raw bytes of the portal's password image (getpasswdPhoto.action).
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    opener.open(YKT_TRY_URL)
    return opener.open(YKT_PASSWORD_PHOTO_URL).read()


def modify_password(cookie, old_password, new_password):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    opener.open(YKT_TRY_URL)
    soup = Soup(opener.open(YKT_MODIFY_PASSWORD_URL), from_encoding=CHARSET)
    account = soup.find('select').option['value']
    post_data = {'account': account, 'passwd': old_password,
                 'newpasswd': new_password, 'newpasswd2': new_password}
    post_data = parse.urlencode(post_data).encode(CHARSET)
    req = request.Request(YKT_MODIFY_PASSWORD_URL2, post_data)
    html = opener.open(req).read().decode('gbk')
    return '成功' in html


def report_loss(cookie, password):
    cookie_support = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(cookie_support, request.HTTPHandler)
    opener.open(YKT_TRY_URL)
    soup = Soup(opener.open(YKT_REPORT_LOSS_URL), from_encoding=CHARSET)
    account = soup.find('select').option['value']
    post_data = {'account': account, 'passwd': password}
    post_data = parse.urlencode(post_data).encode(CHARSET)
    req = request.Request(YKT_REPORT_LOSS_URL2, post_data)
    html = opener.open(req).read().decode('gbk')
    return '成功' in html


if __name__ == '__main__':
    from crawler import COOKIE
    from util import cookie_from_str
    cookie = cookie_from_str(COOKIE)
    print(crawl_ykt_information(cookie))
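    # Optional sketch (added): save the raw password image returned by
    # get_password_image() for inspection; 'passwd.png' is an assumed
    # filename and format.
    with open('passwd.png', 'wb') as f:
        f.write(get_password_image(cookie))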
from spyne import Application, rpc, ServiceBase, Iterable, Integer, Unicode, Byte, Boolean
from spyne.protocol.soap import Soap11
from spyne.server.wsgi import WsgiApplication

from crawler import CET_score_crawler, course_crawler, score_crawler, news_crawler, \
    weather_crawler, information_crawler, ykt_crawler
import crawler
from util import cookie_from_str


class GDUFSService(ServiceBase):

    @rpc(Unicode, Unicode, _returns=Unicode)
    def login(self, username, password):
        cookie = crawler.login(username, password)
        if cookie:
            return cookie.as_lwp_str()
        else:
            return ''

    @rpc(Unicode, Unicode, _returns=Iterable(CET_score_crawler.CETScore))
    def crawl_CET_score(self, cookie, student_number):
        return CET_score_crawler.crawl(cookie_from_str(cookie), student_number)

    @rpc(Unicode, Unicode, Unicode, Unicode, _returns=Iterable(Iterable(course_crawler.Course)))
    def crawl_course(self, cookie, student_number, year, term):
        return course_crawler.crawl(cookie_from_str(cookie), student_number, year, term)

    @rpc(Unicode, Unicode, Unicode, Unicode, _returns=Iterable(score_crawler.Score))
    def crawl_score(self, cookie, student_number, year, term):
        return score_crawler.crawl(cookie_from_str(cookie), student_number, year, term)

    @rpc(Integer, _returns=Iterable(news_crawler.News))
    def crawl_news(self, page):
        return news_crawler.crawl(news_crawler.NEWS, page)

    @rpc(Integer, _returns=Iterable(news_crawler.News))
    def crawl_announcement(self, page):
        return news_crawler.crawl(news_crawler.ANNOUNCEMENT, page)

    @rpc(_returns=Iterable(weather_crawler.Weather))
    def crawl_weather(self):
        return weather_crawler.crawl()

    @rpc(Unicode, _returns=information_crawler.Information)
    def crawl_information(self, cookie):
        return information_crawler.crawl(cookie_from_str(cookie))

    @rpc(Unicode, _returns=ykt_crawler.YKTInformation)
    def crawl_ykt_information(self, cookie):
        return ykt_crawler.crawl_ykt_information(cookie_from_str(cookie))

    @rpc(Unicode, _returns=Iterable(ykt_crawler.Account))
    def crawl_today_account(self, cookie):
        return ykt_crawler.crawl_today_account(cookie_from_str(cookie))

    @rpc(Unicode, Unicode, Unicode, _returns=Iterable(ykt_crawler.Account))
    def crawl_history_account(self, cookie, start_day, end_day):
        return ykt_crawler.crawl_history_account(cookie_from_str(cookie), start_day, end_day)

    @rpc(Unicode, Integer, Unicode, _returns=Boolean)
    def recharge(self, cookie, money, password):
        return ykt_crawler.recharge(cookie_from_str(cookie), money, password)

    @rpc(Unicode, _returns=Iterable(Byte))
    def get_password_image(self, cookie):
        return ykt_crawler.get_password_image(cookie_from_str(cookie))

    @rpc(Unicode, Unicode, Unicode, _returns=Boolean)
    def modify_password(self, cookie, old_password, new_password):
        return ykt_crawler.modify_password(cookie_from_str(cookie), old_password, new_password)

    @rpc(Unicode, Unicode, _returns=Boolean)
    def report_loss(self, cookie, password):
        return ykt_crawler.report_loss(cookie_from_str(cookie), password)


if __name__ == '__main__':
    import logging
    from wsgiref.simple_server import make_server

    logging.basicConfig(level=logging.DEBUG)
    logging.getLogger('spyne.protocol.xml').setLevel(logging.DEBUG)
    host, port = '192.168.1.85', 8000  # the LAN address the author served on
    logging.info('listening to http://%s:%u' % (host, port))
    logging.info('wsdl is at: http://%s:%u/?wsdl' % (host, port))
    application = Application([GDUFSService], 'gdufs.service',
                              in_protocol=Soap11(),
                              out_protocol=Soap11())
    # server = make_server('192.168.202.225', 8000, WsgiApplication(application))
    server = make_server(host, port, WsgiApplication(application))
    server.serve_forever()
from suds.client import Client

from crawler import COOKIE

c = Client('http://192.168.202.225:8000/?wsdl')
print(c)  # dumps the WSDL's services, methods, and types
'''
# Snippet for saving image bytes (e.g. from get_password_image) once they
# are held in `bs`:
with open('temp.png', 'wb') as file:
    file.write(bs)
'''
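# Hedged sketch (added; assumes the server above is running): every @rpc
# method on GDUFSService is exposed under c.service.
print(c.service.crawl_weather())
# Authenticated calls pass the cookie string from login() back verbatim:
# cookie_str = c.service.login('<username>', '<password>')  # placeholders
# print(c.service.crawl_information(cookie_str))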
HEADER = "Set-Cookie3:"
BOOLEAN_ATTRS = ("port_spec", "path_spec", "domain_dot",
                 "secure", "discard")
VALUE_ATTRS = ("version",
               "port", "path", "domain",
               "expires",
               "comment", "commenturl")


def cookie_from_str(cookie_str):
    """Rebuild an LWPCookieJar from a string of "Set-Cookie3:" lines, i.e.
    the output of LWPCookieJar.as_lwp_str(). Mirrors the stdlib's
    LWPCookieJar._really_load(), reading from a string instead of a file.
    """
    from http.cookiejar import split_header_words, LWPCookieJar, Cookie, iso2time
    cookie_str = cookie_str.split('\n')
    cookie = LWPCookieJar()
    index = 0
    while 1:
        line = cookie_str[index]
        index += 1
        if line == "":
            break
        if not line.startswith(HEADER):
            continue
        line = line[len(HEADER):].strip()
        for data in split_header_words([line]):
            name, value = data[0]
            standard = {}
            rest = {}
            for k in BOOLEAN_ATTRS:
                standard[k] = False
            for k, v in data[1:]:
                if k is not None:
                    lc = k.lower()
                else:
                    lc = None
                # Don't lose case distinction for unknown fields.
                if (lc in VALUE_ATTRS) or (lc in BOOLEAN_ATTRS):
                    k = lc
                if k in BOOLEAN_ATTRS:
                    if v is None:
                        v = True
                    standard[k] = v
                elif k in VALUE_ATTRS:
                    standard[k] = v
                else:
                    rest[k] = v
            h = standard.get
            expires = h("expires")
            discard = h("discard")
            if expires is not None:
                expires = iso2time(expires)
            if expires is None:
                discard = True
            domain = h("domain")
            domain_specified = domain.startswith(".")
            c = Cookie(h("version"), name, value,
                       h("port"), h("port_spec"),
                       domain, domain_specified, h("domain_dot"),
                       h("path"), h("path_spec"),
                       h("secure"),
                       expires,
                       discard,
                       h("comment"),
                       h("commenturl"),
                       rest)
            cookie.set_cookie(c)
    return cookie
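

if __name__ == '__main__':
    # Hedged round-trip sketch (added): a string produced by
    # LWPCookieJar.as_lwp_str() parses back into an equivalent jar, which is
    # how the SOAP service hands sessions to and from its clients.
    from crawler import COOKIE
    jar = cookie_from_str(COOKIE)
    print(jar.as_lwp_str(True, True))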