Created
December 22, 2019 15:55
-
-
Save frankli0324/8f03c51aeb9ba34f807e851da8ed98f8 to your computer and use it in GitHub Desktop.
方便编写ehall的爬虫
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from requests import Session | |
import re | |
class EhallSession(Session):
    """A ``requests`` session that logs in to the Xidian ehall portal on creation.

    Constructing an instance performs the login round-trip against the
    ``ids.xidian.edu.cn`` authentication server, so the returned session can
    be used straight away for ehall requests.
    """

    _LOGIN_URL = 'http://ids.xidian.edu.cn/authserver/login'
    # ``[^"]*`` instead of ``.*``: a greedy ``.*`` runs up to the LAST quote on
    # the line, so extra attributes or a second <input> on the same line would
    # be swallowed into the captured name/value.
    _HIDDEN_TAG_RE = re.compile(
        r'<input type="hidden" name="([^"]*)" value="([^"]*)"'
    )
    _HTML_COMMENT_RE = re.compile(r'<!--\s*([\s\S]*?)\s*-->')

    def __init__(self, username: str, password: str) -> None:
        """Create the session, set a browser User-Agent and log in to ehall.

        Args:
            username: Account name submitted to the login form.
            password: Password submitted to the login form.
        """
        super().__init__()
        self.headers.update({
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36'
        })
        self.login(
            'http://ehall.xidian.edu.cn:80//appShow',
            username, password
        )

    def login(self, target: str, username: str, password: str) -> None:
        """Submit the authserver login form for the service ``target``.

        The login page is fetched first so that its hidden form fields can be
        echoed back together with the credentials.

        Args:
            target: URL passed as the ``service`` query parameter.
            username: Account name placed in the ``username`` form field.
            password: Password placed in the ``password`` form field.
        """
        page = self.get(self._LOGIN_URL, params={'service': target}).text
        # Drop HTML comments before scanning, so that inputs inside
        # commented-out markup are not submitted.
        page = self._HTML_COMMENT_RE.sub('', page)
        hidden_fields = dict(self._HIDDEN_TAG_RE.findall(page))
        self.post(
            self._LOGIN_URL,
            params={'service': target},
            # Credentials come last so they override same-named hidden fields.
            data={
                **hidden_fields,
                'username': username,
                'password': password,
            },
        )

    def use_app(self, app_id) -> None:
        """Request the ``appShow`` page for ``app_id`` with this session.

        Args:
            app_id: Value sent as the ``appId`` query parameter.
        """
        self.get('http://ehall.xidian.edu.cn//appShow', params={
            'appId': app_id
        }, headers={
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        })

    def get_app_list(self, search_key: str = ''):
        """Search the ehall service catalogue and return the ``data`` payload.

        Args:
            search_key: Search text; the empty string is sent as-is.

        Returns:
            The ``data`` entry of the JSON response (first page, up to 150
            entries, requested in descending ``recentUseCount`` order).

        Raises:
            AssertionError: If the response's ``hasLogin`` flag is falsy.
        """
        app_list = self.get('http://ehall.xidian.edu.cn/jsonp/serviceSearchCustom.json', params={
            'searchKey': search_key,
            'pageNumber': 1,
            'pageSize': 150,
            'sortKey': 'recentUseCount',
            'orderKey': 'desc'
        }).json()
        # Explicit raise rather than ``assert``: assert statements are removed
        # under ``python -O``, which would silently skip this check.
        if not app_list['hasLogin']:
            raise AssertionError('ehall reports that this session is not logged in')
        return app_list['data']

    def get_app_id(self, search_key: str):
        """Return the ``appId`` of the first search hit for ``search_key``.

        Args:
            search_key: Search text forwarded to :meth:`get_app_list`.

        Returns:
            The ``appId`` of the first result, or ``None`` when the search
            returns nothing.
        """
        search_result = self.get_app_list(search_key)
        if not search_result:
            return None
        # When several apps match, the first one in the returned order is used.
        return search_result[0]['appId']
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment