Skip to content

Instantly share code, notes, and snippets.

@BugMonkey
Created March 25, 2018 15:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save BugMonkey/1fea4465a53a94e5f87d7146db05dd8c to your computer and use it in GitHub Desktop.
Save BugMonkey/1fea4465a53a94e5f87d7146db05dd8c to your computer and use it in GitHub Desktop.
保持登录状态的爬虫练手
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding">
<file url="PROJECT" charset="UTF-8" />
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectLevelVcsManager" settingsEditedManually="false">
<OptionsSetting value="true" id="Add" />
<OptionsSetting value="true" id="Remove" />
<OptionsSetting value="true" id="Checkout" />
<OptionsSetting value="true" id="Update" />
<OptionsSetting value="true" id="Status" />
<OptionsSetting value="true" id="Edit" />
<ConfirmationsSetting value="0" id="Add" />
<ConfirmationsSetting value="0" id="Remove" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6.3 (C:\ProgramData\Anaconda3\python.exe)" project-jdk-type="Python SDK" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/untitled1.iml" filepath="$PROJECT_DIR$/.idea/untitled1.iml" />
</modules>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="projectConfiguration" value="Nosetests" />
<option name="PROJECT_TEST_RUNNER" value="Nosetests" />
</component>
</module>
�PNG

IHDR3�T�sRGB���gAMA�� �a pHYs���o�dCIDATXGc�?����L�k��lWs��� �2{0��?��ʀ���%Pqa0�&�]���@�z��b0����L�?�� @�@ <c����j���!��< ��8R�{�Aň�1wx �H�N�`����A<x� ,�`<�A65�ݿ� ,��q����c�=�l �f�G�z��g���1 5F`!�aك9�=0G�z*zQϠ� Ԡz�:��� <��a$��U���=HR�� N�1�p`�9����f
F�)��MbՓ�d���`�3��z���@�E]����B�� Q��0��ZIEND�B`�
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" >
<head><title>
无标题页
</title></head>
<body>
<form name="form1" method="post" action="ValidateCode.aspx" id="form1">
<div>
<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwUJNzgzNDMwNTMzZGRa6pE+5m7RnweLO9m8ae4PIZUwoqeyFyoIc2qKxE0WfQ==" />
</div>
<div>
</div>
</form>
</body>
</html>
from urllib.request import urlretrieve
import requests
from bs4 import BeautifulSoup
import pytesseract
from PIL import Image
# Point pytesseract at the local Tesseract binary. A raw string is used so the
# backslashes in the Windows path are never read as escape sequences ("\P" and
# "\T" are invalid escapes and trigger a warning in a normal string literal).
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR/tesseract.exe'
class madeInChinaParser(object):
    """Log in to win.madeinchina.cn with an OCR-solved captcha and run a customer search.

    All traffic goes through a single ``requests.Session`` so that the
    ASP.NET session cookie issued by the landing page is reused for the
    captcha, the login and the search requests.
    """

    LAND_URL = 'http://win.madeinchina.cn/saas/land.html'
    CAPTCHA_URL = 'http://win.madeinchina.cn/ValidateCode.aspx'
    LOGIN_URL = 'http://win.madeinchina.cn/saas/LoginControl.aspx'
    MENU_URL = 'http://win.madeinchina.cn/menu.html'
    SEARCH_URL = 'http://win.madeinchina.cn/YellowPages01_revise.aspx'
    # Seconds to wait for any single request; without it a stalled server
    # would block the script forever.
    TIMEOUT = 15

    def __init__(self, email='791281557@qq.com', password='JOSCOLILY@914501'):
        """Create the HTTP session.

        Args:
            email: Account e-mail, un-encoded (requests form-encodes it).
            password: Account password, un-encoded.

        NOTE(review): the defaults keep the script runnable as before, but
        credentials should not live in source; pass them in explicitly.
        """
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
        self.cookie = None
        self.email = email
        self.password = password
        self.rq = requests.Session()
        # Install the browser-like headers once on the session so every
        # request sends them; cookies are tracked by the session itself.
        self.rq.headers.update(self.headers)

    def main_page(self):
        """Fetch the landing page and captcha, then log in with the OCR'd code."""
        self.login(self.getRequest(self.LAND_URL))

    def getRequest(self, url):
        """Open ``url`` to obtain a session, then download and OCR the captcha.

        The captcha image is saved as ``logo.jpg`` in the working directory.

        Args:
            url: Page to request first so the server issues its cookies.

        Returns:
            The captcha text recognised by Tesseract, with whitespace removed.

        Raises:
            requests.HTTPError: If the page or the captcha request fails, so
                that OCR is never run on a stale or missing ``logo.jpg``.
        """
        response = self.rq.get(url, timeout=self.TIMEOUT)
        response.raise_for_status()
        self.cookie = response.cookies
        print(self.cookie)

        # Fetch the captcha within the same session so it is bound to the
        # same server-side session as the upcoming login.
        img = self.rq.get(self.CAPTCHA_URL, timeout=self.TIMEOUT)
        img.raise_for_status()
        with open('logo.jpg', 'wb') as f:
            f.write(img.content)

        with Image.open('logo.jpg') as image:
            raw_text = pytesseract.image_to_string(image)
        # OCR output carries trailing newlines (and sometimes stray spaces)
        # that would make the submitted code wrong.
        code = ''.join(raw_text.split())
        print(code)
        return code

    def login(self, inputcode):
        """Submit the login form and, afterwards, run the customer search.

        Args:
            inputcode: Captcha text to send along with the credentials.
        """
        # Sent as a form body (``data=``): the values are given un-encoded
        # because requests performs the percent-encoding ("@" -> "%40").
        login_map = {'type': '1', 'inputCode': inputcode, 'Email': self.email, 'pwd': self.password}
        request_headers = {'Referer': self.LAND_URL, 'X-Requested-With': 'XMLHttpRequest'}
        login_response = self.rq.post(self.LOGIN_URL, data=login_map, headers=request_headers,
                                      timeout=self.TIMEOUT)
        print(self.cookie, login_response.headers)
        # NOTE(review): the success/failure format of the login response is
        # not known here, so the search is attempted unconditionally.
        self.querry(self.cookie, login_response.headers)

    def querry(self, cookies, headers):
        """Run the customer search and print the status code and result page.

        Args:
            cookies: Unused; kept for interface compatibility (the session
                already carries the cookies).
            headers: Unused; kept for interface compatibility. These are the
                login *response* headers and must not be sent back.
        """
        # Load the search form first and collect its hidden inputs
        # (__VIEWSTATE and friends), which the form posts back to the server.
        form_response = self.rq.get(self.SEARCH_URL, headers={'Referer': self.MENU_URL},
                                    timeout=self.TIMEOUT)
        form_response.raise_for_status()
        form_page = BeautifulSoup(form_response.content, 'html.parser')
        query_map = {}
        for field in form_page.find_all('input', attrs={'type': 'hidden'}):
            field_name = field.get('name')
            if field_name:
                query_map[field_name] = field.get('value', '')

        query_map.update({'corpkey': '', 'drpProvince': '2177',
                          'drpCity': '2224', 'begintme': '2013-03-25',
                          'endtme': '2018-03-25', 'Button1': '搜索客户', 'textfield2': ''})
        request_headers = {'Referer': self.SEARCH_URL}
        print(request_headers)
        login_response = self.rq.post(self.SEARCH_URL, data=query_map, headers=request_headers,
                                      timeout=self.TIMEOUT)
        print(login_response.status_code)
        print(BeautifulSoup(login_response.content, 'html.parser', from_encoding='utf-8'))
if __name__ == "__main__":
    # Script entry point: build the scraper and start the
    # landing page -> captcha -> login -> search flow.
    scraper = madeInChinaParser()
    scraper.main_page()
1. 网址
http://win.madeinchina.cn/saas/land.html //登录页
2. 账号密码
791281557@qq.com
JOSCOLILY@914501
3. 查询请求
__EVENTARGUMENT
__LASTFOCUS
__VIEWSTATE=/wEPDwUKMTI2ODIxOTc0MQ9kFgICAw9kFgYCAQ8WAh4EVGV4dAUWPGI+5pCc57Si5p2h5Lu277yaPC9iPmQCAw8QZBAVIwnpgInmi6nnnIEG5a6J5b69BuWMl+S6rAbnpo/lu7oG55SY6IKDBuW5v+S4nAblub/opb8G6LS15beeBua1t+WNlwbmsrPljJcG5rKz5Y2XCem7kem+meaxnwbmuZbljJcG5rmW5Y2XBuWQieaelwbmsZ/oi48G5rGf6KW/Bui+veWugQnlhoXokpnlj6QG5a6B5aSPBumdkua1twblsbHkuJwG5bGx6KW/BumZleilvwbkuIrmtbcG5Zub5bedBuWkqea0pQbopb/ol48G5paw55aGBuS6keWNlwbmtZnmsZ8G6YeN5bqGBummmea4rwbmvrPpl6gG5Y+w5rm+FSMBMAQxMDAyBDEwOTgEMTEwMwQxMTgxBDI2MTQEMTI3NwQxMzgyBDE0NzQEMTUxMQQxNjcwBDE4MTYEMTkwOAQyMDAyBDIxMTgEMjE3NwQyMjU4BDIzNjEEMjQzNAQyNTM2BDI1NjEEMjg0NwQyNzI4BDI5NzMEMjYxMAQzMDc4BDMyNTYEMzI5MAQzMzcxBDM1NTkEMzQ3OAQzMjYyBDk5OTcEOTk5OAQ5OTk5FCsDI2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnFgECD2QCBA8QZBAVDgnpgInmi6nluIIG5bi45beeBua3ruWuiQnov57kupHmuK8G5Y2X5LqsBuWNl+mAmgboi4/lt54G5a6/6L+BBuazsOW3ngbml6DplKEG5b6Q5beeBuebkOWfjgbmiazlt54G6ZWH5rGfFQ4BMAQyMjQzBDIxOTYEMjE5MAQyMTc4BDIyMzAEMjI1MQQyMjAyBDIyMjQEMjI0NwQyMTgyBDIyMDgEMjIxOAQyMjM4FCsDDmdnZ2dnZ2dnZ2dnZ2dnZGRkBOfgZpiy4VU3YaOCWZILyFoZ1+Yk1rLXeMzUL6A01bk=
corpkey
drpProvince=2177
drpCity=2224
begintme=2013-03-25
endtme=2018-03-25
Button1=搜索客户
textfield2
4. 登录请求
POST http://win.madeinchina.cn/saas/LoginControl.aspx HTTP/1.1
Host: win.madeinchina.cn
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0
Accept: */*
Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2
Accept-Encoding: gzip, deflate
Referer: http://win.madeinchina.cn/saas/land.html
Content-Type: application/x-www-form-urlencoded
X-Requested-With: XMLHttpRequest
Content-Length: 69
Cookie: ASP.NET_SessionId=f5eixq0tgvq0rayr3sk1rykq
Connection: keep-alive
type=1&inputCode=dvww&Email=791281557%40qq.com&pwd=JOSCOLILY%40914501
4.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment