Last active
April 18, 2018 06:15
-
-
Save Yxnt/510e8f3c12377b8869643c42d263a69a to your computer and use it in GitHub Desktop.
简易爬虫基类
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import redis | |
import json | |
from bs4 import BeautifulSoup | |
from requests import request | |
from config.config import Config | |
# Module-level Redis connection pool; every setting is read from the
# project's Config object. Spider.r is built on top of this pool.
pool = redis.ConnectionPool(
    host=Config.redis_host,
    port=Config.redis_port,
    db=Config.redis_db,
    password=Config.redis_pass,
)
class Spider(object):
    """Simple crawler base class with optional login and a Redis cookie cache.

    On construction the spider can log in to the target site. The cookies
    returned by the login endpoint are stored in Redis under the domain
    string as key, so later instances for the same domain reuse them
    instead of logging in again.
    """

    # Default request headers. __init__ copies this dict per instance before
    # adding the Referer, so the class-level dict is never mutated.
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
    }
    # Redis client built on the module-level connection pool.
    r = redis.Redis(connection_pool=pool)
    # Alias to the BeautifulSoup class, reachable as self.bs4.
    bs4 = BeautifulSoup
    # Cookie dict assigned by login(); stays None until login() succeeds.
    cookie = None

    def __init__(self, login_url: str, domain: str, login: bool, loginparams: dict, loginmethod: str):
        """Store the login configuration and log in immediately if requested.

        Args:
            login_url: URL of the login endpoint.
            domain: Site domain; used as the Referer header and as the Redis
                key under which cookies are cached.
            login: Log in during construction only when this is exactly True.
            loginparams: Parameters sent to the login endpoint.
            loginmethod: HTTP method used for the login request.

        Raises:
            Exception: With message 'Login Failed' when ``login`` is True and
                login() does not return True.
        """
        self._login_url = login_url  # login endpoint
        self._islogin = login  # whether a login is required
        self._domain = domain  # domain
        self._loginmethod = loginmethod  # HTTP method of the login endpoint
        self._loginparams = loginparams  # login parameters
        # Copy before mutating: writing the Referer straight into the
        # class-level dict would leak it into every other Spider instance.
        self.headers = dict(self.headers)
        self.headers['Referer'] = self._domain  # set the Referer
        if self._islogin is True:
            status = self.login()
            if status is not True:
                raise Exception('Login Failed')

    def login(self):
        """Make sure ``self.cookie`` holds a cookie dict for this domain.

        A cached cookie from Redis is used when present. Otherwise the login
        endpoint is called: for GET the parameters go in the query string,
        for any other method they are sent as a JSON body.

        Returns:
            True when a cookie is available (cached or freshly obtained),
            False when the login request did not answer with HTTP 200.
        """
        cookie = self.__checkcookie()
        if cookie is None:
            # Compare case-insensitively: requests accepts 'get' as well, and
            # a lowercase method must not turn the params into a JSON body.
            if self._loginmethod.upper() == 'GET':
                resp = request(self._loginmethod, self._login_url, params=self._loginparams, headers=self.headers)
            else:
                resp = request(self._loginmethod, self._login_url, json=self._loginparams, headers=self.headers)
            # The response body is deliberately not parsed or printed here:
            # a non-JSON body (e.g. an HTML error page) must lead to the
            # status check below rather than to a decoding error.
            if resp.status_code != 200:
                return False
            cookie = requests.utils.dict_from_cookiejar(resp.cookies)  # cookiejar -> dict
            self.__savecookie(cookie)
        self.cookie = cookie
        return True

    def __checkcookie(self):
        """Return the cookie dict cached in Redis for this domain, or None."""
        result = self.r.get(self._domain)
        if not result:
            return None
        # The cache entry is the JSON text written by __savecookie.
        return json.loads(result.decode('utf8'))

    def __savecookie(self, cookie):
        """Cache ``cookie`` in Redis as JSON under the domain key.

        The entry expires after 10800 seconds (3 hours); ``nx=True`` means an
        entry that already exists is left untouched.
        """
        self.r.set(self._domain, json.dumps(cookie), ex=10800, nx=True)
Author
Yxnt
commented
Apr 18, 2018
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment