Last active
April 15, 2021 08:41
-
-
Save ikuokuo/1160862c154d550900fb80110828c94c to your computer and use it in GitHub Desktop.
Using Selenium to log in to a website, then Requests to fetch its HTML
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# pylint: disable=missing-docstring,import-outside-toplevel,invalid-name | |
import sys | |
import selenium | |
# Announce the installed Selenium version, followed by an 80-column rule.
print(
    'Selenium version is {}'.format(selenium.__version__),
    '-' * 80,
    sep='\n',
)
def main():
    """Log in to a website with Selenium, then fetch a page with Requests.

    Steps:
      1. Read the login/target URLs and the account credentials from
         ``secrets/douban.json``.
      2. Drive Chrome through the login form and open the target page,
         saving a screenshot to ``target.png``.
      3. Copy the browser's user agent and cookies into a
         ``requests.Session`` and save the target page's HTML to
         ``target.html``.

    Exits through ``sys.exit`` with a message when an expected element does
    not appear within the wait timeout. The browser is always closed, even
    when a step fails.
    """
    #-----------------------------------------------------------------------------
    # load config
    import json
    from types import SimpleNamespace as Namespace

    secret_file = 'secrets/douban.json'
    # Expected layout of the secret file:
    # {
    #   "url": {
    #     "login": "https://www.douban.com/",
    #     "target": "https://www.douban.com/mine/"
    #   },
    #   "account": {
    #     "username": "username",
    #     "password": "password"
    #   }
    # }
    with open(secret_file, 'r', encoding='utf-8') as f:
        # object_hook turns every JSON object into a namespace so that the
        # values can be read with attribute access (config.url.login).
        config = json.load(f, object_hook=lambda d: Namespace(**d))

    login_url = config.url.login
    target_url = config.url.target
    username = config.account.username
    password = config.account.password

    #-----------------------------------------------------------------------------
    # automated testing
    from selenium import webdriver
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

    # Ie Start
    #  Selenium Click is not working with IE11 in Windows 10
    #   https://github.com/SeleniumHQ/selenium/issues/4292
    # opt = webdriver.IeOptions()
    # opt.ensure_clean_session = True
    # opt.ignore_protected_mode_settings = True
    # opt.ignore_zoom_level = True
    # opt.initial_browser_url = login_url
    # opt.native_events = False
    # opt.persistent_hover = True
    # opt.require_window_focus = True
    # driver = webdriver.Ie(options = opt)
    # Ie End

    # Chrome Start
    opt = webdriver.ChromeOptions()
    driver = webdriver.Chrome(options=opt)
    #  Chrome opens with "Data;" with selenium
    #   https://stackoverflow.com/questions/37159684/chrome-opens-with-data-with-selenium
    # Chrome End

    # driver.implicitly_wait(5)

    # try/finally guarantees the browser process is shut down on every exit
    # path, including sys.exit() and unexpected Selenium errors.
    try:
        wait = WebDriverWait(driver, 5)

        print('open login page ...')
        driver.get(login_url)

        # The login form is inside the first iframe; wait for it instead of
        # indexing the element list right after navigation.
        try:
            wait.until(
                EC.frame_to_be_available_and_switch_to_it((By.TAG_NAME, 'iframe')))
        except TimeoutException:
            sys.exit('open login page timeout')

        # find_element(By.…) replaces the find_element_by_* helpers that
        # were removed in Selenium 4.
        driver.find_element(By.CSS_SELECTOR, 'li.account-tab-account').click()
        driver.find_element(By.NAME, 'username').send_keys(username)
        driver.find_element(By.NAME, 'password').send_keys(password)
        driver.find_element(By.CSS_SELECTOR, '.account-form .btn').click()
        try:
            wait.until(EC.presence_of_element_located((By.ID, "content")))
        except TimeoutException:
            sys.exit('open login page timeout')

        print('open target page ...')
        driver.get(target_url)
        try:
            wait.until(EC.presence_of_element_located((By.ID, "board")))
        except TimeoutException:
            sys.exit('open target page timeout')

        # save screenshot
        driver.save_screenshot('target.png')
        print('saved to target.png')

        # Capture what Requests needs before the browser goes away.
        selenium_user_agent = driver.execute_script("return navigator.userAgent;")
        browser_cookies = driver.get_cookies()
        # driver.delete_all_cookies()
    finally:
        driver.quit()

    #-----------------------------------------------------------------------------
    # save html
    import requests

    requests_session = requests.Session()
    # Reuse the browser's identity so the site treats this as the same client.
    requests_session.headers.update({"user-agent": selenium_user_agent})
    for cookie in browser_cookies:
        requests_session.cookies.set(cookie['name'], cookie['value'], domain=cookie['domain'])

    resp = requests_session.get(target_url)
    resp.encoding = resp.apparent_encoding
    # resp.encoding = 'utf-8'
    print('status_code = {0}'.format(resp.status_code))

    # Write as UTF-8 explicitly: the platform default encoding may be unable
    # to represent the page's characters.
    with open('target.html', 'w', encoding='utf-8') as fout:
        fout.write(resp.text)
    print('saved to target.html')
# Script entry point: run the workflow only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
target.png
and target.html