Skip to content

Instantly share code, notes, and snippets.

@ikuokuo
Last active April 15, 2021 08:41
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ikuokuo/1160862c154d550900fb80110828c94c to your computer and use it in GitHub Desktop.
Save ikuokuo/1160862c154d550900fb80110828c94c to your computer and use it in GitHub Desktop.
Using Selenium to log in to a website, then Requests to fetch its HTML
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=missing-docstring,import-outside-toplevel,invalid-name
import sys
import selenium
# Report the installed Selenium version, followed by an 80-column rule.
print('Selenium version is {}'.format(selenium.__version__), '-' * 80, sep='\n')
def main():
    """Log in to a site with Selenium, then re-fetch a page with Requests.

    Steps:
      1. Read the login/target URLs and the account credentials from
         ``secrets/douban.json``.
      2. Drive Chrome through the login form and open the target page,
         saving a screenshot to ``target.png``.
      3. Copy the browser's user agent and cookies into a
         ``requests.Session`` and write the target page's HTML to
         ``target.html``.

    Raises:
        SystemExit: if the page reached after login, or the target page,
            does not show its expected element within the wait timeout.
    """
    #-----------------------------------------------------------------------------
    # load config
    import json
    from types import SimpleNamespace as Namespace

    secret_file = 'secrets/douban.json'
    # Expected layout of the secrets file:
    # {
    #   "url": {
    #     "login": "https://www.douban.com/",
    #     "target": "https://www.douban.com/mine/"
    #   },
    #   "account": {
    #     "username": "username",
    #     "password": "password"
    #   }
    # }
    with open(secret_file, 'r', encoding='utf-8') as f:
        # object_hook turns every JSON object into a namespace so the
        # values can be read as attributes (config.url.login, ...).
        config = json.load(f, object_hook=lambda d: Namespace(**d))

    login_url = config.url.login
    target_url = config.url.target
    username = config.account.username
    password = config.account.password

    #-----------------------------------------------------------------------------
    # automated testing
    from selenium import webdriver
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

    # Ie Start
    #   Selenium Click is not working with IE11 in Windows 10
    #     https://github.com/SeleniumHQ/selenium/issues/4292
    #   opt = webdriver.IeOptions()
    #   opt.ensure_clean_session = True
    #   opt.ignore_protected_mode_settings = True
    #   opt.ignore_zoom_level = True
    #   opt.initial_browser_url = login_url
    #   opt.native_events = False
    #   opt.persistent_hover = True
    #   opt.require_window_focus = True
    #   driver = webdriver.Ie(options = opt)
    # Ie End

    # Chrome Start
    opt = webdriver.ChromeOptions()
    driver = webdriver.Chrome(options=opt)
    #   Chrome opens with "Data;" with selenium
    #     https://stackoverflow.com/questions/37159684/chrome-opens-with-data-with-selenium
    # Chrome End

    # From here on the browser is running: the try/finally guarantees it is
    # shut down on every exit path (timeouts, missing elements, Ctrl-C).
    try:
        # driver.implicitly_wait(5)
        wait = WebDriverWait(driver, 5)

        print('open login page ...')
        driver.get(login_url)

        # The login form lives inside the first iframe of the page.
        # find_element(By.…) replaces the find_element_by_* helpers, which
        # were removed in Selenium 4.3.
        driver.switch_to.frame(driver.find_elements(By.TAG_NAME, 'iframe')[0])
        driver.find_element(By.CSS_SELECTOR, 'li.account-tab-account').click()
        driver.find_element(By.NAME, 'username').send_keys(username)
        driver.find_element(By.NAME, 'password').send_keys(password)
        driver.find_element(By.CSS_SELECTOR, '.account-form .btn').click()

        try:
            wait.until(EC.presence_of_element_located((By.ID, "content")))
        except TimeoutException:
            sys.exit('open login page timeout')

        print('open target page ...')
        driver.get(target_url)

        try:
            wait.until(EC.presence_of_element_located((By.ID, "board")))
        except TimeoutException:
            sys.exit('open target page timeout')

        # save screenshot
        driver.save_screenshot('target.png')
        print('saved to target.png')

        # Capture what the Requests session needs while the browser is
        # still alive.
        selenium_user_agent = driver.execute_script("return navigator.userAgent;")
        browser_cookies = driver.get_cookies()
        # driver.delete_all_cookies()
    finally:
        driver.quit()

    #-----------------------------------------------------------------------------
    # save html
    import requests

    requests_session = requests.Session()
    requests_session.headers.update({"user-agent": selenium_user_agent})
    for cookie in browser_cookies:
        requests_session.cookies.set(cookie['name'], cookie['value'], domain=cookie['domain'])

    resp = requests_session.get(target_url)
    resp.encoding = resp.apparent_encoding
    # resp.encoding = 'utf-8'
    print('status_code = {0}'.format(resp.status_code))

    # Write with an explicit encoding: the platform default (e.g. cp1252 on
    # Windows) cannot represent every character the page may contain.
    with open('target.html', 'w', encoding='utf-8') as fout:
        fout.write(resp.text)
    print('saved to target.html')
# Run the login-and-fetch flow only when executed as a script, not on import.
if __name__ == "__main__":
    main()
@ikuokuo
Copy link
Author

ikuokuo commented May 29, 2020

  1. login

douban_login

  2. target.png and target.html

douban_result

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment