Skip to content

Instantly share code, notes, and snippets.

@yumu19
Last active July 21, 2023 12:12
Show Gist options
  • Save yumu19/26f596e4a1b5d7cd71ca812f7227ba8f to your computer and use it in GitHub Desktop.
Save yumu19/26f596e4a1b5d7cd71ca812f7227ba8f to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# 2020.08.17 Tsubasa Yumura 作成
# 2023.07.21 Tsubasa Yumura 更新
#
# ■これは何?
# EC2020実行委員(出版担当)の作業のために下記を実行するスクリプトです
# - 論文投稿フォームから著者情報をスクレイピングしTSVで保存
# - 著作権同意のスクリーンショットを取得
#
# ■前準備
# $ pip install selenium
# 下記リンクよりChromeDriverをダウンロードしてスクリプトと同じディレクトリに配置
# https://sites.google.com/a/chromium.org/chromedriver/home
#
# ■使い方
# $ python3 ./ec_pub_scraper.py
#
# 参考
# https://tanuhack.com/selenium/
# https://qiita.com/shota-nekoneko/items/64bbd0c2f534d20e7b77
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.alert import Alert
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time
import os
import re
import csv
# Chrome launch flags; each string is handed to the browser as-is.
options = Options()
options.add_argument('--disable-gpu')
options.add_argument('--disable-extensions')
options.add_argument('--proxy-server="direct://"')
options.add_argument('--proxy-bypass-list=*')
options.add_argument('--start-maximized')

# Credentials typed into the login form below.
# NOTE(review): these look like placeholders - replace before running and
# avoid committing real credentials.
my_email = 'yumu@example.com'
my_password = 'XXXXXXXXXXXXXXX'

DRIVER_PATH = './chromedriver'
SCREENSHOT_DIR = './screenshots'
OUTPUT_FILE = './output.tsv'
NUMBER = 54  # number of submissions

os.makedirs(SCREENSHOT_DIR, exist_ok=True)

# The original script referenced `sspath` without ever defining it, which
# raised NameError at the screenshot step.
# NOTE(review): the file name is chosen here - confirm the intended naming.
sspath = os.path.join(SCREENSHOT_DIR, 'screenshot.png')

# newline='' is what the csv module requires of files it writes to; the
# explicit encoding keeps the TSV identical across platforms.
# The `with` block guarantees the file is closed even if the browser fails.
with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8') as f:
    # NOTE(review): no rows are written through `writer` in this script as
    # it stands; it is kept so scraping code can be added back.
    writer = csv.writer(f, delimiter='\t')

    # `options=` replaces the deprecated `chrome_options=` keyword.
    # NOTE(review): recent Selenium 4 releases also drop `executable_path`
    # in favour of a Service object - confirm against the installed version.
    driver = webdriver.Chrome(executable_path=DRIVER_PATH, options=options)
    try:
        url = 'https://ec2020.entcomp.org/sys/users/login'
        driver.get(url)

        # Log in
        # find_element(By.<kind>, ...) works on both Selenium 3 and 4; the
        # find_element_by_* helpers were removed in Selenium 4.
        email = driver.find_element(By.ID, 'UserMailaddress')
        email.send_keys(my_email)
        password = driver.find_element(By.ID, 'UserPassword')
        password.send_keys(my_password)
        time.sleep(1)
        login_selector = 'div:nth-child(6) > div > div > input'
        login = driver.find_element(By.CSS_SELECTOR, login_selector)
        login.click()
        time.sleep(1)

        # Zoom the page out to 50% and stretch the window to the document's
        # scroll height (presumably so the screenshot covers the whole page).
        driver.execute_script("document.body.style.zoom='50%'")
        h = driver.execute_script('return document.body.scrollHeight')
        driver.set_window_size(1024, h)
        driver.save_screenshot(sspath)
        print("Save screenshot: " + sspath)
    finally:
        # Always shut the browser down, even when a step above raises, so no
        # chromedriver/Chrome process is left behind.
        driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment