Skip to content

Instantly share code, notes, and snippets.

@gstru
Last active September 10, 2021 09:50
Show Gist options
  • Save gstru/02ec642a3b057d9421fe67e1245198f2 to your computer and use it in GitHub Desktop.
Save gstru/02ec642a3b057d9421fe67e1245198f2 to your computer and use it in GitHub Desktop.
Script to read news without scrolling
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import requests
import time
from urllib.parse import unquote, urlparse
import datetime
import openpyxl
# Workbook that stores the list of news sites to visit.
filename = 'path/data.xlsm'
wb = openpyxl.load_workbook(filename)
ws = wb['Sites']
# Column A of the 'Sites' sheet holds one entry per row. The first cell is
# skipped (presumably a header row -- confirm against the workbook).
column = ws['A']
# Empty cells are dropped so that no None/'' value is later handed to the
# browser as a URL.
news = [cell.value for cell in column[1:] if cell.value]
print('Ho caricato le riviste')
# Chrome configuration: two extensions (ad blocker and cookie-banner remover)
# are loaded from packed .crx files, then start-up switches are added.
options = webdriver.ChromeOptions()
options.add_extension("path/ublock.crx")
options.add_extension("path/I-don't-care-about-cookies.crx")
options.add_argument('--disable-gpu')
# A single --log-level switch is sufficient: passing the switch several times
# with different values is contradictory, and only one value can take effect.
# Level 3 is the quietest setting (presumably fatal-only -- confirm against
# the Chromium switch list).
options.add_argument("--log-level=3")
options.add_argument("--ignore-certificate-errors")
options.add_argument("--start-maximized")
# NOTE(review): `executable_path` is deprecated in Selenium 4 in favour of a
# `Service` object; kept here so the script still runs on the Selenium
# version it was written for -- confirm the installed version before changing.
driver = webdriver.Chrome(executable_path=r'path/chromedriver.exe', options=options)
# Delay, in seconds, between two consecutive scroll steps.
SCROLL_PAUSE_TIME = 0.015


def scroll(step=2.5):
    """Scroll the page currently shown by the module-level ``driver``.

    Args:
        step: Vertical distance handed to ``window.scrollBy``; defaults to
            2.5, the step size the script has always used.
    """
    # The step is passed as a script argument instead of being baked into
    # the JavaScript source, so callers can vary it without string building.
    driver.execute_script("window.scrollBy(0, arguments[0])", step)
# How long, in seconds, each page is kept open while being auto-scrolled.
PAGE_VIEW_SECONDS = 15

# Visit every site in turn and scroll it slowly for PAGE_VIEW_SECONDS.
# The try/finally guarantees the browser is shut down even when a page fails
# to load, and iterating directly over `news` makes an empty list a no-op
# instead of an IndexError.
try:
    for url in news:
        driver.get(url)
        driver.execute_script("document.body.style.zoom='100%'")
        # A monotonic clock keeps the viewing window correct even if the
        # system wall clock is adjusted while the script is running.
        t_end = time.monotonic() + PAGE_VIEW_SECONDS
        while time.monotonic() < t_end:
            scroll()
            time.sleep(SCROLL_PAUSE_TIME)
finally:
    # quit() closes every window and ends the WebDriver session, so a
    # separate close() call is not needed.
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment