-
-
Save ilovefreesw/36587762f3239162a4c1acef5e759822 to your computer and use it in GitHub Desktop.
import os
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from tqdm import tqdm
# Bulk full-page screenshots: visit every URL listed in Links_File with
# headless Chrome and save the rendered <body> as OP_DIR/<n>.png, where <n>
# counts the successfully captured pages starting at 1.

# --- Configuration (fill in before running) ---
Links_File = r''  # path to a text file with one URL per line
OP_DIR = r''      # directory that receives the numbered PNG files


def S(X):
    """Return the loaded page's document.body.scrollHeight plus X pixels.

    Reads the module-level ``driver``, so it must only be called after the
    driver has been created and a page has been loaded.
    """
    return driver.execute_script('return document.body.scrollHeight') + X


# Load the URL list, dropping trailing whitespace/newlines. Blank lines are
# skipped so they are never handed to driver.get().
with open(Links_File, "r") as f:
    lines = [line.rstrip() for line in f]
lines = [line for line in lines if line]

options = webdriver.ChromeOptions()
# Pass the flag explicitly: newer Selenium 4 releases removed the
# Options.headless property, so `options.headless = True` would silently do
# nothing there and open a visible browser window.
options.add_argument('--headless')
options.add_argument('--log-level=3')  # presumably quiets Chrome's console logging
driver = webdriver.Chrome(options=options)

# try/finally guarantees the Chrome process is shut down even when something
# outside the per-link handler fails (e.g. the CDP call or a Ctrl+C).
try:
    driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.4103.97 Safari/537.36'})
    print(driver.execute_script("return navigator.userAgent;"))

    i = 1  # number used for the next screenshot file; advances only on success
    for link in tqdm(lines, ncols=65):
        try:
            driver.get(link)
            time.sleep(5)  # fixed pause, presumably to let the page finish rendering
            # Grow the window to the page's full height so the whole body is
            # visible in a single capture.
            driver.set_window_size(1024, S(0))  # May need manual adjustment
            # os.path.join picks the right separator for the platform and
            # avoids the invalid '\{' escape of the old f-string path.
            driver.find_element(By.TAG_NAME, "body").screenshot(os.path.join(OP_DIR, f'{i}.png'))
            i = i + 1
        except WebDriverException:
            # Report the URL that failed and move on to the next one.
            print(link)
            continue
finally:
    driver.quit()
Doesn't work for me. Is it due to Selenium removing that find element by tag?
I used the following:
driver.find_element(By.TAG_NAME, 'body')
Doesn't work for me. Is it due to Selenium removing that find element by tag?
If you downgrade your selenium version, it will work.
I got this error:
_Traceback (most recent call last):
File "C:\Users\mateusz\Downloads\imx.to\bulk_webpage_screenshots.py", line 29, in <module>
driver.find_elements(By.TAG_NAME, "body").screenshot(f'{OP_DIR}\{i}.png')
AttributeError: 'list' object has no attribute 'screenshot'_
I got this error:
_Traceback (most recent call last): File "C:\Users\mateusz\Downloads\imx.to\bulk_webpage_screenshots.py", line 29, in <module> driver.find_elements(By.TAG_NAME, "body").screenshot(f'{OP_DIR}\{i}.png') AttributeError: 'list' object has no attribute 'screenshot'_
@hejhopsa Made a few changes. See if it works.
You can do it in two ways.
- Inject JavaScript based on website you are taking screenshot of.
- Load Chrome with an extension installed that will block the cookie and other popups. Try with https://adlock.com/ or https://crumbs.org/en/
You will need CRX file of any of these extensions that you can get using this: https://chrome.google.com/webstore/detail/get-crx/dijpllakibenlejkbajahncialkbdkjc
Now, you can load the extension using CRX like this:
options.add_extension('pathToCRX')
Add this after line 18 and update PATH to the CRX file of the extension.
Thank you, following your suggestions worked fine!
You can do it in two ways.
- Inject JavaScript based on website you are taking screenshot of.
- Load Chrome with an extension installed that will block the cookie and other popups. Try with https://adlock.com/ or https://crumbs.org/en/
You will need CRX file of any of these extensions that you can get using this: https://chrome.google.com/webstore/detail/get-crx/dijpllakibenlejkbajahncialkbdkjc Now, you can load the extension using CRX like this:
options.add_extension('pathToCRX')
Add this after line 18 and update PATH to the CRX file of the extension.
Thank you, following your suggestions worked fine!
Doesn't work for me. Is it due to Selenium removing that find element by tag?