-
-
Save gavin19/8e2ed7547efcbb376e94f2057f951526 to your computer and use it in GitHub Desktop.
from selenium.webdriver import Firefox, FirefoxOptions | |
from selenium.common.exceptions import TimeoutException | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import WebDriverWait | |
import praw | |
from time import sleep | |
# Reddit API client. The ``...`` is a placeholder, not working code: replace
# it with real credentials (or the name of a site defined in praw.ini).
# Passing the literal Ellipsis makes PRAW look up a config section named
# "Ellipsis" and fail with "configparser.NoSectionError: No section: Ellipsis".
r = praw.Reddit(...)

# Headless Firefox session shared by the login step and the screenshot loop.
opts = FirefoxOptions()
opts.add_argument("--headless")
opts.set_preference("dom.push.enabled", False)  # kill notification popup
drv = Firefox(options=opts)

# Seconds: used for the fixed sleeps during login and as the upper bound
# when waiting for a comment element to appear.
timeout = 10
def login():
    """Log the shared browser session ``drv`` in to Reddit.

    Opens the login page, fills in the username and password fields,
    submits the form, then dismisses the cookie-consent popup when one is
    shown. The literal ``"your_username"`` / ``"your_password"`` strings are
    placeholders to be replaced with real credentials.

    Pauses for ``timeout`` seconds after submitting and again after the
    cookie step so the page can settle before the caller navigates away.
    """
    drv.get("https://www.reddit.com/login")
    drv.find_element(By.ID, "loginUsername").send_keys("your_username")
    drv.find_element(By.ID, "loginPassword").send_keys("your_password")
    drv.find_element(By.CSS_SELECTOR, "button[type='submit']").click()
    sleep(timeout)

    # The cookie agreement popup is optional (it may no longer be shown).
    # find_elements returns an empty list when nothing matches, whereas
    # find_element would raise and abort the whole login.
    accept_buttons = drv.find_elements(By.XPATH, '//button[text()="Accept all"]')
    if accept_buttons:
        accept_buttons[0].click()
    sleep(timeout)
# Log in, then screenshot every top-level comment of the hottest post in the
# subreddit. Each image is written to the working directory as
# "t1_<comment id>.png".
try:
    login()
    for post in r.subreddit("some_sub").hot(limit=1):
        drv.get("https://www.reddit.com" + post.permalink)
        for comment in post.comments:
            # The comment's DOM element id is the id prefixed with "t1_".
            element_id = f"t1_{comment.id}"
            try:
                # find_element_by_id was removed in Selenium 4.3; use the
                # By-based locator, as the login step already does.
                cmt = WebDriverWait(drv, timeout).until(
                    lambda d, element_id=element_id: d.find_element(
                        By.ID, element_id
                    )
                )
            except TimeoutException:
                print("Page load timed out...")
            else:
                cmt.screenshot(element_id + ".png")
finally:
    # Always shut the headless browser down, even when a step above fails,
    # so no orphaned Firefox process is left running.
    drv.quit()
`Traceback (most recent call last):
File "C:\Users\achil\AppData\Local\Programs\Python\Python310\lib\configparser.py", line 847, in items
d.update(self._sections[section])
KeyError: Ellipsis
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:\Users\achil\Bureau\tamerelapute.py", line 8, in <module>
r = praw.Reddit(...)
File "C:\Users\achil\AppData\Local\Programs\Python\Python310\lib\site-packages\praw\util\deprecate_args.py", line 43, in wrapped
return func(**dict(zip(_old_args, args)), **kwargs)
File "C:\Users\achil\AppData\Local\Programs\Python\Python310\lib\site-packages\praw\reddit.py", line 236, in __init__
self.config = Config(
File "C:\Users\achil\AppData\Local\Programs\Python\Python310\lib\site-packages\praw\config.py", line 84, in __init__
self.custom = dict(Config.CONFIG.items(site_name), **settings)
File "C:\Users\achil\AppData\Local\Programs\Python\Python310\lib\configparser.py", line 850, in items
raise NoSectionError(section)
configparser.NoSectionError: No section: Ellipsis
You provided the name of a praw.ini configuration which does not exist.
For help with creating a Reddit instance, visit
https://praw.readthedocs.io/en/latest/code_overview/reddit_instance.html
For help on configuring PRAW, visit
https://praw.readthedocs.io/en/latest/getting_started/configuration.html`
Thank you for this! Huge insight: scraping the comments is significantly easier when logging in to reddit in the driver.
this is a life saver