Skip to content

Instantly share code, notes, and snippets.

@Lopoi
Created February 20, 2020 22:56
Show Gist options
  • Save Lopoi/f2f54a9d3a02876f72fe8cbb0a0c83a4 to your computer and use it in GitHub Desktop.
Save Lopoi/f2f54a9d3a02876f72fe8cbb0a0c83a4 to your computer and use it in GitHub Desktop.
This is a bot that hunts for EDITs posted from u/SrGrafo's Reddit profile.
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from bs4 import BeautifulSoup
import csv
import sys
"""Scrape u/SrGrafo's old-reddit comment pages for imgur "EDIT" links.

Walks the paginated comment listing with headless Firefox, follows each
comment's permalink to recover the author ("pepperoni") and comment text,
and appends one row per imgur link to edits.csv.
"""
options = Options()
options.add_argument('--headless')

# newline='' is required by the csv module to avoid blank rows on Windows;
# keep the handle so it can be closed in the finally block below.
csv_file = open('edits.csv', 'w', newline='')
final = csv.writer(csv_file)
final.writerow(['ID', 'edit', 'name_of_edit', 'pepperoni', 'comment',
                'subreddit', 'post', 'context_link'])

browser = webdriver.Firefox(options=options)
try:
    browser.get("https://old.reddit.com/user/SrGrafo/comments/")
    soup = BeautifulSoup(browser.page_source, 'html.parser')

    # Guard against a missing "next" button (last page) instead of crashing
    # on `.a` of None. `next_url` also avoids shadowing the builtin `next`.
    next_button = soup.select_one(".next-button")
    next_url = next_button.a['href'] if next_button is not None else None

    pages = 0
    i = 1
    # BUG FIX: the original used `or`, which kept looping after the last
    # page (next was None) and crashed; both conditions must hold.
    while next_url is not None and pages < 1000:
        print("Searching Page ", pages)
        edits = soup.select("a[href*=imgur]")
        for edit in edits:
            # Climb from the <a> up to the comment container to reach
            # the subreddit/title/context metadata around it.
            father = edit.parent.parent.parent.parent.parent
            name = edit.parent.text
            edit = edit['href']
            subreddit = father.parent.select_one('a.subreddit').text
            post = father.parent.select_one('a.title').text
            context_link = father.find(attrs={"data-event-action": "context"})['href']

            # Open the comment permalink to find the same imgur link in
            # its full-thread context.
            browser.get(context_link)
            smallsoup = BeautifulSoup(browser.page_source, 'html.parser')
            realedit = smallsoup.select_one("a[href=\"" + edit + "\"]")
            if realedit is not None:
                realfather = (realedit.parent.parent.parent.parent.parent
                              .parent.parent.parent.parent)
                if realfather.get('class')[0] == "thing":
                    pepperoni = realfather.select_one(
                        "div.entry > p.tagline > a.author").text
                    comment = realfather.select_one(
                        "div.entry > form.usertext > div.usertext-body"
                        " > div.md > p").text
                else:
                    # Link appeared in the post body, not a comment.
                    pepperoni = "Self"
                    comment = "Post"
            else:
                # Typo fix: the original wrote "Not Fount" into the CSV.
                pepperoni = 'Not Found'
                comment = "Not Found"
            # Encode the comment so characters the console can't represent
            # don't raise UnicodeEncodeError.
            final.writerow([i, edit, name, pepperoni,
                            comment.encode(sys.stdout.encoding, errors='replace'),
                            subreddit, post, context_link])
            i += 1

        browser.get(next_url)
        soup = BeautifulSoup(browser.page_source, 'html.parser')
        next_button = soup.select_one(".next-button")
        next_url = next_button.a['href'] if next_button is not None else None
        print("Page ", pages, " Done")
        pages += 1
finally:
    # Always release the browser and flush/close the CSV, even on error.
    browser.quit()
    csv_file.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment