Skip to content

Instantly share code, notes, and snippets.

@dreamline2
Created February 18, 2020 06:48
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dreamline2/6df1c4ff4e00e36404669b1437b45820 to your computer and use it in GitHub Desktop.
Save dreamline2/6df1c4ff4e00e36404669b1437b45820 to your computer and use it in GitHub Desktop.
Crawler for Facebook group posts.
import selenium
from selenium import webdriver
from bs4 import BeautifulSoup
import numpy as np
import time
import csv
# Date (the part of an <abbr title="..."> before the first space) at which
# scrolling stops.
TARGET_DATE = "2019/9/3"
# Seconds to wait after each scroll so newly requested posts can be rendered
# before the page source is parsed again. Arbitrary; tune for your connection.
SCROLL_PAUSE_SECONDS = 2
# Give up after this many consecutive scrolls that reveal no new timestamps,
# so the script cannot spin forever when TARGET_DATE is never reached.
MAX_STALLED_SCROLLS = 5


def _parse_page(page_source):
    """Extract post texts and timestamp titles from the page HTML.

    Returns a ``(texts, titles)`` pair: ``texts`` holds the text of every
    element with class ``text_exposed_root``; ``titles`` holds the ``title``
    attribute of every ``<abbr>`` element that has one.
    """
    soup = BeautifulSoup(page_source, "lxml")
    texts = [node.text for node in soup.find_all(class_="text_exposed_root")]
    titles = [
        tag.get("title")
        for tag in soup.find_all("abbr")
        if tag.get("title") is not None
    ]
    return texts, titles


def _last_date(titles):
    """Return the date part of the last timestamp title, or None if empty."""
    if not titles:
        return None
    return titles[-1].split(" ")[0]


chrome_options = webdriver.ChromeOptions()
# Content-setting value 2 is meant to block the notification permission
# prompt so it does not cover the page.
prefs = {"profile.default_content_setting_values.notifications": 2}
chrome_options.add_experimental_option("prefs", prefs)
# NOTE(review): the positional driver path is only accepted by older Selenium
# releases; confirm against the installed version.
driver = webdriver.Chrome("./chromedriver", options=chrome_options)
try:
    driver.get("https://www.facebook.com/groups/pythontw/")
    post, real = _parse_page(driver.page_source)

    stalled_scrolls = 0
    # Assumes scrolling down eventually makes TARGET_DATE the last timestamp
    # on the page; the stall guard below covers the case where it does not.
    while _last_date(real) != TARGET_DATE:
        seen_before = len(real)
        driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")
        time.sleep(SCROLL_PAUSE_SECONDS)
        post, real = _parse_page(driver.page_source)

        current_date = _last_date(real)
        if current_date is not None:
            print(current_date)

        if len(real) > seen_before:
            stalled_scrolls = 0
            continue
        stalled_scrolls += 1
        if stalled_scrolls >= MAX_STALLED_SCROLLS:
            print("No new posts loaded after %d scrolls; stopping before %s."
                  % (MAX_STALLED_SCROLLS, TARGET_DATE))
            break

    print(len(post))
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # and runs even when the crawl above raises.
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment