Created
May 18, 2022 17:09
-
-
Save jrosen48/44b43119195a987be15fe86c01f27f69 to your computer and use it in GitHub Desktop.
getting-comments-from-facebook-scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Scrape posts and their comments from a list of school-district Facebook
pages and write both to CSV files.

Inputs:
    2020-2021-critical-race-posts-schools-districts.csv -- must contain a
        "URL" column holding full https://www.facebook.com/<page> links.
    cookies.txt -- session cookies consumed by facebook_scraper.

Outputs:
    comments-for-test-comments.csv -- one row per comment (post_id, text).
    posts-for-test-comments.csv    -- one row per scraped post.
"""
from facebook_scraper import get_posts
import pandas as pd
import time
import json  # NOTE(review): unused below -- confirm nothing else needs it before removing

# Number of result pages to walk through within one Facebook page.
n_pages_to_iterate = 100

# Read the spreadsheet that lists the district accounts.
district_data = pd.read_csv("2020-2021-critical-race-posts-schools-districts.csv")

# Strip the leading "https://www.facebook.com/" (25 characters) so only the
# page slug remains -- that slug is what get_posts() expects.
_FB_PREFIX_LEN = len("https://www.facebook.com/")  # == 25
links_of_district_accounts = district_data["URL"].str[_FB_PREFIX_LEN:]

# Limit to the first three accounts while testing.
links_of_district_accounts = links_of_district_accounts[0:3]

posts = []     # one dict per scraped post; turned into a DataFrame at the end
comments = []  # one dict per comment, keyed back to its post via post_id

for page_name in links_of_district_accounts:
    print("working on ", str(page_name))
    try:
        for post in get_posts(str(page_name),
                              pages=n_pages_to_iterate,
                              options={"comments": True},
                              cookies="cookies.txt"):
            post_id = post['post_id']
            # A post with no comments may carry None (or lack the key
            # entirely); fall back to an empty list so one bare post does
            # not abort the remaining posts of this page.
            for comment in post.get('comments_full') or []:
                comments.append(
                    {'post_id': post_id,
                     'comment_text': comment['text']}
                )
            # Accumulate in a plain list: DataFrame.append was removed in
            # pandas 2.0 and repeated appends were quadratic anyway.
            posts.append(post)
        # Be polite between pages -- this is web scraping.
        time.sleep(15)
    except Exception as e:  # was BaseException, which would also swallow Ctrl-C
        print(e)
        print('some kind of error for ', str(page_name))

comment_df = pd.DataFrame.from_records(comments)
comment_df.to_csv("comments-for-test-comments.csv")

# Build the posts frame once, from the accumulated records.
page = pd.DataFrame.from_records(posts)
print(page)
page.to_csv("posts-for-test-comments.csv")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment