#!/usr/bin/env python3
import requests as r
import csv
import re
from pprint import pprint
"""
Code written for Python 3. It has better support for UTF-8.
Get a page access token first, then use the following URL to get a long lived access token. Long lived access tokens are valid for 60 days.
https://graph.facebook.com/oauth/access_token?grant_type=fb_exchange_token&client_id={APP_ID}&{APP_SECRET}={PAGE_ACCESS_TOKEN}
"""
# URL template for making Graph API requests
GRAPH_URL = 'https://graph.facebook.com/v2.5/{OBJECT_ID}/?access_token={ACCESS_TOKEN}&fields={FIELDS}'
# Long-lived access token
ACCESS_TOKEN = ''
# ID of the page whose videos you want to scan
PAGE_ID = ''
# emoji list to strip from comments
emoji_pattern = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           # u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           "]+", flags=re.UNICODE)

def get_json(object_id, fields):
    """
    Fetch the specified fields of object_id and return the response as a dict.
    """
    response = r.get(GRAPH_URL.format(OBJECT_ID=object_id,
                                      ACCESS_TOKEN=ACCESS_TOKEN,
                                      FIELDS=fields))
    return response.json()
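
# For example (with a hypothetical page ID), get_json('1234567890', 'videos{comments}')
# should return a dict shaped like
# {'videos': {'data': [...], 'paging': {...}}, 'id': '1234567890'}.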

def main():
    videos = get_json(PAGE_ID, 'videos{comments}')['videos']
    # structure of videos: {'data': [...], 'paging': {...}}
    # pprint(videos['paging'])
    video_ids = []  # IDs of all the videos of the page that have comments
    while True:
        try:
            for video in videos['data']:
                # each entry looks like {'comments': {...}, 'id': '...'}
                # or, if nobody commented, just {'id': '...'}
                if 'comments' in video:
                    # only take videos that have comments
                    video_ids.append(video['id'])
                    # pprint(video, depth=2)
            # Follow the pagination link to fetch the next page of videos.
            # Note: the paging URL returns {'data': ..., 'paging': ...} directly,
            # without the enclosing 'videos' key.
            videos = r.get(videos['paging']['next']).json()
        except KeyError:
            # no 'next' link left, so every page has been seen
            break
    print('There are {} videos with comments in total.'.format(len(video_ids)))
    # print(video_ids)
    for video_id in video_ids:
        comments = get_json(video_id, 'comments')['comments']
        # pprint(comments, depth=2)
        # list of (created_time, message) tuples
        comment_list = []
        while True:
            try:
                for comment in comments['data']:
                    # each comment looks like
                    # {'created_time': '...', 'message': '...', 'id': '...'}
                    # pprint(comment['id'], depth=1)
                    # strip emoji from the message and store it as a tuple
                    comment_list.append((comment['created_time'],
                                         emoji_pattern.sub('', comment.get('message', ''))))
                # Follow the pagination link to fetch the next page of comments.
                comments = r.get(comments['paging']['next']).json()
            except KeyError:
                break
        print('Video {} has {} comments.'.format(video_id, len(comment_list)))
        # newline='' lets the csv module handle line endings itself
        with open(video_id + '.csv', 'w', encoding='utf8', newline='') as f:
            writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
            for row in comment_list:
                writer.writerow(row)


if __name__ == "__main__":
    main()