Skip to content

Instantly share code, notes, and snippets.

@tcash21
Created January 18, 2016 17:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tcash21/48bbba625d4e992d8d17 to your computer and use it in GitHub Desktop.
Save tcash21/48bbba625d4e992d8d17 to your computer and use it in GitHub Desktop.
import praw
import re
import pandas as pd
import tinys3
## S3 Connection Credentials
## NOTE(review): the two string arguments look like placeholders for the real
## access key and secret key -- replace them before running, and avoid
## committing real credentials.
conn = tinys3.Connection('S3_ACCESS_KEY','S3_SECRET_KEY',tls=True)
## Login to Reddit
## NOTE(review): 'username' / 'password' also look like placeholders. The
## login(), get_submission() and praw.helpers calls below appear to be
## PRAW 3.x-era API -- confirm against the installed praw version.
r = praw.Reddit(user_agent='Live r/NFL Game Thread Scraper')
r.login('username', 'password')
## Enter the game thread ID from the URL here
submission = r.get_submission(submission_id='41g1g4')
## flatten comments since we don't care about order
flat_comments = praw.helpers.flatten_tree(submission.comments)
## Gather creation time, author flair, body text and id for every comment
## whose text matches the pattern below. Repeated letters are allowed, so
## stretched spellings match as well as the plain words.
word_pattern = re.compile('f+u+c+k+|s+h+i+t+')
dates, flairs, texts, ids = [], [], [], []
for comment in flat_comments:
    # The text is lower-cased before matching, so the search is effectively
    # case-insensitive.
    # NOTE(review): relies on str(comment) yielding the comment text --
    # confirm for the installed praw version.
    if word_pattern.search(str(comment).lower()) is None:
        continue
    # The four lists are kept in lockstep: one entry each per matching comment.
    flairs.append(comment.author_flair_text)
    texts.append(comment.body)
    ids.append(comment.id)
    dates.append(comment.created_utc)
## Join the dates, comment texts, ids and team flairs into a pandas DataFrame
## and append the rows (without a header row) to the CSV file.
the_comments = pd.DataFrame(dict(date=dates, flairs=flairs, ids=ids, text=texts))
## Pass the path (with mode='a') instead of an already-open text handle:
## pandas only applies `encoding` when it opens the file itself. With a text
## handle the encoding comes from the process locale, which can fail on
## non-ASCII comment text.
the_comments.to_csv(
    '/home/ec2-user/redditScrape/comments.csv',
    mode='a',
    header=False,
    encoding='utf-8',
)
## Upload the CSV file to S3 under the key 'comments.csv'.
## The file is opened in binary mode inside a `with` block so the local handle
## is always closed once the upload call returns or raises (the original left
## it open).
## NOTE(review): the third argument is presumably the destination bucket name
## -- confirm against the tinys3 upload() signature.
with open('/home/ec2-user/redditScrape/comments.csv', 'rb') as f:
    conn.upload('comments.csv', f, 'stattleship')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment