Created
January 18, 2016 17:16
-
-
Save tcash21/48bbba625d4e992d8d17 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import praw | |
import re | |
import pandas as pd | |
import tinys3 | |
## Connect to S3 with tls=True (both key strings are placeholders to replace)
conn = tinys3.Connection(
    'S3_ACCESS_KEY',
    'S3_SECRET_KEY',
    tls=True,
)

## Create the Reddit client and log in (placeholder username / password)
r = praw.Reddit(user_agent='Live r/NFL Game Thread Scraper')
r.login('username', 'password')

## The submission ID is the short token taken from the game thread's URL
submission = r.get_submission(submission_id='41g1g4')

## Comment order does not matter here, so flatten the reply tree
flat_comments = praw.helpers.flatten_tree(submission.comments)
## Parallel columns: position i in each list describes the same matched comment
dates, flairs, texts, ids = [], [], [], []

for comment in flat_comments:
    ## Skip anything whose lower-cased string form contains neither word;
    ## the "+" after each letter also catches stretched spellings.
    if not re.search('f+u+c+k+|s+h+i+t+', str(comment).lower()):
        continue
    dates.append(comment.created_utc)
    ids.append(comment.id)
    texts.append(comment.body)
    flairs.append(comment.author_flair_text)
## Join the dates, team flairs, comment IDs and comment texts into a pandas
## DataFrame (one row per matched comment) and append it to the CSV file.
the_comments = pd.DataFrame(dict(date=dates, flairs=flairs, ids=ids, text=texts))

## Pass the path rather than an already-open text handle: pandas then opens
## the file itself in append mode and applies encoding='utf-8'. With a text
## handle the encoding argument is not honoured, so non-ASCII comments would
## be written in whatever the platform default encoding happens to be.
## header=False because rows are appended to an existing file on every run.
the_comments.to_csv(
    '/home/ec2-user/redditScrape/comments.csv',
    mode='a',
    header=False,
    encoding='utf-8',
)
## Upload the CSV to S3: key 'comments.csv', bucket 'stattleship'.
## The file is opened in binary mode inside a context manager so the handle
## is closed once the upload call returns (or raises) instead of being leaked.
with open('/home/ec2-user/redditScrape/comments.csv', 'rb') as f:
    conn.upload('comments.csv', f, 'stattleship')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment