## tcash21/scrape_reddit.py

## scrape_reddit.py
import praw
import re
import pandas as pd
import tinys3

## Scrape one r/NFL game thread, keep the comments whose text matches the
## profanity pattern, append them to a local CSV and upload that CSV to S3.

## S3 Connection Credentials (the two key strings are placeholders)
## NOTE(review): do not commit real keys here -- consider reading them from the environment
conn = tinys3.Connection('S3_ACCESS_KEY','S3_SECRET_KEY',tls=True)

## Login to Reddit
r = praw.Reddit(user_agent='Live r/NFL Game Thread Scraper')
r.login('username', 'password')

## Enter the game thread ID from the URL here
submission = r.get_submission(submission_id='41g1g4')

## flatten comments since we don't care about order
flat_comments = praw.helpers.flatten_tree(submission.comments)

## Each letter is followed by `+` so stretched spellings also match;
## compiled once up front rather than looked up on every iteration
profanity = re.compile(r'f+u+c+k+|s+h+i+t+')

## CSV that accumulates the matching comments (opened in append mode below)
csv_path = '/home/ec2-user/redditScrape/comments.csv'

dates = []
flairs = []
texts = []
ids = []
for comment in flat_comments:
    ## the search runs on the lower-cased str() of the comment object
    ## (presumably its text -- verify against the PRAW version in use)
    if profanity.search(str(comment).lower()):
        flairs.append(comment.author_flair_text)
        texts.append(comment.body)
        ids.append(comment.id)
        dates.append(comment.created_utc)

## join the dates, comment texts and team flairs together into a pandas dataframe and append to a CSV file
## (no header row is written because the file is appended to on every run)
the_comments = pd.DataFrame(dict(date=dates, flairs=flairs, ids=ids, text=texts))
with open(csv_path, 'a') as f:
    the_comments.to_csv(f, header=False, encoding='utf-8')

## upload the file to S3; the `with` block closes the read handle once the
## upload call has returned instead of leaving it open
with open(csv_path, 'rb') as f:
    conn.upload('comments.csv', f, 'stattleship')
	import praw
	import re
	import pandas as pd
	import tinys3

	## Scrape one r/NFL game thread, keep the comments whose text matches the
	## profanity pattern, append them to a local CSV and upload that CSV to S3.

	## S3 Connection Credentials (the two key strings are placeholders)
	conn = tinys3.Connection('S3_ACCESS_KEY','S3_SECRET_KEY',tls=True)

	## Login to Reddit
	r = praw.Reddit(user_agent='Live r/NFL Game Thread Scraper')
	r.login('username', 'password')

	## Enter the game thread ID from the URL here
	submission = r.get_submission(submission_id='41g1g4')

	## flatten comments since we don't care about order
	flat_comments = praw.helpers.flatten_tree(submission.comments)

	dates = []
	flairs = []
	texts = []
	ids = []
	for comment in flat_comments:
		## unescaped `|` makes this an alternation between the two words;
		## a backslash-escaped pipe would only match a literal "|" character
		if re.search(r'f+u+c+k+|s+h+i+t+', str(comment).lower()):
			flairs.append(comment.author_flair_text)
			texts.append(comment.body)
			ids.append(comment.id)
			dates.append(comment.created_utc)

	## join the dates, comment texts and team flairs together into a pandas dataframe and append to a CSV file
	## (no header row is written because the file is appended to on every run)
	the_comments = pd.DataFrame(dict(date=dates, flairs=flairs, ids=ids, text=texts))
	with open('/home/ec2-user/redditScrape/comments.csv', 'a') as f:
		the_comments.to_csv(f, header=False, encoding='utf-8')

	## upload the file to S3; the `with` block closes the read handle once the
	## upload call has returned instead of leaving it open
	with open('/home/ec2-user/redditScrape/comments.csv','rb') as f:
		conn.upload('comments.csv',f,'stattleship')