Skip to content

Instantly share code, notes, and snippets.

@KobaKhit
Created October 24, 2018 19:42
Show Gist options
  • Save KobaKhit/86cc56202cc3ec08c3a7c1bd7f011977 to your computer and use it in GitHub Desktop.
Save KobaKhit/86cc56202cc3ec08c3a7c1bd7f011977 to your computer and use it in GitHub Desktop.
A class that enables a user to download posts and comments from a subreddit
class Reddit():
    """Download posts and comments from a subreddit through a PRAW client.

    Posts are returned as ``RedditPost`` records and comments as
    ``RedditComment`` records; two static helpers flatten those records
    into pandas DataFrames.
    """

    def __init__(self, client_id, client_secret, user_agent='My agent'):
        """Create the underlying ``praw.Reddit`` client.

        Args:
            client_id: Reddit API client id, forwarded to ``praw.Reddit``.
            client_secret: Reddit API client secret, forwarded to ``praw.Reddit``.
            user_agent: User-agent string, forwarded to ``praw.Reddit``.
        """
        self.reddit = praw.Reddit(client_id=client_id,
                                  client_secret=client_secret,
                                  user_agent=user_agent)

    def get_comments(self, submission):
        """Return the post itself plus all of its comments as ``RedditComment`` records.

        Args:
            submission: Despite the name, this is the ``RedditPost`` built by
                ``get_submissions`` (it is read through ``postid``, ``author``,
                ``body``, ``created`` and ``score``). Its ``comments``
                attribute must still be the comment collection of the
                underlying submission, exposing ``replace_more()`` and
                ``list()``.

        Returns:
            A list of ``RedditComment``. The first entry represents the post
            itself (its comment id and parent id are both the post id); the
            remaining entries are the comments returned by ``comments.list()``.
        """
        post_id = submission.postid

        # The post is stored as the root "comment" so that the post text and
        # its replies end up in one homogeneous list.
        comments = [RedditComment(author=submission.author,
                                  commentid=post_id,
                                  postid=post_id,
                                  parentid=post_id,
                                  body=submission.body,
                                  created=submission.created,
                                  score=submission.score,
                                  num_char=len(submission.body),
                                  num_words=len(submission.body.split(' ')))]

        # Expand every "more comments" placeholder before flattening.
        submission.comments.replace_more(limit=None)
        for com in submission.comments.list():
            body = com.body
            comment = RedditComment(
                # Deleted accounts have no author object.
                author=com.author.name if com.author is not None else 'None',
                commentid=com.id,
                # Fixed: the original read an undefined name `post` here.
                postid=post_id,
                parentid=com.parent().id,  # id of whatever this comment replies to
                body=body,
                created=datetime.utcfromtimestamp(com.created_utc),
                score=com.score,
                num_char=len(body),
                num_words=len(body.split(' ')))
            comments.append(comment)
        return comments

    def get_submissions(self, subreddit, time_filter='day', comments=False):
        """Fetch the top submissions of a subreddit as ``RedditPost`` records.

        Args:
            subreddit: Subreddit name passed to ``self.reddit.subreddit``.
            time_filter: Time window passed to the listing's ``top`` call.
            comments: When true, each post's ``comments`` attribute is
                replaced with the list produced by ``get_comments``;
                otherwise it is set to ``None``.

        Returns:
            A list of ``RedditPost``, one per submission yielded by ``top``.
        """
        posts = []
        # Keyword form: newer PRAW releases deprecate the positional argument.
        for submission in self.reddit.subreddit(subreddit).top(time_filter=time_filter):
            post = RedditPost(
                postid=submission.id,
                # Deleted accounts have no author object.
                author=submission.author.name if submission.author is not None else 'None',
                title=submission.title,
                body=submission.selftext,
                created=datetime.utcfromtimestamp(submission.created_utc),
                score=submission.score,
                num_comments=submission.num_comments,
                # Temporarily holds the raw comment collection so that
                # get_comments can walk it; replaced just below.
                comments=submission.comments)
            post.comments = self.get_comments(post) if comments else None
            posts.append(post)
        return posts

    @staticmethod
    def posts_to_df(posts):
        """Build a DataFrame with one row per post.

        Every ``RedditPost`` dataclass field except ``comments`` becomes a
        column. The columns are passed explicitly so that an empty ``posts``
        list still yields a frame with the expected columns.
        """
        post_keys = [k for k in RedditPost.__dataclass_fields__ if k != 'comments']
        rows = [{k: getattr(post, k) for k in post_keys} for post in posts]
        return pd.DataFrame(rows, columns=post_keys)

    @staticmethod
    def comments_to_df(posts):
        """Build a DataFrame with one row per comment across all ``posts``.

        Each comment record contributes its instance attributes as a row.
        Posts whose ``comments`` is ``None`` or empty (e.g. fetched with
        ``comments=False``) contribute no rows.
        """
        # Fixed: the original iterated an undefined name `subs`.
        rows = [vars(comment)
                for post in posts
                for comment in (post.comments or [])]
        return pd.DataFrame(rows)
def main():
    """Download the top posts of r/lakers with comments and build both dataframes.

    NOTE(review): ``client_id``, ``client_secret`` and ``user_agent`` are not
    defined in this function; they are presumably module-level values
    supplied by the user. Confirm they exist before running.
    """
    reddit = Reddit(client_id, client_secret, user_agent)
    posts = reddit.get_submissions('lakers', comments=True)  # download posts and comments
    # Fixed: the original passed an undefined name `subs` to both helpers.
    df_posts = reddit.posts_to_df(posts)  # create posts dataframe
    df_comments = reddit.comments_to_df(posts)  # create comments dataframe
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment