Skip to content

Instantly share code, notes, and snippets.

@bboe
Created June 16, 2013 19:02
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bboe/5793021 to your computer and use it in GitHub Desktop.
Save bboe/5793021 to your computer and use it in GitHub Desktop.
Saves all the available comments and submissions for a given user.
#!/usr/bin/env python
import cPickle
import praw
import sys
# Time windows accepted by the period-based listing sorts.
PERIODS = ['all', 'year', 'month', 'week', 'day', 'hour']
# Listing sorts that are combined with each time window above.
PERIOD_VIEWS = ['controversial', 'top']
# Query-parameter dicts for every listing to walk: the plain 'new' listing
# first, then every (sort, period) combination.
VIEWS = [{'sort': 'new'}] + [
    {'sort': sort, 't': period}
    for sort in PERIOD_VIEWS
    for period in PERIODS
]
# Attributes copied verbatim from an item when the item has them.
DIRECT_ATTRS = [
    'author_flair_css_class',
    'author_flair_text',
    'body',
    'body_html',
    'created_utc',
    'distinguished',
    'domain',
    'downs',
    'edited',
    'fullname',
    'gilded',
    'is_self',
    'link_flair_css_class',
    'link_flair_text',
    'link_id',
    'link_title',
    'num_comments',
    'parent_id',
    'score_hidden',
    'selftext',
    'selftext_html',
    'title',
    'ups',
    'url',
]
# Attributes stored as their str() form rather than as the object itself.
INDIRECT_ATTRS = ['author', 'redditor', 'subreddit']
def get_all(func):
    """Collect the unique items that `func` yields across the VIEWS listings.

    `func` is called once per entry in VIEWS as
    ``func(limit=None, params=<copy of the view dict>)`` and must return an
    iterable of objects that have an ``id`` attribute.  Each item is stored
    only the first time its id is seen.  After every view a progress line
    reporting the number of newly added items is printed.

    If the initial 'new' view adds fewer than 900 items, the remaining views
    are skipped.
    NOTE(review): 900 presumably sits just under the listing size cap, so a
    smaller count means 'new' already returned everything -- confirm.

    Returns a dict mapping item id to a plain dict of that item's data.
    """
    def normalize(item):
        # Reduce an item to a plain dict of the attributes it actually has.
        record = {}
        for name in DIRECT_ATTRS:
            if hasattr(item, name):
                record[name] = getattr(item, name)
        # These attributes are kept in their string form only.
        for name in INDIRECT_ATTRS:
            if hasattr(item, name):
                record[name] = str(getattr(item, name))
        if isinstance(item, praw.objects.Submission):
            record['permalink'] = item.permalink
        return record

    collected = {}
    for view in VIEWS:
        fresh = 0
        # Pass a copy of the view dict so `func` never receives the shared
        # module-level dict itself.
        for entry in func(limit=None, params=dict(view)):
            if entry.id in collected:
                continue
            fresh += 1
            collected[entry.id] = normalize(entry)

        is_new_view = view['sort'] == 'new'
        if is_new_view:
            prefix = 'new: '
        else:
            prefix = '{0}/{1}: '.format(view['sort'], view['t'])
        sys.stdout.write(prefix)
        print('{0} new items'.format(fresh))

        # Only the 'new' view can end the walk early.
        if is_new_view and fresh < 900:
            break
    return collected
def main():
    """Fetch a redditor's comments and submissions and pickle them to disk.

    The username is taken from the single command-line argument; with any
    other argument count the user is prompted until a non-empty name is
    entered.  The collected data is written to ``<username>.pkl`` in the
    current directory as a dict with the keys 'comments' and 'submissions',
    each holding the dict returned by get_all().

    Returns None, so the process exits with status 0 on success.
    """
    if len(sys.argv) == 2:
        username = sys.argv[1]
    else:
        username = None
        while not username:
            username = raw_input('Username: ').strip()

    r = praw.Reddit('bboe redditor info')
    # Debug aid left by the author (presumably enables praw request
    # logging -- confirm): r.config.log_requests = 2
    user = r.get_redditor(username)

    results = {}
    results['comments'] = get_all(user.get_comments)
    print('Total Comments: {0}'.format(len(results['comments'])))
    results['submissions'] = get_all(user.get_submitted)
    print('Total Submissions: {0}'.format(len(results['submissions'])))

    # HIGHEST_PROTOCOL is a binary pickle format, so the file must be opened
    # in binary mode; text mode can corrupt the data on platforms that
    # translate newlines.  The with-block also guarantees the file is
    # flushed and closed.
    with open('{0}.pkl'.format(username), 'wb') as output:
        cPickle.dump(results, output, cPickle.HIGHEST_PROTOCOL)
# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment