Skip to content

Instantly share code, notes, and snippets.

@DanBrink91
Created January 11, 2014 19:29
Show Gist options
  • Save DanBrink91/8375591 to your computer and use it in GitHub Desktop.
Save DanBrink91/8375591 to your computer and use it in GitHub Desktop.
Scraping reddit
# Grab some subreddits' titles and dump them to reddits.json.
#
# For each of the top 25 popular subreddits, fetch up to
# `titles_by_hundreds` pages of 100 "hot" posts each, collecting the
# post titles, then write {subreddit_name: [titles...]} as JSON.
import requests
from time import sleep
import json

# Reddit's API rejects the default requests User-Agent (HTTP 429);
# a descriptive custom UA is required by their API rules.
HEADERS = {'User-Agent': 'subreddit-title-scraper/1.0'}

# Get top 25 subreddits
r = requests.get('http://www.reddit.com/subreddits/popular/.json',
                 headers=HEADERS)
subs = {}
data = r.json()
titles_by_hundreds = 10  # pages of 100 posts to fetch per subreddit

for child in data['data']['children']:
    sub = child['data']['display_name']
    sleep(0.5)  # play nice and chill
    # Initial request for this subreddit's hot listing.
    sub_request = requests.get(
        'http://www.reddit.com/r/%s/hot/.json?limit=100' % sub,
        headers=HEADERS)
    sub_data = sub_request.json()
    # NOTE: use a distinct comprehension variable — reusing `child`
    # would clobber the outer loop variable in Python 2.
    subs[sub] = [post['data']['title']
                 for post in sub_data['data']['children']]
    after = sub_data['data']['after']
    # The rest of the pages for this sub, following the `after` cursor.
    for _ in range(1, titles_by_hundreds):
        if after is None:
            break  # subreddit has no more posts to page through
        sub_request = requests.get(
            'http://www.reddit.com/r/%s/hot/.json?limit=100&after=%s'
            % (sub, after),
            headers=HEADERS)
        sub_data = sub_request.json()
        subs[sub].extend(post['data']['title']
                         for post in sub_data['data']['children'])
        after = sub_data['data']['after']
        sleep(0.5)

# Write it all to json; `with` guarantees the file is closed.
with open('reddits.json', 'w') as f:
    json.dump(subs, f, sort_keys=False, indent=4, separators=(',', ':'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment