Skip to content

Instantly share code, notes, and snippets.

@quentinms
Created May 11, 2013 16:30
Show Gist options
  • Save quentinms/5560494 to your computer and use it in GitHub Desktop.
Save quentinms/5560494 to your computer and use it in GitHub Desktop.
A Python script that fetches the list of the most popular subreddits and their subscriber counts.
import json
import time
from time import sleep
from urllib2 import Request
from urllib2 import build_opener
from urllib2 import HTTPError
import Cookie
def get_subreddits():
    """Walk reddit's subreddit listing page by page and print every entry.

    Requests ``/reddits.json`` with ``limit`` entries per call, following the
    ``after`` cursor found in each response until reddit returns ``None`` for
    it.  Every decoded page is handed to ``extract_subreddits_from_json``.

    A page that fails with an HTTP error is retried after the usual pause;
    the error is re-raised once the same page has failed ``max_retries``
    times in a row, so a persistent failure cannot loop forever.

    Raises:
        HTTPError: when one page fails ``max_retries`` consecutive times.
    """
    subreddits_url = 'http://www.reddit.com/reddits'

    # This cookie is needed in order to be sure to get the NSFW subreddits.
    c = Cookie.SimpleCookie()
    c['over18'] = '1'
    # A request's Cookie header must carry the bare "name=value" pairs.
    # str() of the cookie object would produce "Set-Cookie: over18=1", so
    # render it explicitly without the header prefix or any attributes.
    cookie_header = c.output(attrs=[], header='', sep='; ').strip()

    # The user-agent has to be changed, otherwise reddit's servers block us.
    req_headers = {
        'User-Agent': 'reddit_stats by /u/quentinms',
        'Cookie': cookie_header,
    }

    # Maximum number of subreddits we can get at each call
    # (default value is 25).
    limit = 100
    # Seconds to wait between API calls, as requested by reddit's rules.
    delay = 2
    # Consecutive failures tolerated for one page before giving up.
    max_retries = 5

    # The opener holds no per-request state, so build it once.
    opener = build_opener()
    after = ""
    failures = 0

    # Loop until the end of the list of subreddits has been reached.
    while after is not None:
        url = '%s.json?limit=%s&after=%s' % (subreddits_url, limit, after)
        req = Request(url, headers=req_headers)
        try:
            response = opener.open(req)
            try:
                infos = json.load(response)
            finally:
                response.close()
        except HTTPError:
            failures += 1
            if failures >= max_retries:
                raise
            # Respect the rate limit before asking for the same page again.
            sleep(delay)
            continue

        failures = 0
        after = infos['data']['after']
        extract_subreddits_from_json(infos)
        sleep(delay)
def extract_subreddits_from_json(infos):
    """Print the name and number of subscribers of each listed subreddit.

    ``infos`` is one decoded listing page.  The entries are read from
    ``infos['data']['children']``; for each of them the ``url`` and
    ``subscribers`` fields of its ``data`` mapping are printed on one line
    as ``<url>, <subscribers>``.
    """
    for child in infos['data']['children']:
        details = child['data']
        line = ", ".join((details['url'], str(details['subscribers'])))
        print(line)
# Start the crawl. This call sits at module level, so it runs whenever the
# file is executed (and also if it is imported).
get_subreddits()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment