Created
May 11, 2013 16:30
-
-
Save quentinms/5560494 to your computer and use it in GitHub Desktop.
A Python script that fetches the list of the most popular subreddits along with each one's number of subscribers.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import time | |
from time import sleep | |
from urllib2 import Request | |
from urllib2 import build_opener | |
from urllib2 import HTTPError | |
import Cookie | |
def get_subreddits():
    """Fetch the complete subreddit listing from reddit's /reddits API and
    print each subreddit's URL and subscriber count, paging 100 at a time.

    Follows the listing's 'after' cursor until reddit returns None for it,
    sleeping 2 seconds between calls per reddit's API rules.
    """
    subreddits_url = 'http://www.reddit.com/reddits'
    # We need this cookie in order to be sure to get the NSFW subreddits.
    c = Cookie.SimpleCookie()
    c['over18'] = '1'
    # Render the cookie as a bare "name=value" pair. Passing the SimpleCookie
    # object itself would serialize as "Set-Cookie: over18=1", which is not a
    # valid value for a request's Cookie header.
    cookie_value = c.output(attrs=[], header='').strip()
    # We need to change the user-agent otherwise we will be blocked by
    # reddit's servers.
    req_headers = {'User-Agent': 'reddit_stats by /u/quentinms',
                   'Cookie': cookie_value}
    after = ""
    # Maximum number of subreddits returned per call (the API default is 25).
    limit = 100
    # reddit sets 'after' to None once there are no more pages.
    while after is not None:
        url = '%s.json?limit=%s&after=%s' % (subreddits_url, limit, after)
        req = Request(url, headers=req_headers)
        opener = build_opener()
        try:
            infos = json.load(opener.open(req))
            after = infos['data']['after']
            extract_subreddits_from_json(infos)
        except HTTPError:
            # BUG FIX: the original caught "urllib2.HTTPError", but only the
            # HTTPError name was imported (there is no "urllib2" binding), so
            # the handler itself raised NameError. Best-effort: retry the same
            # page after the rate-limit pause below.
            pass
        # Sleep 2 seconds between API calls as required by reddit's rules.
        # Placed outside the try so an HTTP error is also throttled instead
        # of retrying in a tight loop.
        sleep(2)
#Get the name and number of subscribers for each subreddit in the list | |
def extract_subreddits_from_json(infos): | |
for subreddit in infos['data']['children']: | |
sub_name = subreddit['data']['url'] | |
sub_subscribers = subreddit['data']['subscribers'] | |
print sub_name+", "+str(sub_subscribers) | |
# Run the crawl only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    get_subreddits()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment