Created
May 11, 2013 16:30
-
-
Save quentinms/5560494 to your computer and use it in GitHub Desktop.
A Python script that fetches the list of the most popular subreddits along with each one's number of subscribers.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import time | |
from time import sleep | |
from urllib2 import Request | |
from urllib2 import build_opener | |
from urllib2 import HTTPError | |
import Cookie | |
def get_subreddits():
    """Fetch the complete subreddit listing from reddit's /reddits API and
    print each subreddit's URL and subscriber count, paging 100 at a time.

    Follows the listing's 'after' cursor until reddit returns None for it,
    sleeping 2 seconds between calls per reddit's API rules.
    """
    subreddits_url = 'http://www.reddit.com/reddits'
    # We need this cookie in order to be sure to get the NSFW subreddits.
    c = Cookie.SimpleCookie()
    c['over18'] = '1'
    # Render the cookie as a bare "name=value" pair. Passing the SimpleCookie
    # object itself would serialize as "Set-Cookie: over18=1", which is not a
    # valid value for a request's Cookie header.
    cookie_value = c.output(attrs=[], header='').strip()
    # We need to change the user-agent otherwise we will be blocked by
    # reddit's servers.
    req_headers = {'User-Agent': 'reddit_stats by /u/quentinms',
                   'Cookie': cookie_value}
    after = ""
    # Maximum number of subreddits returned per call (the API default is 25).
    limit = 100
    # reddit sets 'after' to None once there are no more pages.
    while after is not None:
        url = '%s.json?limit=%s&after=%s' % (subreddits_url, limit, after)
        req = Request(url, headers=req_headers)
        opener = build_opener()
        try:
            infos = json.load(opener.open(req))
            after = infos['data']['after']
            extract_subreddits_from_json(infos)
        except HTTPError:
            # BUG FIX: the original caught "urllib2.HTTPError", but only the
            # HTTPError name was imported (there is no "urllib2" binding), so
            # the handler itself raised NameError. Best-effort: retry the same
            # page after the rate-limit pause below.
            pass
        # Sleep 2 seconds between API calls as required by reddit's rules.
        # Placed outside the try so an HTTP error is also throttled instead
        # of retrying in a tight loop.
        sleep(2)
#Get the name and number of subscribers for each subreddit in the list | |
def extract_subreddits_from_json(infos): | |
for subreddit in infos['data']['children']: | |
sub_name = subreddit['data']['url'] | |
sub_subscribers = subreddit['data']['subscribers'] | |
print sub_name+", "+str(sub_subscribers) | |
# Run the crawl only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    get_subreddits()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment