Last active
May 27, 2016 14:11
-
-
Save avoliva/ba8d157adb5c19f7dd2d to your computer and use it in GitHub Desktop.
Reddit comment loader/parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from BeautifulSoup import BeautifulSoup | |
import requests | |
from xml.sax import saxutils as su | |
class Comment(object):
    """One reddit comment: rendered HTML body plus display metadata.

    `parent_content` starts as None and is filled in later (by
    Reddit.process) when the parent is another comment already seen.
    """

    def __init__(self, id, html, author,
                 points, postedOn, level, parent=None):
        # Map constructor arguments onto the attribute names the rest of
        # the parser uses (html -> content, author -> user, ...).
        (self.id, self.content, self.user, self.points,
         self.created, self.level, self.parent) = (
            id, html, author, points, postedOn, level, parent)
        self.parent_content = None
class Reddit(object):
    """Fetch subreddit listings and recursively flatten comment trees
    from reddit's public .json endpoints."""

    def load_comment(self, data, level):
        """Build a Comment from the 'data' dict of one 't1' thing.

        data: the 'data' mapping of a reddit comment item.
        level: nesting depth (0 for top-level comments).
        Returns the new Comment (content still raw escaped body_html).
        """
        # BUG FIX: original read `=data.get('body_html')` — the `html`
        # keyword was missing, which is a syntax error.
        return Comment(
            id=data.get('name'),
            html=data.get('body_html'),
            author=data.get('author'),
            points=data.get('score'),
            postedOn=data.get('created_utc'),
            level=level,
            parent=data.get('parent_id'),
        )

    def process(self, comments, c, level):
        """Parse listing children `c`, appending Comment objects to
        `comments` and recursing into replies. Returns `comments`."""
        for item in c:
            # Only real comments ('t1' things); skips 'more' stubs and
            # items with no kind. (Collapses the original two checks.)
            if item.get('kind') != 't1':
                continue
            data = item.get('data')
            comment = self.load_comment(data, level)
            # body_html arrives HTML-escaped; unescape it, then join the
            # <p> children of the wrapping <div> with <br/><br/>.
            soup = BeautifulSoup(su.unescape(comment.content))
            comment.content = '<br/><br/>'.join(
                str(p.contents[0]) for p in soup.find('div').findAll('p'))
            if comment.parent and comment.parent.split('_')[0] == 't1':
                # Parent is another comment: copy its parsed content.
                # BUG FIX: guard against the parent not being in `comments`
                # (e.g. filtered out below) — original raised
                # AttributeError on the None fallback.
                parent = next(
                    (x for x in comments if x.id == comment.parent), None)
                if parent is not None:
                    comment.parent_content = parent.content
            # Drop author-less (deleted) comments but still walk replies.
            if comment.user:
                comments.append(comment)
            self.add_replies(comments, data, level + 1)
        return comments

    def add_replies(self, comments, parent, level):
        """Recurse into `parent`'s replies listing (if any) at `level`."""
        # 'replies' is an empty string when absent, else a listing dict.
        replies = parent.get('replies')
        if not replies:
            return
        self.process(comments, replies['data']['children'], level)

    def load_subreddit_list(self, subreddit):
        """Return a list of post-summary dicts for the subreddit's
        front-page listing."""
        headers = {
            'User-Agent': 'python/requests',
        }
        # Use www.reddit.com, consistent with load_subreddit_posts
        # (the bare domain just redirects there).
        listing = requests.get(
            'https://www.reddit.com/r/{}.json'.format(subreddit),
            headers=headers)
        return [dict(
            subreddit=c['data']['subreddit'],
            score=c['data']['score'],
            # BUG FIX: original duplicated 'score' here; the comment
            # count lives under 'num_comments' in the listing API.
            message_count=c['data']['num_comments'],
            user=c['data']['author'],
            permalink=c['data']['permalink'],
            created_utc=c['data']['created_utc'],
            url=c['data']['url'],
            title=c['data']['title'],
            ups=c['data']['ups'],
            downs=c['data']['downs'],
        ) for c in listing.json()['data']['children']]

    def load_subreddit_posts(self, subreddit, topic_id, title):
        """Return the flattened Comment list for one post.

        The endpoint returns a two-element array: [0] is the post
        itself, [1] is the comment listing — we parse only [1].
        """
        headers = {
            'User-Agent': 'python/requests',
        }
        posts = requests.get(
            'https://www.reddit.com/r/{}/comments/{}/{}/.json'.format(
                subreddit, topic_id, title),
            headers=headers,
        )
        return self.process(list(), posts.json()[1]['data']['children'], 0)
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.