Created

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist

Extract related subreddits from subreddit description.

View gist:1941011
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
 
import re
from models import RedditInfo
from google.appengine.ext.db import BadValueError
 
# Matches subreddit references such as "/r/science". The pattern never
# captures a trailing slash; re.I makes the match case-insensitive.
reLink = re.compile('/r/[a-zA-Z0-9_-]+', re.I)


def parseRedditLinks(text, exclude):
    """
    Return the sorted, de-duplicated, lower-cased subreddit links in text.

    text    -- string to scan; None yields an empty list.
    exclude -- link to leave out of the result (e.g. the subreddit's own
               url). It is compared case-insensitively and without any
               trailing slashes, because the pattern above never captures
               a trailing slash. None excludes nothing.
    """
    if text is None:
        return []
    # Normalise the excluded link to the same shape the matches have.
    excluded = exclude.rstrip('/').lower() if exclude is not None else None
    links = set(match.lower() for match in reLink.findall(text))
    links.discard(excluded)
    return sorted(links)
 
def convertJsonToRedditInfoModel(entry):
    """
    Convert a JSON data entry of a subreddit description
    (e.g. http://www.reddit.com/r/science/about.json ) to a RedditInfo model.

    The lower-cased url is used both as the key name and as the url field.
    Links parsed out of the description are stored as parsedLinks, with the
    subreddit's own url passed as the link to exclude.

    Returns the RedditInfo instance, or None when the model rejects a value
    with BadValueError. A field missing from entry is not handled here and
    propagates to the caller.
    """
    url = entry['url'].lower()
    description = entry['description']
    parsedLinks = parseRedditLinks(description, exclude=url)

    # Only the model construction is guarded: it is the call expected to
    # raise BadValueError.
    try:
        return RedditInfo(
            key_name=url,
            url=url,
            display_name=entry['display_name'],
            name=entry['name'],
            title=entry['title'],
            created=entry['created'],
            created_utc=entry['created_utc'],
            over18=entry['over18'],
            subscribers=entry['subscribers'],
            id=entry['id'],
            description=description,
            parsedLinks=parsedLinks,
        )
    except BadValueError:
        # Some entries have multi-line urls, which the model rejects.
        # They are assumed to be invalid and are skipped.
        return None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.