Create a gist now

Instantly share code, notes, and snippets.

Extract related subreddits from subreddit description.
import re
from models import RedditInfo
from google.appengine.ext.db import BadValueError
# Matches subreddit references such as "/r/Python" (case-insensitive).
reLink = re.compile('/r/[a-zA-Z0-9_-]+', re.I)


def parseRedditLinks(text, exclude):
    """Return the sorted, unique, lower-cased subreddit links found in *text*.

    Args:
        text: Free-form text (e.g. a subreddit description) to scan, or None.
        exclude: A subreddit link to leave out of the result (typically the
            subreddit's own url), or None to exclude nothing. It is compared
            case-insensitively and ignoring any trailing slash.

    Returns:
        A sorted list of distinct lower-cased links of the form "/r/name";
        an empty list when *text* is None or contains no links.
    """
    if text is None:
        return []
    # The pattern never captures a trailing slash, so an exclusion given as
    # "/r/name/" has to be normalised before it can match an extracted link.
    excluded = exclude.lower().rstrip('/') if exclude is not None else None
    links = set(match.lower() for match in reLink.findall(text))
    links.discard(excluded)
    return sorted(links)
def convertJsonToRedditInfoModel(entry):
    """Convert a JSON subreddit entry into a ``RedditInfo`` model instance.

    The entry's url is lower-cased and used both as the model's ``key_name``
    and as its ``url``. The description is scanned for links to other
    subreddits (the entry's own url is passed as the one to exclude) and the
    result is stored as ``parsedLinks``.

    Args:
        entry: Mapping with the keys 'url', 'description', 'display_name',
            'name', 'title', 'created', 'created_utc', 'over18',
            'subscribers' and 'id'.

    Returns:
        A ``RedditInfo`` instance, or None when constructing the model
        raises ``BadValueError``.
    """
    url = entry['url'].lower()
    description = entry['description']
    parsedLinks = parseRedditLinks(description, exclude=url)
    try:
        return RedditInfo(
            key_name=url,
            url=url,
            display_name=entry['display_name'],
            name=entry['name'],
            title=entry['title'],
            created=entry['created'],
            created_utc=entry['created_utc'],
            over18=entry['over18'],
            subscribers=entry['subscribers'],
            id=entry['id'],
            description=description,
            parsedLinks=parsedLinks,
        )
    except BadValueError:
        # Some entries have multi-line urls, which make model construction
        # fail; they are assumed to be invalid and are skipped.
        return None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment