Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Extract related subreddits from subreddit description.
import re
from models import RedditInfo
from google.appengine.ext.db import BadValueError
# Matches subreddit references such as "/r/Python". re.I also makes the
# "/r/" prefix itself case-insensitive, so "/R/python" is found too.
reLink = re.compile(r'/r/[a-zA-Z0-9_-]+', re.I)


def parseRedditLinks(text, exclude=None):
    """Return the subreddit links mentioned in *text*.

    Links are lower-cased, de-duplicated and returned in sorted order.

    Args:
        text: Text to scan (e.g. a subreddit description); may be None.
        exclude: Optional link to leave out of the result, typically the
            subreddit's own url. It is compared case-insensitively and any
            trailing "/" is ignored.

    Returns:
        A sorted list of unique lower-cased "/r/<name>" strings; an empty
        list when *text* is None or contains no links.
    """
    if text is None:
        return []
    # Matches can never end in "/" (the pattern does not allow that
    # character), so normalise `exclude` the same way before comparing.
    excluded = None
    if exclude is not None:
        excluded = exclude.lower().rstrip('/')
    links = set(match.lower() for match in reLink.findall(text))
    # The original compared the bound method `match.lower` (not its result)
    # with `exclude`, which is always unequal, so nothing was ever excluded.
    links.discard(excluded)
    return sorted(links)
def convertJsonToRedditInfoModel(entry):
    """Convert a JSON subreddit description entry to a RedditInfo model.

    The lower-cased 'url' value is used both as the model's key name and as
    the link excluded when the description is scanned for related
    subreddits.

    Args:
        entry: Mapping holding the subreddit data. The keys read are 'url',
            'description', 'display_name', 'name', 'title', 'created',
            'created_utc', 'over18', 'subscribers' and 'id'.

    Returns:
        A RedditInfo instance, or None when the model rejects a value with
        BadValueError.
    """
    url = entry['url'].lower()
    description = entry['description']
    parsedLinks = parseRedditLinks(description, exclude=url)
    # Only the model construction is guarded: it is the step that validates
    # the values and may raise BadValueError.
    try:
        return RedditInfo(key_name=url,
                          url=url,
                          display_name=entry['display_name'],
                          name=entry['name'],
                          title=entry['title'],
                          created=entry['created'],
                          created_utc=entry['created_utc'],
                          over18=entry['over18'],
                          subscribers=entry['subscribers'],
                          id=entry['id'],
                          description=description,
                          parsedLinks=parsedLinks)
    except BadValueError:
        # Some entries have multi-line urls, which the model rejects; they
        # are assumed to be invalid and are skipped.
        return None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment