public
Created

Extract related subreddits from subreddit description.

  • Download Gist
gistfile1.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
 
import re
from models import RedditInfo
from google.appengine.ext.db import BadValueError
 
# Matches subreddit references such as "/r/science". The character class
# already lists both letter cases, so re.I is redundant but harmless.
reLink = re.compile(r'/r/[a-zA-Z0-9_-]+', re.I)


def parseRedditLinks(text, exclude):
    """
    Return the sorted, de-duplicated, lower-cased subreddit links in *text*.

    text    -- string to scan for "/r/<name>" references; None yields [].
    exclude -- link to leave out of the result (typically the subreddit's
               own url). It is compared case-insensitively and any trailing
               "/" is ignored, because the pattern above never captures a
               trailing slash. None disables the exclusion.

    Returns a list of strings such as ['/r/askscience', '/r/physics'].
    """
    if text is None:
        return []

    if exclude is not None:
        # NOTE(review): the caller in this file passes entry['url'], which
        # presumably looks like "/r/name/" -- confirm against the Reddit API.
        exclude = exclude.lower().rstrip('/')

    links = set(match.lower() for match in reLink.findall(text))
    # The original compared the bound method `match.lower` (missing call
    # parentheses) with `exclude`, which is never equal, so nothing was
    # ever excluded.
    links.discard(exclude)
    return sorted(links)
 
def convertJsonToRedditInfoModel(entry):
    """
    Convert one subreddit "about" JSON entry into a RedditInfo model.

    The entry is the data found at e.g.
    http://www.reddit.com/r/science/about.json and is indexed by the keys
    'url', 'description', 'display_name', 'name', 'title', 'created',
    'created_utc', 'over18', 'subscribers' and 'id'.

    The lower-cased url is used both as the model's key_name and as the
    link excluded from the links parsed out of the description.

    Returns the new RedditInfo instance, or None when building the model
    raises BadValueError.
    """
    url = entry['url'].lower()
    description = entry['description']
    parsedLinks = parseRedditLinks(description, exclude=url)

    try:
        return RedditInfo(
            key_name=url,
            url=url,
            display_name=entry['display_name'],
            name=entry['name'],
            title=entry['title'],
            created=entry['created'],
            created_utc=entry['created_utc'],
            over18=entry['over18'],
            subscribers=entry['subscribers'],
            id=entry['id'],
            description=description,
            parsedLinks=parsedLinks)
    except BadValueError:
        # Some entries have multiline urls, which the model rejects; they
        # are assumed to be invalid and are skipped.
        return None

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.