Skip to content

Instantly share code, notes, and snippets.

@backus
Created November 24, 2012 02:31
Show Gist options
  • Save backus/4138124 to your computer and use it in GitHub Desktop.
Save backus/4138124 to your computer and use it in GitHub Desktop.
HackerSpace Hackathon Problem #2 - Retrieve Reddit’s top cat pictures from today, yesterday, the day before yesterday, and n days ago.
import urllib2
import json
import datetime
# Constants: the subreddits whose listings are combined into one multireddit query.
catSubreddits = [
    'cat',
    'catpictures',
    'cats',
    'flyingcats',
    'catgifs',
    'lolcats',
]
# Prompt for the number of days to look back. Keep asking until the input
# parses as an integer that is zero or greater; the result is left in nDays.
nDaysSet = False
while not nDaysSet:
    try:
        nDays = int(raw_input("N Days Back > "))
    except ValueError:
        # Input was not parseable as an integer at all.
        print("Must provide an integer")
        continue
    # Zero is accepted; only negative values are rejected.
    nDaysSet = nDays >= 0
    if not nDaysSet:
        print("Must provide positive integer")
# Set the UTC time window [utcThen, utcNow] used to filter submissions.
# The clock is read once so both bounds derive from the same instant; two
# separate reads would place utcThen after utcNow when nDays is 0.
# Both values are naive datetimes expressed in UTC.
utcNow = datetime.datetime.utcnow()
utcThen = utcNow - datetime.timedelta(days=nDays)
# Determine the Reddit "top" timeframe: pick the first window whose upper
# bound (in days) covers nDays. Anything outside every window, including
# values above 365, falls back to 'all'.
timeframeWord = 'all'
for maxDays, word in ((1, 'day'), (7, 'week'), (31, 'month'), (365, 'year')):
    if 0 <= nDays <= maxDays:
        timeframeWord = word
        break
# Build the API URL: a multireddit "top" listing for every cat subreddit.
# The URL deliberately ends with "after=" so a paging cursor can be appended.
apiBaseUrl = ''.join([
    'http://reddit.com/r/',
    '+'.join(catSubreddits),
    '/top/.json?t=',
    timeframeWord,
    "&count=25&after=",
])
# Query the listing for JSON and find the first post inside the time window.

# Set up the Reddit crawler. Reddit is strict about user agents, so send a
# descriptive one instead of the library default.
redditOpener = urllib2.build_opener()
redditOpener.addheaders = [('User-agent', 'Stanford Student Hackathon Challenge bot /u/jbackus')]
apiCurrUrl = apiBaseUrl

# Crawl the listing page by page, in the order Reddit returns it, until a
# post created inside [utcThen, utcNow] is found or the listing is exhausted.
# topPost stays None when nothing matches, so the caller can report that.
topPost = None
while topPost is None:
    response = redditOpener.open(apiCurrUrl)
    try:
        topJson = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()

    listing = json.loads(topJson)['data']
    submissions = listing['children']
    for submission in submissions:
        post = submission['data']
        # created_utc is an epoch timestamp in UTC. utcfromtimestamp keeps it
        # in UTC so it is comparable with the naive-UTC window bounds;
        # fromtimestamp would shift it into the machine's local time zone.
        submittedDate = datetime.datetime.utcfromtimestamp(post['created_utc'])
        if utcThen <= submittedDate <= utcNow:
            topPost = {
                'title': post['title'],
                'permalink': 'http://reddit.com' + post['permalink'],
                'url': post['url'],
            }
            break

    # The listing carries its own cursor for the next page (a fullname such
    # as "t3_xxxxx"). It is null on the last page; stop there rather than
    # looping forever or concatenating None onto the URL.
    nextCursor = listing.get('after')
    if not submissions or not nextCursor:
        break
    apiCurrUrl = apiBaseUrl + nextCursor
# Display the result: either a "nothing found" notice or the three fields
# of the selected post, one per line.
if topPost is None:
    print('No top post')
else:
    for prefix, field in (
        ("[+] Title: ", 'title'),
        ("[+] Permalink: ", 'permalink'),
        ("[+] URL: ", 'url'),
    ):
        print(prefix + topPost[field])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment