Skip to content

Instantly share code, notes, and snippets.

@ginatrapani
Forked from samuelclay/entries.md
Created March 29, 2012 14:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ginatrapani/2238136 to your computer and use it in GitHub Desktop.
Save ginatrapani/2238136 to your computer and use it in GitHub Desktop.
Top entries in Knight News Challenge 2012

Results on Tues. March 27th, 2012:

Found 378 active entries among 958 total applications.


Results on Wed. March 21st, 2012:

Found 323 active entries among 958 total applications.

# Screen scrapes the Knight News Challenge entries (all 64 pages of them)
# and counts the number of votes/hearts for each entry. Then displays them
# in rank order.
#
# This script runs in about 20 seconds.
import requests
from BeautifulSoup import BeautifulSoup
# Listing page URL template; %s is the 1-based page number.
PAGE_URL = "http://newschallenge.tumblr.com/page/%s"

# Seconds to wait on a single page request before giving up, so one stalled
# connection cannot hang the whole scrape.
REQUEST_TIMEOUT = 30


def parse_count(text):
    """Convert the text of a likes/comments counter into an int.

    Missing or empty text counts as 0. Thousands separators and surrounding
    whitespace are tolerated. Text that still is not a number also counts
    as 0, so that one oddly formatted entry cannot abort the scrape.
    """
    if not text:
        return 0
    try:
        return int(text.replace(",", "").strip())
    except ValueError:
        return 0


def is_sticky(css_class):
    """Return True if a postbox's ``class`` attribute marks it as sticky.

    ``css_class`` may be None (attribute absent), a string, or a list of
    class names; None is treated as "no classes" instead of raising.
    """
    return 'stickyPost' in (css_class or '')


def parse_entry(entry):
    """Extract likes, comments, title and url from one postbox element.

    ``title`` and ``url`` are None when the corresponding element is missing.
    """
    likes = entry.find("", "home-likes")
    comments = entry.find("", "home-comments")
    title = entry.find("h2")
    link = entry.find('a', "home-view")
    return {
        'likes': parse_count(likes.text) if likes is not None else 0,
        'comments': parse_count(comments.text) if comments is not None else 0,
        'title': title.text if title else title,
        'url': link.get('href') if link else link,
    }


def scrape_entries():
    """Walk the listing pages and collect every active entry.

    Returns a ``(entries, total_entry_count)`` tuple: ``entries`` holds one
    dict per entry that has at least one like or comment, and
    ``total_entry_count`` counts every non-sticky entry seen.
    """
    page = 1
    total_entry_count = 0
    entries = []

    while True:
        print(" ---> Found %s entries so far. Now on page: %s" % (len(entries), page))
        knight_url = PAGE_URL % (page)
        html = requests.get(knight_url, timeout=REQUEST_TIMEOUT).content
        soup = BeautifulSoup(html)
        postboxes = soup.findAll("div", "postbox")

        # Done if only sticky entry is left.
        if len(postboxes) <= 1:
            break

        page += 1

        # 15 entries per page, plus a sticky throwaway entry
        for postbox in postboxes:
            if is_sticky(postbox.get('class')):
                continue
            total_entry_count += 1

            record = parse_entry(postbox)
            # Only record active entries
            if record['comments'] or record['likes']:
                entries.append(record)

    return entries, total_entry_count


def ranking_lines(entries):
    """Return one formatted line per entry, least popular first.

    Entries are ordered by ``comments + likes`` ascending and numbered so
    that the most popular entry is #1 and comes last in the returned list.
    """
    ordered = sorted(entries, key=lambda e: e['comments'] + e['likes'])
    count = len(ordered)
    return [
        " * #%s: %s likes - [%s](%s)" % (
            count - i, entry['likes'], entry['title'], entry['url'])
        for i, entry in enumerate(ordered)
    ]


def main():
    """Scrape all entries, print the ranking, then print the summary line."""
    entries, total_entry_count = scrape_entries()

    for line in ranking_lines(entries):
        print(line)

    print(" ***> Found %s active entries among %s total applications." % (
        len(entries), total_entry_count))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment