Skip to content

Instantly share code, notes, and snippets.

@tswicegood
Created April 29, 2013 17:29
Show Gist options
  • Save tswicegood/5483232 to your computer and use it in GitHub Desktop.
Save tswicegood/5483232 to your computer and use it in GitHub Desktop.
Quick script for scraping the current rankings from the GEN public choice awards.
"""
Quick script for scraping the current rankings from the GEN public choice
awards.
"""
from pyquery import PyQuery as pq
import requests
# Listing URL template; ``%d`` is filled with each integer produced by the
# fetch loop below (presumably the listing's page number -- confirm).
URL = "https://app.wizehive.com/voting/dja2013/13447/%d"

# Seconds to wait on the server before giving up on a single request, so a
# stalled connection cannot hang the script forever.
REQUEST_TIMEOUT = 30

# Accumulates one (name, votes) tuple per ".item" element found on any page.
entries = []
for page in range(1, 9):
    # Use requests directly to avoid issues with unicode decoding
    response = requests.get(URL % page, timeout=REQUEST_TIMEOUT)
    doc = pq(response.content.decode('utf8', 'ignore'))
    for item in doc(".item"):
        name = item.find("div").text_content().strip()
        # The vote text is read from the item's last child element.
        raw_votes = item.getchildren()[-1].text_content().strip()
        # The count is the second field when splitting on three tabs; an
        # empty vote text is treated as zero votes.
        votes = int(raw_votes.split("\t\t\t")[1]) if raw_votes else 0
        entries.append((name, votes))

# Highest vote count first.  The sort is stable, so entries with equal votes
# keep the order in which they were scraped.
ranked = sorted(entries, key=lambda entry: entry[1], reverse=True)
for rank, (name, votes) in enumerate(ranked, 1):
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("%02d - %s: %d" % (rank, name, votes))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment