Skip to content

Instantly share code, notes, and snippets.

@ChrisBeaumont
Created January 26, 2016 06:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ChrisBeaumont/c643d97237022a009041 to your computer and use it in GitHub Desktop.
Save ChrisBeaumont/c643d97237022a009041 to your computer and use it in GitHub Desktop.
import requests
from soupy import Soupy, Q
# URL template for a single season's compendium page; the ``%i`` placeholder
# is filled in with the season number via ``URL % season``.
URL = "https://sites.google.com/site/wheeloffortunepuzzlecompendium/home/compendium/season-%i-compendium"
def scrape(seasons=range(1, 31)):
    """Yield ``(puzzle, category)`` pairs scraped from the compendium pages.

    One page is downloaded per season number (``URL % season``) and parsed
    lazily: the next page is only requested once the previous page's rows
    have been consumed.

    Args:
        seasons: Iterable of season numbers to fetch. Defaults to seasons
            1 through 30, which is what the original hard-coded loop covered.

    Yields:
        2-tuples ``(puzzle, category)`` built from the text of the first two
        cells of every table row after the first.

    Raises:
        requests.RequestException: If a page cannot be downloaded (including
            a timeout) or the server answers with an HTTP error status.
        ValueError: If a table row does not contain exactly four cells.
    """
    for season in seasons:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(URL % season, timeout=30)
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()
        dom = Soupy(response.text, 'html5')
        data = (
            dom
            .find('td', 'sites-tile-name-content-1')
            .find('tbody')
            .find_all('tr')
            # Text of every cell per row; [1:] drops the first row
            # (presumably the table header -- confirm against the page).
            .each(Q.find_all('td').each(Q.text))[1:]
            .val()
        )
        # Rows are unpacked as exactly four cells; the last two are unused.
        yield from ((puzzle, category) for puzzle, category, _, _ in data)
if __name__ == "__main__":
    # Demo entry point: print the first five scraped (puzzle, category) pairs.
    # islice stops consuming the generator after five items, so no more pages
    # are requested than needed to produce them.
    from itertools import islice
    print(list(islice(scrape(), 5)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment