Skip to content

Instantly share code, notes, and snippets.

@jerith
Created May 15, 2012 16:45
Show Gist options
  • Save jerith/2703162 to your computer and use it in GitHub Desktop.
Save jerith/2703162 to your computer and use it in GitHub Desktop.
pyweek game grabber
#!/usr/bin/env python
from bs4 import BeautifulSoup
# Saved copy of the index page to scrape, and the shell script to generate.
INDEX_FILE = 'index.html'
SCRIPT_FILE = 'get_games.sh'

# Read the index page inside a context manager so its file handle is closed
# as soon as the markup has been loaded instead of lingering until exit.
# NOTE(review): no parser is named, so bs4 picks whichever is installed;
# pin one explicitly if results differ between machines.
with open(INDEX_FILE) as index_file:
    soup = BeautifulSoup(index_file.read())

# Deliberately kept open at module level: process_game() writes to this
# handle and it is closed once every game has been processed.
script = open(SCRIPT_FILE, 'w')
def process_game(th, td, out=None):
    """Write the shell commands that download one game's files.

    The emitted snippet creates a directory named after the team, enters it
    with ``pushd``, issues one ``wget -c`` per matching link and leaves the
    directory again with ``popd``.

    Args:
        th: Header cell of the entry. The last item of ``th.contents`` must
            be a string; after stripping whitespace its first and last
            characters are dropped to obtain the team name (presumably
            surrounding parentheses -- confirm against the index page).
        td: Cell whose ``<a>`` descendants are the candidate download links.
            Only links containing ``media.pyweek.org`` are kept.
        out: Writable text stream that receives the commands. Defaults to
            the module-level ``script`` file.
    """
    try:
        from shlex import quote
    except ImportError:  # Python 2 keeps the same helper in ``pipes``.
        from pipes import quote

    if out is None:
        out = script

    team_name = th.contents[-1].strip()[1:-1]
    hrefs = (a.get('href') for a in td.find_all('a'))
    # Anchors without an href yield None; skip them rather than letting the
    # substring test raise TypeError.
    urls = [url for url in hrefs if url and "media.pyweek.org" in url]

    # Both the team name and the URLs come from scraped HTML, so quote them
    # before they are interpolated into shell commands: spaces or shell
    # metacharacters would otherwise break (or hijack) the generated script.
    directory = quote(team_name)
    out.write('mkdir %s\npushd %s\n' % (directory, directory))
    for url in urls:
        out.write('wget -c %s\n' % quote(url))
    out.write('popd\n\n')
# The table rows are consumed in consecutive pairs: the <th> of the first
# row carries the team name and the <td> of the second row carries the
# download links for that entry.
rows = soup.find_all('tr')
try:
    # Pair even-indexed rows with the odd-indexed rows that follow them.
    # zip() stops at the shorter slice, so a trailing unpaired row is
    # ignored instead of raising IndexError, and the list is no longer
    # re-sliced on every iteration.
    for header_row, links_row in zip(rows[::2], rows[1::2]):
        process_game(header_row.th, links_row.td)
finally:
    # Close (and flush) the generated script even if a game fails to parse.
    script.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment