Skip to content

Instantly share code, notes, and snippets.

@dirn
Last active September 11, 2015 18:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dirn/43e8c891e008444fc66d to your computer and use it in GitHub Desktop.
Save dirn/43e8c891e008444fc66d to your computer and use it in GitHub Desktop.
import csv
from bs4 import BeautifulSoup
import requests
# Scrape the schedule page at ``in_url`` and write every home game to
# ``out_filename`` as tab-separated (opponent, date, time) rows.
in_url = 'http://rangers.nhl.com/club/schedule.htm'
# in_filename = 'schedule.html'
out_filename = 'schedule.csv'


def strip(cell):
    """Return the whitespace-stripped text of the first child of *cell*.

    *cell* is a BeautifulSoup tag (here a ``<td>``). Only the first child
    is considered. If that child is itself a tag (for example a link
    wrapping the text), its text content is used. An empty cell yields
    ``''``.
    """
    if not cell.contents:
        return ''
    first = cell.contents[0]
    # Text nodes are ``str`` subclasses; nested tags need get_text().
    text = first if isinstance(first, str) else first.get_text()
    return text.strip()


# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops us from parsing an HTTP error page.
resp = requests.get(in_url, timeout=30)
resp.raise_for_status()
# Name the parser explicitly so results do not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(resp.text, 'html.parser')
# To parse a saved copy of the page instead of fetching it:
# with open(in_filename) as f:
#     soup = BeautifulSoup(f, 'html.parser')

# Locate the table before opening the output file so a failed lookup does
# not leave a truncated, empty file behind.
table = soup.find('table', {'class': 'data'})
if table is None:
    raise RuntimeError('no <table class="data"> found at ' + in_url)

with open(out_filename, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f, delimiter='\t')
    for row in table.find_all('tr'):
        class_names = row.attrs.get('class', [])
        if not class_names:
            # Table header
            continue
        if 'hdr' in class_names:
            # Month header
            continue
        cells = row.find_all('td')
        if len(cells) < 4:
            # Too few cells to hold date/away/home/time; skip the row
            # instead of crashing on the unpacking below.
            continue
        date, away, home, time, *_ = map(strip, cells)
        if home != 'Rangers':
            # Away game
            continue
        writer.writerow((away, date, time))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment