Created
January 26, 2022 01:56
-
-
Save kevdougful/5e221e1e1895493b5404c90141c6914e to your computer and use it in GitHub Desktop.
ABC Ballpark Schedule Scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime, date, timedelta
from typing import Optional
from uuid import uuid4

import attr
import httpx
from bs4 import BeautifulSoup
from icalendar import Calendar, Event
@attr.s(auto_attribs=True)
class Game:
    """One scheduled game, as read from a single row of the schedule table."""

    # Start date and time of the game.
    time: datetime
    # Text of the field/venue cell for the game.
    location: str
    # Text of the division cell for the game.
    division: str
    # Name of the home team.
    home: str
    # Name of the away team.
    away: str
@attr.s(auto_attribs=True)
class Schedule:
    """Scrape a team's game schedule from abcballpark.com into an .ics file.

    Typical use is ``Schedule(sport=..., kid=...).scrape()``, which fetches
    the schedule page, parses the game rows and writes ``<kid>.ics`` in the
    current directory. The public methods return ``self`` so they can be
    chained.
    """

    # Label used as the first part of every event title.
    sport: str
    # Name used in event titles and as the stem of the output file name.
    kid: str
    # Value posted as the "srch_team" form field.
    team: str = ""
    # Value posted as the "srch_divn" form field.
    division: int = 1
    # When true, the scraped location text is appended to each event title.
    location_in_title: bool = True
    # Parsed Game objects; stays None until get_games() has run.
    games: Optional[list] = None

    def scrape(self):
        """Fetch the page, parse the games and save the calendar.

        Returns:
            This schedule, consistent with the other public methods
            (previously this returned ``None``).
        """
        self.fetch()
        self.get_games()
        self.save()
        return self

    def fetch(self):
        """POST the search form and keep the response text and parsed soup.

        Raises:
            httpx.HTTPStatusError: if the server answers with a 4xx/5xx
                status, rather than parsing an error page as a schedule.
        """
        response = httpx.post(
            url="http://www.abcballpark.com/eventschedule/index.php",
            data={
                "srch_divn": self.division,
                "srch_team": self.team,
            },
        )
        response.raise_for_status()
        self._text = response.text
        self._soup = BeautifulSoup(self._text, "html.parser")
        return self

    def save(self):
        """Write the iCalendar bytes from to_ical() to ``<kid>.ics``."""
        with open(f"{self.kid}.ics", "wb") as f:
            f.write(self.to_ical())
        return self

    def get_games(self):
        """Parse the first table of the fetched page into ``self.games``.

        Rows whose markup contains "Postponed" are dropped; of the rows that
        remain, the first and the last are skipped and the rest are parsed.

        Raises:
            ValueError: if the fetched page contains no ``<table>``.
        """
        table = self._soup.table
        if table is None:
            raise ValueError("no schedule table found in the fetched page")
        # Strip game lines from the table, excluding postponed
        rows = [
            row for row in table.find_all("tr") if "Postponed" not in str(row)
        ]
        self.games = [self._parse_game(row) for row in rows[1:-1]]
        return self

    def to_ical(self):
        """Serialize the parsed games as an iCalendar document.

        Each game becomes a one-hour event. The Game objects are left
        untouched, so calling this repeatedly yields the same titles and
        locations. A schedule with no parsed games yields an empty calendar.

        Returns:
            The calendar as bytes.
        """
        cal = Calendar()
        for game in self.games or []:
            # The title uses the location text as scraped; only the event's
            # LOCATION field gets the expanded street address.
            title = f"{self.sport} - {self.kid}"
            if self.location_in_title:
                title = f"{title} {game.location}"
            location = self._resolve_location(game.location)
            startstr = game.time.strftime("%Y%m%dT%H%M%S")
            endstr = (game.time + timedelta(hours=1)).strftime("%Y%m%dT%H%M%S")
            # NOTE(review): DTSTAMP is set to the game's start time, not to
            # the time the event was generated - confirm this is intended.
            new_game = Event(
                uid=uuid4(),
                dtstamp=startstr,
                dtstart=startstr,
                dtend=endstr,
                location=location,
                summary=title,
                description=f"{game.division}: {game.away} @ {game.home}",
            )
            cal.add_component(new_game)
        cal.add('prodid', '-//abcballpark.com//Game Schedule 1.0//EN')
        cal.add('version', '2.0')
        return cal.to_ical()

    @staticmethod
    def _resolve_location(location):
        """Map a scraped location to a street address for known venues."""
        if "ABC" in location:
            return "ABC Ballpark, 10500 Livingston Ave, St Ann, MO 63074, USA"
        if "BMAC" in location:
            return "BMAC, 13161 Taussig Rd, Bridgeton, MO 63044, USA"
        return location

    def _parse_team(self, game_ics):
        """Return the text of a team cell with its first <br> made a space.

        Non-breaking spaces are removed from the result. A cell without a
        ``<br>`` is accepted as-is.
        """
        line_break = game_ics.find("br")
        if line_break is not None:
            line_break.replace_with(" ")
        return game_ics.text.replace("\xa0", "")

    def _parse_game(self, game, year: Optional[int] = None):
        """Build a Game from one table row.

        Args:
            game: the ``<tr>`` element; it must contain exactly nine ``<td>``
                cells (date, time, field, division, -, home, -, away, -).
            year: year combined with the row's date text, which carries no
                year of its own. Defaults to the current year, resolved when
                the method is called.

        Raises:
            ValueError: if the row does not have nine cells or the date/time
                text does not match the expected format.
        """
        if year is None:
            year = date.today().year
        cells = game.find_all("td")
        game_date, game_time, game_field, game_division, _, home, _, away, _ = cells
        # Parse date
        # NOTE(review): every row gets the same year, so a schedule that
        # runs across New Year would be dated wrongly for part of the
        # season - confirm against the site's data.
        game_date_obj = datetime.strptime(
            f"{game_date.text} {year} {game_time.text}",
            "%a %b %d %Y %I:%M %p",
        )
        # Parse field string
        field_break = game_field.find("br")
        if field_break is not None:
            field_break.replace_with(" ")
        return Game(
            time=game_date_obj,
            division=game_division.text,
            location=game_field.text,
            home=self._parse_team(home),
            away=self._parse_team(away),
        )
# Scrape the division-2 "Spirits-Red" basketball schedule for Addie; the
# location text is kept out of the event titles.
s = Schedule(
    sport="🏀 Basketball",
    kid="Addie",
    team="Spirits-Red",
    division=2,
    location_in_title=False,
).scrape()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment