Skip to content

Instantly share code, notes, and snippets.

@kevdougful
Created January 26, 2022 01:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kevdougful/5e221e1e1895493b5404c90141c6914e to your computer and use it in GitHub Desktop.
Save kevdougful/5e221e1e1895493b5404c90141c6914e to your computer and use it in GitHub Desktop.
ABC Ballpark Schedule Scraper
from datetime import date, datetime, timedelta, timezone
from uuid import uuid4

import attr
import httpx
from bs4 import BeautifulSoup
from icalendar import Calendar, Event
@attr.s(auto_attribs=True)
class Game:
    """One game row taken from the schedule table.

    Attributes:
        time: Start of the game, built from the row's date and time cells.
        location: Text of the row's field cell.
        division: Text of the row's division cell.
        home: Home team name.
        away: Away team name.
    """

    time: datetime
    location: str
    division: str
    home: str
    away: str
@attr.s(auto_attribs=True)
class Schedule:
    """Scrape a team's schedule from abcballpark.com and export it as iCalendar.

    Attributes:
        sport: Label used as the first part of every event title.
        kid: Name used in event titles and as the stem of the ``.ics`` file.
        team: Value posted as the ``srch_team`` search field.
        division: Value posted as the ``srch_divn`` search field.
        location_in_title: When true, the scraped field text is appended to
            each event title.
        games: ``Game`` objects produced by :meth:`get_games`; ``None`` until
            that method has run.
    """

    sport: str
    kid: str
    team: str = ""
    division: int = 1
    location_in_title: bool = True
    games: list = None

    def scrape(self):
        """Fetch the page, parse the games and write ``<kid>.ics``.

        Returns:
            This instance, like the other pipeline methods, so the result of
            ``Schedule(...).scrape()`` is usable.
        """
        return self.fetch().get_games().save()

    def fetch(self):
        """POST the search form and keep the response text and parsed soup.

        Returns:
            This instance.

        Raises:
            httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        """
        response = httpx.post(
            url="http://www.abcballpark.com/eventschedule/index.php",
            data={
                "srch_divn": self.division,
                "srch_team": self.team,
            },
        )
        # Fail loudly instead of parsing an error page as if it were a schedule.
        response.raise_for_status()
        self._text = response.text
        self._soup = BeautifulSoup(self._text, "html.parser")
        return self

    def save(self):
        """Write the calendar to ``<kid>.ics`` in the working directory.

        Returns:
            This instance.
        """
        with open(f"{self.kid}.ics", "wb") as f:
            f.write(self.to_ical())
        return self

    def get_games(self):
        """Parse the first table of the fetched page into ``self.games``.

        Rows whose markup contains "Postponed" are skipped.

        Returns:
            This instance.

        Raises:
            ValueError: If the fetched page contains no ``<table>``.
        """
        table = self._soup.table
        if table is None:
            raise ValueError("No schedule table found in the fetched page")
        rows = [row for row in table.find_all("tr") if "Postponed" not in str(row)]
        # The first and last remaining rows are not games (presumably a header
        # and a footer row -- confirm against the live page).
        self.games = [self._parse_game(row) for row in rows[1:-1]]
        return self

    def to_ical(self):
        """Serialize ``self.games`` as an iCalendar document.

        Each game becomes a one-hour event. The ``Game`` objects are left
        unmodified, so repeated calls produce the same titles.

        Returns:
            The calendar as ``bytes``.
        """
        cal = Calendar()
        cal.add('prodid', '-//abcballpark.com//Game Schedule 1.0//EN')
        cal.add('version', '2.0')
        # DTSTAMP records when the event data was generated and must be a UTC
        # timestamp (RFC 5545), not the game's start time.
        stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
        for game in self.games or []:
            title = f"{self.sport} - {self.kid}"
            if self.location_in_title:
                # The title uses the short scraped field text, not the address.
                title = f"{title} {game.location}"
            startstr = game.time.strftime("%Y%m%dT%H%M%S")
            endstr = (game.time + timedelta(hours=1)).strftime("%Y%m%dT%H%M%S")
            cal.add_component(
                Event(
                    uid=str(uuid4()),
                    dtstamp=stamp,
                    dtstart=startstr,
                    dtend=endstr,
                    location=self._full_address(game.location),
                    summary=title,
                    description=f"{game.division}: {game.away} @ {game.home}",
                )
            )
        return cal.to_ical()

    @staticmethod
    def _full_address(location):
        """Return the street address of a known venue, else *location* as is."""
        if "ABC" in location:
            return "ABC Ballpark, 10500 Livingston Ave, St Ann, MO 63074, USA"
        if "BMAC" in location:
            return "BMAC, 13161 Taussig Rd, Bridgeton, MO 63044, USA"
        return location

    def _parse_team(self, game_ics):
        """Return the text of a team cell as a single clean line.

        The first ``<br>`` (if any) is replaced with a space and non-breaking
        spaces are removed. The cell element is modified in place.
        """
        line_break = game_ics.find("br")
        if line_break is not None:
            line_break.replace_with(" ")
        return game_ics.text.replace("\xa0", "")

    def _parse_game(self, game, year=None):
        """Build a ``Game`` from one table row.

        Args:
            game: The ``<tr>`` element; it must contain exactly nine ``<td>``
                cells.
            year: Year to combine with the row's date text. Defaults to the
                current year, resolved at call time.

        Returns:
            The parsed ``Game``.

        Raises:
            ValueError: If the row does not have nine cells or its date/time
                text does not match the expected format.
        """
        if year is None:
            # Resolved per call; a default in the signature would be frozen at
            # import time.
            year = date.today().year
        game_date, game_time, game_field, game_division, _, home, _, away, _ = (
            game.find_all("td")
        )
        # NOTE(review): the year is not taken from the page, so a schedule that
        # crosses New Year would get the wrong year on some games -- confirm.
        game_date_obj = datetime.strptime(
            f"{game_date.text} {year} {game_time.text}", "%a %b %d %Y %I:%M %p"
        )
        # Join the field cell onto one line; tolerate cells without a <br>.
        field_break = game_field.find("br")
        if field_break is not None:
            field_break.replace_with(" ")
        return Game(
            time=game_date_obj,
            division=game_division.text,
            location=game_field.text,
            home=self._parse_team(home),
            away=self._parse_team(away),
        )
# Fetch, parse and write "Addie.ics" for the Spirits-Red basketball team.
s = Schedule(
    sport="🏀 Basketball",
    kid="Addie",
    team="Spirits-Red",
    division=2,
    location_in_title=False,
).scrape()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment