Skip to content

Instantly share code, notes, and snippets.

@Tarpstone
Created May 4, 2024 22:39
Show Gist options
  • Save Tarpstone/280137c2e4cbbb91328107a30c00dec7 to your computer and use it in GitHub Desktop.
Save Tarpstone/280137c2e4cbbb91328107a30c00dec7 to your computer and use it in GitHub Desktop.
Truncated version of March Madness modeling code, illustrating multiprocessing vs. array-based approaches.
# import native Python packages
import multiprocessing
from enum import Enum
from time import perf_counter

# import third party packages
import pandas
from fastapi import APIRouter, Depends, HTTPException, Path
from motor.motor_asyncio import AsyncIOMotorClient
from odmantic import AIOEngine, Model

# import custom local stuff
from src.db.atlas import get_odm
# router for the autobracket endpoints; its routes are served under the
# "/autobracket" prefix and tagged "autobracket"
ab_api = APIRouter(prefix="/autobracket", tags=["autobracket"])
class FantasyDataSeason(str, Enum):
    """Season identifiers that can be requested for a simulation.

    The ``str`` mixin makes every member compare equal to its plain string
    value (for example ``FantasyDataSeason.CURRENTSEASON == "2021"``).
    """

    PRIORSEASON1 = "2020"
    CURRENTSEASON = "2021"
class CBBTeam(Model):
    """Data model representing one team."""

    # Field definitions are not shown in this truncated listing. The
    # simulation endpoint filters on Season and Key and reads an AdjT value
    # from each team document, so those fields are presumably declared in the
    # full model -- confirm against the complete source.
class PlayerSeason(Model):
    """Data model representing one player's season."""

    # Field definitions are not shown in this truncated listing. The
    # simulation endpoint filters on Season and Team and sorts by Team and
    # StatID, so those fields are presumably declared in the full model --
    # confirm against the complete source.
class SimulationRun(Model):
    """Data model with game summary."""

    # Field definitions are not shown in this truncated listing. The
    # simulation endpoint builds one instance per simulation result via
    # SimulationRun(**doc), so the fields presumably mirror the keys of each
    # result document -- confirm against the complete source.
class SimulationDist(Model):
    """Data model for a set of simulation runs."""

    # Field definitions are not shown in this truncated listing. The
    # simulation endpoint builds a single instance via
    # SimulationDist(**distribution), so the fields presumably mirror the
    # keys of that distribution mapping -- confirm against the complete source.
@ab_api.post(
    "/sim/{season}/{away_team}/{home_team}/{sample_size}/{preserve_size}",
)
async def full_game_simulation(
    season: FantasyDataSeason,
    away_team: str,
    home_team: str,
    sample_size: int = Path(..., gt=0, le=1000),
    preserve_size: int = Path(..., ge=10, le=100),
    client: AsyncIOMotorClient = Depends(get_odm),
):
    """Simulate a matchup between two teams and save the output to MongoDB.

    Loads the player seasons and the team records for both teams, passes
    them to ``run_simulation`` and stores one ``SimulationRun`` per returned
    result plus one ``SimulationDist`` in the "autobracket" database.

    Args:
        season: Season whose player and team data is used.
        away_team: Team key of the away team.
        home_team: Team key of the home team.
        sample_size: Number of simulations requested (1 to 1000).
        preserve_size: Forwarded unchanged to ``run_simulation`` (10 to 100).
        client: Motor client supplied by the ``get_odm`` dependency.

    Returns:
        A dict with a success message, the elapsed seconds before and during
        the database write, and the requested number of simulations.

    Raises:
        HTTPException: 404 when no player seasons or no team records are
            found for the requested season and teams.
    """
    # performance timer
    start_time = perf_counter()
    engine = AIOEngine(motor_client=client, database="autobracket")

    matchup_data = [
        player_season
        async for player_season in engine.find(
            PlayerSeason,
            (PlayerSeason.Season == season)
            & ((PlayerSeason.Team == away_team) | (PlayerSeason.Team == home_team)),
            sort=(PlayerSeason.Team, PlayerSeason.StatID),
        )
    ]
    # an empty result would produce a DataFrame without a "Team" column and
    # fail below with a KeyError, so report the missing data explicitly
    if not matchup_data:
        raise HTTPException(
            status_code=404,
            detail=(
                f"No player seasons found for {away_team} or {home_team} "
                f"in season {season.value}."
            ),
        )

    # create a dataframe representing one simulation
    matchup_df = pandas.DataFrame(
        [player_season.doc() for player_season in matchup_data]
    )
    # create an Away and Home field for identification in the simulation:
    # every row starts as "home", rows of the away team are then overwritten
    matchup_df["designation"] = "home"
    matchup_df.loc[matchup_df["Team"] == away_team, "designation"] = "away"

    # pull Kenpom tempo data for the two teams
    kenpom_data = [
        team
        async for team in engine.find(
            CBBTeam,
            (CBBTeam.Season == season)
            & ((CBBTeam.Key == away_team) | (CBBTeam.Key == home_team)),
            sort=CBBTeam.Key,
        )
    ]
    # without any team record there is no AdjT column to sum
    if not kenpom_data:
        raise HTTPException(
            status_code=404,
            detail=(
                f"No team records found for {away_team} or {home_team} "
                f"in season {season.value}."
            ),
        )
    kenpom_df = pandas.DataFrame([team.doc() for team in kenpom_data])
    # combined tempo: the AdjT values of all matched team records added up
    kenpom_tempo = kenpom_df["AdjT"].sum()

    # The former `if False:` multiprocessing branch was removed: it could
    # never run, called run_simulation with a single argument and did not
    # produce `distribution`. The array-based call below is the only path.
    # NOTE: run_simulation is a regular (non-async) function called directly
    # from this coroutine, so the event loop is occupied until it returns.
    results, distribution = run_simulation(
        matchup_df, season, sample_size, preserve_size, kenpom_tempo
    )
    # measured from start_time, so this also covers the database reads above
    sim_time = perf_counter()

    writes = [SimulationRun(**doc) for doc in results] + [
        SimulationDist(**distribution)
    ]
    # write results to MongoDB
    await engine.save_all(writes)
    db_time = perf_counter()

    return {
        "success": "Check database for output!",
        "sim_time": (sim_time - start_time),
        "db_time": (db_time - sim_time),
        "simulations": sample_size,
    }
def run_simulation(matchup_df, season, sample_size, preserve_size, kenpom_tempo):
    """Run the basketball game simulation for one matchup.

    The simulation logic is omitted from this truncated listing, so
    ``results_array`` and ``distribution_data`` are not defined here; the
    omitted logic has to assign both names before the return statement.

    Args:
        matchup_df: DataFrame of both teams' player seasons, including the
            "designation" column ("home" / "away").
        season: Season the simulation is run for.
        sample_size: Number of simulations requested.
        preserve_size: Value passed through from the endpoint; how it is
            used is not visible in this listing.
        kenpom_tempo: Sum of the AdjT values of the matched team records.

    Returns:
        A ``(results_array, distribution_data)`` tuple. The endpoint expands
        each element of the first into ``SimulationRun(**doc)`` and the
        second into ``SimulationDist(**distribution)``, so presumably a
        sequence of mappings and a single mapping -- confirm against the
        complete source.
    """
    # basketball game simulation logic goes here
    return results_array, distribution_data
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment