Created
May 4, 2024 22:39
-
-
Save Tarpstone/280137c2e4cbbb91328107a30c00dec7 to your computer and use it in GitHub Desktop.
Truncated version of March Madness modeling code, illustrating multiprocessing vs. array-based approaches.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import native Python packages
from enum import Enum
import multiprocessing
from time import perf_counter

# import third party packages
from fastapi import APIRouter, Depends, HTTPException, Path
from motor.motor_asyncio import AsyncIOMotorClient
import pandas
from odmantic import AIOEngine, Model

# import custom local stuff
from src.db.atlas import get_odm
# router for the autobracket endpoints; every route registered on it is
# served under the /autobracket prefix and tagged "autobracket"
ab_api = APIRouter(prefix="/autobracket", tags=["autobracket"])
class FantasyDataSeason(str, Enum):
    """Season identifiers accepted by the API.

    Members are also ``str`` instances, so a member compares equal to its
    year string (for example ``"2021"``).
    """

    # presumably the season immediately before the current one
    PRIORSEASON1 = "2020"
    # presumably the season currently being modeled
    CURRENTSEASON = "2021"
class CBBTeam(Model):
    """Data model representing one team.

    Field definitions are omitted from this truncated file. The simulation
    endpoint filters on ``Season`` and ``Key`` and reads an ``AdjT`` column
    from the stored documents.
    """
class PlayerSeason(Model):
    """Data model representing one player's season.

    Field definitions are omitted from this truncated file. The simulation
    endpoint filters on ``Season`` and ``Team`` and sorts on ``Team`` and
    ``StatID``.
    """
class SimulationRun(Model):
    """Data model holding a game summary.

    Field definitions are omitted from this truncated file. The simulation
    endpoint builds one instance per result document returned by the
    simulation.
    """
class SimulationDist(Model):
    """Data model for a set of simulation runs.

    Field definitions are omitted from this truncated file. The simulation
    endpoint builds a single instance from the distribution mapping returned
    by the simulation.
    """
@ab_api.post(
    "/sim/{season}/{away_team}/{home_team}/{sample_size}/{preserve_size}",
)
async def full_game_simulation(
    season: FantasyDataSeason,
    away_team: str,
    home_team: str,
    sample_size: int = Path(..., gt=0, le=1000),
    preserve_size: int = Path(..., ge=10, le=100),
    client: AsyncIOMotorClient = Depends(get_odm),
):
    """Simulate a matchup between two teams and save the output to MongoDB.

    Loads the player seasons and team tempo data for both teams, runs the
    simulation, and writes the resulting ``SimulationRun`` documents plus one
    ``SimulationDist`` document to the ``autobracket`` database.

    Args:
        season: Season to pull player and team data for.
        away_team: Team key of the away team.
        home_team: Team key of the home team.
        sample_size: Number of simulations to run (1 to 1000).
        preserve_size: Passed through to ``run_simulation`` (10 to 100).
        client: Motor client supplied by the ``get_odm`` dependency.

    Returns:
        A dict with a ``success`` message, ``sim_time`` (seconds from request
        start until the simulation finished, which includes the database
        reads), ``db_time`` (seconds spent saving results) and
        ``simulations`` (the requested sample size).

    Raises:
        HTTPException: 404 when no player seasons or no team data are stored
            for the requested teams and season.
    """
    # performance timer
    start_time = perf_counter()
    engine = AIOEngine(motor_client=client, database="autobracket")

    roster_filter = (PlayerSeason.Season == season) & (
        (PlayerSeason.Team == away_team) | (PlayerSeason.Team == home_team)
    )
    matchup_data = [
        player_season
        async for player_season in engine.find(
            PlayerSeason,
            roster_filter,
            sort=(PlayerSeason.Team, PlayerSeason.StatID),
        )
    ]
    # an empty result would yield a DataFrame without a "Team" column and
    # fail below with a KeyError, so report it as a client error instead
    if not matchup_data:
        raise HTTPException(
            status_code=404,
            detail=(
                f"No player data found for {away_team} or {home_team} "
                f"in season {season.value}"
            ),
        )

    # create a dataframe representing one simulation
    matchup_df = pandas.DataFrame(
        [player_season.doc() for player_season in matchup_data]
    )
    # create an Away and Home field for identification in the simulation
    matchup_df["designation"] = "home"
    matchup_df.loc[matchup_df["Team"] == away_team, "designation"] = "away"

    # pull Kenpom tempo data for the two teams
    team_filter = (CBBTeam.Season == season) & (
        (CBBTeam.Key == away_team) | (CBBTeam.Key == home_team)
    )
    kenpom_data = [
        team async for team in engine.find(CBBTeam, team_filter, sort=CBBTeam.Key)
    ]
    # same reasoning as above: an empty frame has no AdjT column
    if not kenpom_data:
        raise HTTPException(
            status_code=404,
            detail=(
                f"No team data found for {away_team} or {home_team} "
                f"in season {season.value}"
            ),
        )
    kenpom_df = pandas.DataFrame([team.doc() for team in kenpom_data])
    kenpom_tempo = kenpom_df.AdjT.sum()

    # NOTE(review): the multiprocessing path is kept only for comparison with
    # the array-based path. It is disabled, and as written it neither matches
    # run_simulation's current signature nor assigns `distribution`, so it
    # must be reworked before the flag is ever switched on.
    use_multiprocessing = False
    if use_multiprocessing:
        # create a list of matchup dfs representing multiple simulations
        cores_to_use = multiprocessing.cpu_count()
        simulations = [matchup_df.copy() for _ in range(sample_size)]
        with multiprocessing.Pool(processes=cores_to_use) as p:
            results = p.map(run_simulation, simulations)
            # clean up
            p.close()
            p.join()
    else:
        # new array program is working!
        results, distribution = run_simulation(
            matchup_df, season, sample_size, preserve_size, kenpom_tempo
        )
    sim_time = perf_counter()

    writes = [SimulationRun(**doc) for doc in results] + [
        SimulationDist(**distribution)
    ]
    # write results to MongoDB
    await engine.save_all(writes)
    db_time = perf_counter()

    return {
        "success": "Check database for output!",
        "sim_time": (sim_time - start_time),
        "db_time": (db_time - sim_time),
        "simulations": sample_size,
    }
def run_simulation(matchup_df, season, sample_size, preserve_size, kenpom_tempo):
    """Run the array-based basketball game simulation for one matchup.

    The simulation logic is not included in this truncated file, so this
    function is an explicit stub rather than a return of undefined names.

    Args:
        matchup_df: DataFrame of player seasons for both teams, with a
            "designation" column set to "home" or "away".
        season: Season the matchup data was loaded for.
        sample_size: Number of simulations to run.
        preserve_size: Meaning not visible in this file -- TODO confirm
            against the full implementation.
        kenpom_tempo: Sum of the two teams' ``AdjT`` values, as computed by
            the calling endpoint.

    Returns:
        Intended to return a ``(results, distribution)`` pair: the calling
        endpoint expands each item of ``results`` into a ``SimulationRun``
        and ``distribution`` into a ``SimulationDist``.

    Raises:
        NotImplementedError: Always, until the simulation logic is restored.
    """
    # basketball game simulation logic goes here
    raise NotImplementedError(
        "run_simulation is a placeholder; the simulation logic is not included"
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment