Tarpstone/autobracket.py

## autobracket.py
# import native Python packages
from enum import Enum
import multiprocessing
from time import perf_counter

# import third party packages
from fastapi import APIRouter, Depends, HTTPException, Path
from motor.motor_asyncio import AsyncIOMotorClient
from odmantic import AIOEngine, Model
import pandas

# import custom local stuff
from src.db.atlas import get_odm


# router that groups the autobracket endpoints under the /autobracket prefix
ab_api = APIRouter(prefix="/autobracket", tags=["autobracket"])


class FantasyDataSeason(str, Enum):
    # seasons accepted by the {season} path parameter; the str mixin lets
    # each member compare equal to its plain string value

    # the season before the current one
    PRIORSEASON1 = "2020"
    # the current season
    CURRENTSEASON = "2021"


class CBBTeam(Model):
    # ODM model for one team.
    # NOTE(review): no fields are declared here, but the simulation endpoint
    # in this file filters on Season and Key and reads AdjT -- confirm the
    # real field definitions before relying on this class.
    ...


class PlayerSeason(Model):
    # ODM model for a single player's season.
    # NOTE(review): no fields are declared here, but the simulation endpoint
    # in this file filters on Season and Team and sorts on Team and StatID --
    # confirm the real field definitions before relying on this class.
    ...


class SimulationRun(Model):
    # ODM model holding the summary of one simulated game.
    # NOTE(review): no fields are declared here; the simulation endpoint
    # builds instances from the dicts returned by run_simulation -- confirm
    # the real field definitions.
    ...


class SimulationDist(Model):
    # ODM model describing a whole set of simulation runs.
    # NOTE(review): no fields are declared here; the simulation endpoint
    # builds one instance from the distribution mapping returned by
    # run_simulation -- confirm the real field definitions.
    ...


@ab_api.post(
    "/sim/{season}/{away_team}/{home_team}/{sample_size}/{preserve_size}",
)
async def full_game_simulation(
    season: FantasyDataSeason,
    away_team: str,
    home_team: str,
    sample_size: int = Path(..., gt=0, le=1000),
    preserve_size: int = Path(..., ge=10, le=100),
    client: AsyncIOMotorClient = Depends(get_odm),
):
    """Simulate a matchup between two teams and save the output to MongoDB.

    Loads the player-season documents for both teams and the teams' tempo
    figures (``AdjT``) from the ``autobracket`` database, calls
    ``run_simulation`` and writes every simulated game plus the distribution
    summary back through the ODM engine.

    Args:
        season: Season to simulate, restricted to ``FantasyDataSeason`` values.
        away_team: Team key of the away side.
        home_team: Team key of the home side.
        sample_size: Number of simulations requested (1 to 1000).
        preserve_size: Passed through to ``run_simulation`` (10 to 100).
        client: Motor client injected by FastAPI through ``get_odm``.

    Returns:
        A dict with a status message, the seconds elapsed up to the end of
        the simulation (``sim_time``, which also covers the database reads),
        the seconds spent writing the results (``db_time``) and the number
        of simulations requested.

    Raises:
        HTTPException: 404 when no player or team records match the request.
    """
    # performance timer
    start_time = perf_counter()

    engine = AIOEngine(motor_client=client, database="autobracket")
    matchup_data = [
        player_season
        async for player_season in engine.find(
            PlayerSeason,
            (PlayerSeason.Season == season)
            & ((PlayerSeason.Team == away_team) | (PlayerSeason.Team == home_team)),
            sort=(PlayerSeason.Team, PlayerSeason.StatID),
        )
    ]
    if not matchup_data:
        # a frame built from no documents has no "Team" column, so report the
        # missing data clearly instead of failing with a KeyError below
        raise HTTPException(
            status_code=404,
            detail=(
                f"No player data found for {away_team} or {home_team} "
                f"in season {season.value}."
            ),
        )

    # create a dataframe representing one simulation
    matchup_df = pandas.DataFrame(
        [player_season.doc() for player_season in matchup_data]
    )
    # create an Away and Home field for identification in the simulation
    matchup_df["designation"] = "home"
    matchup_df.loc[matchup_df["Team"] == away_team, "designation"] = "away"

    # pull Kenpom tempo data for the two teams
    kenpom_data = [
        team
        async for team in engine.find(
            CBBTeam,
            (CBBTeam.Season == season)
            & ((CBBTeam.Key == away_team) | (CBBTeam.Key == home_team)),
            sort=CBBTeam.Key,
        )
    ]
    if not kenpom_data:
        # without any team documents there is no AdjT column to sum
        raise HTTPException(
            status_code=404,
            detail=(
                f"No team tempo data found for {away_team} or {home_team} "
                f"in season {season.value}."
            ),
        )
    kenpom_df = pandas.DataFrame([team.doc() for team in kenpom_data])
    kenpom_tempo = kenpom_df.AdjT.sum()

    # All requested simulations run in a single call. The former
    # multiprocessing branch sat behind `if False`, did not match the
    # run_simulation signature and never set `distribution`, so it is gone.
    results, distribution = run_simulation(
        matchup_df, season, sample_size, preserve_size, kenpom_tempo
    )

    sim_time = perf_counter()

    writes = [SimulationRun(**doc) for doc in results] + [
        SimulationDist(**distribution)
    ]

    # write results to MongoDB
    await engine.save_all(writes)

    db_time = perf_counter()

    return {
        "success": "Check database for output!",
        "sim_time": (sim_time - start_time),
        "db_time": (db_time - sim_time),
        "simulations": sample_size,
    }


def run_simulation(matchup_df, season, sample_size, preserve_size, kenpom_tempo):
    """Run the game simulation for one matchup.

    The simulation logic is not present in this file. The previous
    placeholder returned two names that were never assigned, which raised
    ``NameError`` when called; the missing implementation is now reported
    explicitly.

    Args:
        matchup_df: DataFrame of player-season rows for both teams, with a
            ``designation`` column set to ``"home"`` or ``"away"``.
        season: Season being simulated.
        sample_size: Number of simulations requested.
        preserve_size: Size setting forwarded from the endpoint.
        kenpom_tempo: Sum of the ``AdjT`` values of the teams involved.

    Returns:
        Once implemented, a ``(results, distribution)`` pair. The caller in
        this file unpacks each item of ``results`` and ``distribution``
        itself with ``**``, so they are expected to be an iterable of
        mappings and a mapping respectively.

    Raises:
        NotImplementedError: Always, until the simulation logic is added.
    """
    # basketball game simulation logic goes here
    raise NotImplementedError(
        "run_simulation: the game simulation logic has not been implemented"
    )
	# import native Python packages
	from enum import Enum
	import multiprocessing
	from time import perf_counter

	# import third party packages
	from fastapi import APIRouter, Depends, Path
	from motor.motor_asyncio import AsyncIOMotorClient
	import pandas
	from odmantic import AIOEngine, Model

	# import custom local stuff
	from src.db.atlas import get_odm


	# router that groups the autobracket endpoints under the /autobracket prefix
	ab_api = APIRouter(prefix="/autobracket", tags=["autobracket"])


	class FantasyDataSeason(str, Enum):
	    # seasons accepted by the {season} path parameter; the str mixin lets
	    # each member compare equal to its plain string value

	    # the season before the current one
	    PRIORSEASON1 = "2020"
	    # the current season
	    CURRENTSEASON = "2021"


	class CBBTeam(Model):
	    # ODM model for one team.
	    # NOTE(review): no fields are declared here, but the simulation endpoint
	    # in this file filters on Season and Key and reads AdjT -- confirm the
	    # real field definitions before relying on this class.
	    pass


	class PlayerSeason(Model):
	    # ODM model for a single player's season.
	    # NOTE(review): no fields are declared here, but the simulation endpoint
	    # in this file filters on Season and Team and sorts on Team and StatID --
	    # confirm the real field definitions before relying on this class.
	    pass


	class SimulationRun(Model):
	    # ODM model holding the summary of one simulated game.
	    # NOTE(review): no fields are declared here; the simulation endpoint
	    # builds instances from the dicts returned by run_simulation -- confirm
	    # the real field definitions.
	    pass


	class SimulationDist(Model):
	    # ODM model describing a whole set of simulation runs.
	    # NOTE(review): no fields are declared here; the simulation endpoint
	    # builds one instance from the distribution mapping returned by
	    # run_simulation -- confirm the real field definitions.
	    pass


	@ab_api.post(
	    "/sim/{season}/{away_team}/{home_team}/{sample_size}/{preserve_size}",
	)
	async def full_game_simulation(
	    season: FantasyDataSeason,
	    away_team: str,
	    home_team: str,
	    sample_size: int = Path(..., gt=0, le=1000),
	    preserve_size: int = Path(..., ge=10, le=100),
	    client: AsyncIOMotorClient = Depends(get_odm),
	):
	    """Simulate a matchup between two teams and save the output to MongoDB.

	    Loads the player-season documents for both teams and the teams' tempo
	    figures (``AdjT``) from the ``autobracket`` database, calls
	    ``run_simulation`` and writes every simulated game plus the distribution
	    summary back through the ODM engine.

	    Args:
	        season: Season to simulate, restricted to ``FantasyDataSeason`` values.
	        away_team: Team key of the away side.
	        home_team: Team key of the home side.
	        sample_size: Number of simulations requested (1 to 1000).
	        preserve_size: Passed through to ``run_simulation`` (10 to 100).
	        client: Motor client injected by FastAPI through ``get_odm``.

	    Returns:
	        A dict with a status message, the seconds elapsed up to the end of
	        the simulation (``sim_time``, which also covers the database reads),
	        the seconds spent writing the results (``db_time``) and the number
	        of simulations requested.

	    Raises:
	        HTTPException: 404 when no player or team records match the request.
	    """
	    # performance timer
	    start_time = perf_counter()

	    engine = AIOEngine(motor_client=client, database="autobracket")
	    matchup_data = [
	        player_season
	        async for player_season in engine.find(
	            PlayerSeason,
	            (PlayerSeason.Season == season)
	            & ((PlayerSeason.Team == away_team) | (PlayerSeason.Team == home_team)),
	            sort=(PlayerSeason.Team, PlayerSeason.StatID),
	        )
	    ]
	    if not matchup_data:
	        # a frame built from no documents has no "Team" column, so report the
	        # missing data clearly instead of failing with a KeyError below
	        raise HTTPException(
	            status_code=404,
	            detail=(
	                f"No player data found for {away_team} or {home_team} "
	                f"in season {season.value}."
	            ),
	        )

	    # create a dataframe representing one simulation
	    matchup_df = pandas.DataFrame(
	        [player_season.doc() for player_season in matchup_data]
	    )
	    # create an Away and Home field for identification in the simulation
	    matchup_df["designation"] = "home"
	    matchup_df.loc[matchup_df["Team"] == away_team, "designation"] = "away"

	    # pull Kenpom tempo data for the two teams
	    kenpom_data = [
	        team
	        async for team in engine.find(
	            CBBTeam,
	            (CBBTeam.Season == season)
	            & ((CBBTeam.Key == away_team) | (CBBTeam.Key == home_team)),
	            sort=CBBTeam.Key,
	        )
	    ]
	    if not kenpom_data:
	        # without any team documents there is no AdjT column to sum
	        raise HTTPException(
	            status_code=404,
	            detail=(
	                f"No team tempo data found for {away_team} or {home_team} "
	                f"in season {season.value}."
	            ),
	        )
	    kenpom_df = pandas.DataFrame([team.doc() for team in kenpom_data])
	    kenpom_tempo = kenpom_df.AdjT.sum()

	    # All requested simulations run in a single call. The former
	    # multiprocessing branch sat behind `if False`, did not match the
	    # run_simulation signature and never set `distribution`, so it is gone.
	    results, distribution = run_simulation(
	        matchup_df, season, sample_size, preserve_size, kenpom_tempo
	    )

	    sim_time = perf_counter()

	    writes = [SimulationRun(**doc) for doc in results] + [
	        SimulationDist(**distribution)
	    ]

	    # write results to MongoDB
	    await engine.save_all(writes)

	    db_time = perf_counter()

	    return {
	        "success": "Check database for output!",
	        "sim_time": (sim_time - start_time),
	        "db_time": (db_time - sim_time),
	        "simulations": sample_size,
	    }


	def run_simulation(matchup_df, season, sample_size, preserve_size, kenpom_tempo):
	    """Run the game simulation for one matchup.

	    The simulation logic is not present in this file. The previous
	    placeholder returned two names that were never assigned, which raised
	    ``NameError`` when called; the missing implementation is now reported
	    explicitly.

	    Args:
	        matchup_df: DataFrame of player-season rows for both teams, with a
	            ``designation`` column set to ``"home"`` or ``"away"``.
	        season: Season being simulated.
	        sample_size: Number of simulations requested.
	        preserve_size: Size setting forwarded from the endpoint.
	        kenpom_tempo: Sum of the ``AdjT`` values of the teams involved.

	    Returns:
	        Once implemented, a ``(results, distribution)`` pair. The caller in
	        this file unpacks each item of ``results`` and ``distribution``
	        itself with ``**``, so they are expected to be an iterable of
	        mappings and a mapping respectively.

	    Raises:
	        NotImplementedError: Always, until the simulation logic is added.
	    """
	    # basketball game simulation logic goes here
	    raise NotImplementedError(
	        "run_simulation: the game simulation logic has not been implemented"
	    )