# JotaRata/bootstrap.py

## bootstrap.py
import random
from typing import Any, Callable, Concatenate, List, ParamSpec, ParamSpecKwargs, Tuple, cast
import numpy as np
from numpy.typing import NDArray,ArrayLike
from pandas import DataFrame, Series
# ---------------- Bootstrapping -------------------------------
_ArrayLike = NDArray[np.float_] | None
_NumFunction = Callable[..., float]

def sampler(sample :  List[float],
				stat : _NumFunction = np.nanmedian,
				est : _NumFunction = np.nanmedian,
				iter : int = 1000,
				include_error : bool = False,
				print_values : bool = False) -> Tuple[float, float] | float:
	'''
		## Bootstrap sampler
		Randomly samples a list of values, computing the estimator function each time and returning the statistic of the computed values.

		### Parameters:
		* sample (list or array) : Input list of values
		* stat (function) : The statistic to compute from the subsample estimator values, default is np.nanmedian
		* est (function) : The estimator function computed for each sub-sample, default is np.nanmedian
		* iter (int) : Number of sub-samples to compute, default is 1000
		* include_error (bool) : Whether to include the standard deviation of the estimated values along the statistic, if True then the return output is going to be a tuple of two floats, default is False.
		* print_values (bool) : For debugging purposes, whether to print the estimated values on screen.

		### Returns:
		A single float or a tuple of floats if include_error is enabled.
		### Notes:
		The function works by creating a sub-sample of the input of the same length then calling the estimator function for this sub-sample and appending the result value into a temporal list, after repeating this process a by number of iteration the final statistic is computed from the list of estimated values.

		It's important that the estimator function and the statistic function both take a list or array of values and return a single float, for multivariate sampling see bootstrap.sampler_mv.

		The process of computing bootstrap statistics can be slow, for further optimizations consider compiling this module with mypyc.
	'''

	values : List[float] = list[float]()
	_size = len(sample)
	if _size < 3:
		if include_error:
			return np.nan, np.nan
		else:
			return np.nan
	i : int
	for i in range(iter):
		boot_sample : List[float] = np.random.choice(sample, replace = True, size = len(sample)).tolist()
		boot_mean = est(boot_sample)
		values.append(boot_mean)

	if print_values:
		print('values: ', values)
	if include_error:
		return stat(values), cast(float, np.nanstd(values))
	else:
		return stat(values)

def sampler_mv(sample : Tuple[ List[float],  List[float]],
				stat : _NumFunction = np.nanmedian,
				est : Callable[..., float] | None = None,
				iter : int = 1000,
				indices : List[int]|None = None,
				include_error : bool = False,
				print_values : bool = False) -> Tuple[float, float] | float:
	'''
	## Multivariate bootstrap sampler
	Randomly samples a list of vectors, computing the estimator function each time and returning the statistic of the computed values.

	### Parameters:
	* sample (list or array of 2D vectors) : Input rows and columns, it must be provided as a tuple of lists or a 2D array.
	* stat (function) : The statistic to compute from the subsample estimator values, default is np.nanmedian
	* est (function) : The estimator function computed for each sub-sample, this function must accept two float values and return a single float.
	* iter (int) : Number of sub-samples to compute, default is 1000
	* include_error (bool) : Whether to include the standard deviation of the estimated values along the statistic, if True then the return output is going to be a tuple of two floats, default is False.
	* print_values (bool) : For debugging purposes, whether to print the estimated values on screen.

	### Returns:
	A single float or a tuple of floats if include_error is enabled.
	### Notes:
	The function works by creating a sub-sample of the input of the same length then calling the estimator function for this sub-sample and appending the result value into a temporal list, after repeating this process a by number of iteration the final statistic is computed from the list of estimated values.

	Unlike boostrap.sampler, the estimator function must be given or an AssertionError may be raised. The estimator function must accept two list of values and return a single float value, the statistic function can be whatever function that accepts a list of values and returns a single value instead.


	The process of computing bootstrap statistics can be slow, for further optimizations consider compiling this module with mypyc.
	'''
	values : List[float] = list[float]()
	assert stat is not None and est is not None
	_sample0, _sample1 = sample
	assert _sample0 is not None and _sample1 is not None
	_size : int = len(_sample0)
	assert _size == len(_sample1)
	assert est is not None
	if _size < 3:
		if include_error:
			return np.nan, np.nan
		else:
			return np.nan
	if indices is None:
		indices = list(range(_size))
	for _ in range(iter):
		boot_sample : List[int] = np.random.choice(indices, replace = True, size = len(indices)).tolist()
		_s0 = [ _sample0[i] for i in boot_sample ]
		_s1 = [ _sample1[i] for i in boot_sample ]
		boot = est(_s0, _s1)
		values.append(boot)

	if print_values:
		print('values: ', values)
	if include_error:
		return stat(values), cast(float, np.nanstd(values))
	else:
		return stat(values)
	import random
	from typing import Any, Callable, Concatenate, List, ParamSpec, ParamSpecKwargs, Tuple, cast
	import numpy as np
	from numpy.typing import NDArray,ArrayLike
	from pandas import DataFrame, Series
	# ---------------- Bootstrapping -------------------------------
	_ArrayLike = NDArray[np.float_] \| None
	_NumFunction = Callable[..., float]

	def sampler(sample : List[float],
	stat : _NumFunction = np.nanmedian,
	est : _NumFunction = np.nanmedian,
	iter : int = 1000,
	include_error : bool = False,
	print_values : bool = False) -> Tuple[float, float] \| float:
	'''
	## Bootstrap sampler
	Randomly samples a list of values, computing the estimator function each time and returning the statistic of the computed values.

	### Parameters:
	* sample (list or array) : Input list of values
	* stat (function) : The statistic to compute from the subsample estimator values, default is np.nanmedian
	* est (function) : The estimator function computed for each sub-sample, default is np.nanmedian
	* iter (int) : Number of sub-samples to compute, default is 1000
	* include_error (bool) : Whether to include the standard deviation of the estimated values along the statistic, if True then the return output is going to be a tuple of two floats, default is False.
	* print_values (bool) : For debugging purposes, whether to print the estimated values on screen.

	### Returns:
	A single float or a tuple of floats if include_error is enabled.
	### Notes:
	The function works by creating a sub-sample of the input of the same length then calling the estimator function for this sub-sample and appending the result value into a temporal list, after repeating this process a by number of iteration the final statistic is computed from the list of estimated values.

	It's important that the estimator function and the statistic function both take a list or array of values and return a single float, for multivariate sampling see bootstrap.sampler_mv.

	The process of computing bootstrap statistics can be slow, for further optimizations consider compiling this module with mypyc.
	'''

	values : List[float] = list[float]()
	_size = len(sample)
	if _size < 3:
	if include_error:
	return np.nan, np.nan
	else:
	return np.nan
	i : int
	for i in range(iter):
	boot_sample : List[float] = np.random.choice(sample, replace = True, size = len(sample)).tolist()
	boot_mean = est(boot_sample)
	values.append(boot_mean)

	if print_values:
	print('values: ', values)
	if include_error:
	return stat(values), cast(float, np.nanstd(values))
	else:
	return stat(values)

	def sampler_mv(sample : Tuple[ List[float], List[float]],
	stat : _NumFunction = np.nanmedian,
	est : Callable[..., float] \| None = None,
	iter : int = 1000,
	indices : List[int]\|None = None,
	include_error : bool = False,
	print_values : bool = False) -> Tuple[float, float] \| float:
	'''
	## Multivariate bootstrap sampler
	Randomly samples a list of vectors, computing the estimator function each time and returning the statistic of the computed values.

	### Parameters:
	* sample (list or array of 2D vectors) : Input rows and columns, it must be provided as a tuple of lists or a 2D array.
	* stat (function) : The statistic to compute from the subsample estimator values, default is np.nanmedian
	* est (function) : The estimator function computed for each sub-sample, this function must accept two float values and return a single float.
	* iter (int) : Number of sub-samples to compute, default is 1000
	* include_error (bool) : Whether to include the standard deviation of the estimated values along the statistic, if True then the return output is going to be a tuple of two floats, default is False.
	* print_values (bool) : For debugging purposes, whether to print the estimated values on screen.

	### Returns:
	A single float or a tuple of floats if include_error is enabled.
	### Notes:
	The function works by creating a sub-sample of the input of the same length then calling the estimator function for this sub-sample and appending the result value into a temporal list, after repeating this process a by number of iteration the final statistic is computed from the list of estimated values.

	Unlike boostrap.sampler, the estimator function must be given or an AssertionError may be raised. The estimator function must accept two list of values and return a single float value, the statistic function can be whatever function that accepts a list of values and returns a single value instead.


	The process of computing bootstrap statistics can be slow, for further optimizations consider compiling this module with mypyc.
	'''
	values : List[float] = list[float]()
	assert stat is not None and est is not None
	_sample0, _sample1 = sample
	assert _sample0 is not None and _sample1 is not None
	_size : int = len(_sample0)
	assert _size == len(_sample1)
	assert est is not None
	if _size < 3:
	if include_error:
	return np.nan, np.nan
	else:
	return np.nan
	if indices is None:
	indices = list(range(_size))
	for _ in range(iter):
	boot_sample : List[int] = np.random.choice(indices, replace = True, size = len(indices)).tolist()
	_s0 = [ _sample0[i] for i in boot_sample ]
	_s1 = [ _sample1[i] for i in boot_sample ]
	boot = est(_s0, _s1)
	values.append(boot)

	if print_values:
	print('values: ', values)
	if include_error:
	return stat(values), cast(float, np.nanstd(values))
	else:
	return stat(values)