Zmey56/gist:e0a848618ef60b3be94b061544d91c8a

## gistfile1.txt
def get_bootstrap(
    data_column_1,  # numeric values of the first sample
    data_column_2,  # numeric values of the second sample
    boot_it=10000,  # number of bootstrap subsamples
    statistic=np.mean,  # statistic of interest
    bootstrap_conf_level=0.95,  # confidence level (currently has no effect)
):
    """Bootstrap two samples and return a two-sided p-value for their difference.

    On every iteration both inputs are resampled with replacement to the
    length of the longer input, the resamples are subtracted element-wise and
    ``statistic`` is applied to the differences. A normal distribution is then
    parameterised by the mean and standard deviation of the collected
    statistics, and the p-value is twice the smaller tail probability at zero.

    Parameters
    ----------
    data_column_1, data_column_2
        Samples to compare. Each must support ``len()`` and
        ``.sample(n, replace=True).values`` (e.g. a ``pandas.Series``).
    boot_it
        Number of bootstrap iterations.
    statistic
        Callable applied to the array of element-wise differences.
    bootstrap_conf_level
        Kept for interface compatibility. The returned p-value does not depend
        on it.

    Returns
    -------
    dict
        ``{"p_value": p}``. When every bootstrap statistic is identical
        (zero spread), ``p`` is ``1.0`` if that common value is zero and
        ``0.0`` otherwise, instead of NaN.
    """
    # Both resamples share one length so they can be subtracted element-wise.
    boot_len = max(len(data_column_1), len(data_column_2))

    boot_data = [
        statistic(
            data_column_1.sample(boot_len, replace=True).values
            - data_column_2.sample(boot_len, replace=True).values
        )
        for _ in range(boot_it)
    ]

    boot_mean = np.mean(boot_data)
    boot_std = np.std(boot_data)

    # norm.cdf is undefined (NaN) for scale == 0, so the degenerate
    # zero-variance case is resolved explicitly.
    if boot_std == 0:
        return {"p_value": 1.0 if boot_mean == 0 else 0.0}

    # The smaller of the two tails at zero is always the one obtained with
    # loc = |mean|, so a single CDF evaluation is sufficient.
    p_value = 2 * norm.cdf(x=0, loc=abs(boot_mean), scale=boot_std)

    return {"p_value": p_value}
	def get_bootstrap(
	    data_column_1,  # numeric values of the first sample
	    data_column_2,  # numeric values of the second sample
	    boot_it=10000,  # number of bootstrap subsamples
	    statistic=np.mean,  # statistic of interest
	    bootstrap_conf_level=0.95,  # confidence level (currently has no effect)
	):
	    """Bootstrap two samples and return a two-sided p-value for their difference.

	    On every iteration both inputs are resampled with replacement to the
	    length of the longer input, the resamples are subtracted element-wise and
	    ``statistic`` is applied to the differences. A normal distribution is then
	    parameterised by the mean and standard deviation of the collected
	    statistics, and the p-value is twice the smaller tail probability at zero.

	    Parameters
	    ----------
	    data_column_1, data_column_2
	        Samples to compare. Each must support ``len()`` and
	        ``.sample(n, replace=True).values`` (e.g. a ``pandas.Series``).
	    boot_it
	        Number of bootstrap iterations.
	    statistic
	        Callable applied to the array of element-wise differences.
	    bootstrap_conf_level
	        Kept for interface compatibility. The returned p-value does not depend
	        on it.

	    Returns
	    -------
	    dict
	        ``{"p_value": p}``. When every bootstrap statistic is identical
	        (zero spread), ``p`` is ``1.0`` if that common value is zero and
	        ``0.0`` otherwise, instead of NaN.
	    """
	    # Both resamples share one length so they can be subtracted element-wise.
	    boot_len = max(len(data_column_1), len(data_column_2))

	    boot_data = []
	    for _ in range(boot_it):
	        samples_1 = data_column_1.sample(boot_len, replace=True).values
	        samples_2 = data_column_2.sample(boot_len, replace=True).values
	        boot_data.append(statistic(samples_1 - samples_2))

	    boot_mean = np.mean(boot_data)
	    boot_std = np.std(boot_data)

	    # norm.cdf is undefined (NaN) for scale == 0, so the degenerate
	    # zero-variance case is resolved explicitly.
	    if boot_std == 0:
	        return {"p_value": 1.0 if boot_mean == 0 else 0.0}

	    # The smaller of the two tails at zero is always the one obtained with
	    # loc = |mean|, so a single CDF evaluation is sufficient.
	    p_value = 2 * norm.cdf(x=0, loc=abs(boot_mean), scale=boot_std)

	    return {"p_value": p_value}