Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Bin plotting
import matplotlib
import matplotlib.pyplot as plt
import scipy.stats as stats
import numpy as np
def plot_results_for_bin_size(size, pd_result, centre_on_mean=False):
    """Plot the mean result of each bin against the bin midpoint.

    Bin edges come from ``get_bins_for_size``, the per-bin data from
    ``calculate_results_for_bins``, and the curve is drawn on the current
    matplotlib axes via ``plt.plot``.

    Args:
        size: Bin-size setting forwarded to ``get_bins_for_size``.
        pd_result: Data object forwarded to the binning helpers.
        centre_on_mean: Forwarded to ``get_bins_for_size``.

    Returns:
        Whatever ``print_t_stats`` returns for the per-bin results.
    """
    edges = get_bins_for_size(size, pd_result, centre_on_mean=centre_on_mean)
    per_bin = calculate_results_for_bins(edges, pd_result)

    bin_means = [group.mean() for group in per_bin]
    # One midpoint per pair of consecutive edges.
    midpoints = [
        np.mean([upper, lower]) for lower, upper in zip(edges, edges[1:])
    ]

    plt.plot(midpoints, bin_means)
    return print_t_stats(per_bin)
def print_t_stats(results):
    """Run and print two-sample t-tests between bins.

    Every bin is compared with the bin before it using
    ``scipy.stats.ttest_ind`` (``equal_var=True``), and then the last bin
    is compared with the first. Each test result is printed under its
    header line and collected.

    Args:
        results: Sequence of per-bin samples, in bin order.

    Returns:
        A list of ``ttest_ind`` results: one per adjacent pair of bins
        (current vs. previous), followed by the last-vs-first comparison.
        The list is empty when ``results`` is empty.
    """
    t_results = []

    print("For each bin:")
    for previous, current in zip(results, results[1:]):
        t_stat = stats.ttest_ind(current, previous, axis=0, equal_var=True)
        print(t_stat)
        t_results.append(t_stat)

    print("Comparing final and first bins:")
    # len() rather than truthiness so array-like inputs are handled too;
    # with no bins there is nothing to index with [-1] / [0].
    if len(results) > 0:
        t_stat = stats.ttest_ind(results[-1], results[0], axis=0, equal_var=True)
        print(t_stat)
        t_results.append(t_stat)

    return t_results
def get_bins_for_size(size, pd_result, centre_on_mean=False):
    """Build an ordered list of bin edges from quantiles around a centre.

    Quantile points are computed separately for the values below and
    above the centre; the two halves are then joined with the exact centre
    value as the shared middle edge.

    Args:
        size: Number of quantile steps per half, forwarded to
            ``quantile_in_range``.
        pd_result: Data object whose ``x`` attribute holds the values to
            bin.
        centre_on_mean: When true the centre is ``pd_result.x.mean()``;
            otherwise the centre is 0.

    Returns:
        A list of edges: the lower-half quantile points without their last
        entry, the centre, then the upper-half quantile points without
        their first entry.
    """
    centre = pd_result.x.mean() if centre_on_mean else 0

    # Each half is widened by 0.001 so values sitting on the centre are
    # included. The lower half is bounded from above and the upper half
    # from below, which keeps the combined edges in ascending order.
    lower_quantiles = quantile_in_range(size, pd_result, max=centre + 0.001)
    upper_quantiles = quantile_in_range(size, pd_result, min=centre - 0.001)

    # The innermost point of each half lies near the centre; both are
    # replaced by the single exact centre value.
    return lower_quantiles[:-1] + [centre] + upper_quantiles[1:]
def quantile_in_range(size, pd_result, min=-9999., max=9999.):
    """Return quantile points of ``pd_result.x`` restricted to a window.

    Only values strictly greater than ``min`` and strictly less than
    ``max`` are considered. The quantile fractions come from
    ``get_quantile_ranges(size)``.

    Args:
        size: Forwarded to ``get_quantile_ranges``.
        pd_result: Data object whose ``x`` attribute supports boolean-mask
            indexing and ``.quantile``.
        min: Exclusive lower bound (default -9999.). The name shadows the
            builtin but is kept because callers pass it by keyword.
        max: Exclusive upper bound (default 9999.). Same remark as ``min``.

    Returns:
        A list with one quantile value per requested fraction.
    """
    values = pd_result.x
    # Strict inequalities: values equal to either bound are left out.
    inside_window = (values > min) & (values < max)
    windowed = values[inside_window]
    return [windowed.quantile(fraction) for fraction in get_quantile_ranges(size)]
def get_quantile_ranges(size):
    """Return ``size + 1`` evenly spaced quantile fractions from 0 to 1.

    Args:
        size: Number of equal-width quantile intervals. Must be a whole
            number of at least 1 (integral floats such as ``4.0`` are
            accepted).

    Returns:
        A ``numpy.ndarray`` starting at exactly 0.0 and ending at exactly
        1.0.

    Raises:
        ValueError: If ``size`` is not a positive whole number.
    """
    interval_count = int(size)
    if interval_count != size or interval_count < 1:
        raise ValueError("size must be a positive whole number, got %r" % (size,))
    # linspace pins both endpoints exactly. Stepping by a float 1/size
    # needs an epsilon on the stop value and can land just short of 1.0.
    return np.linspace(0.0, 1.0, interval_count + 1)
def calculate_results_for_bins(bins, pd_result):
    """Split ``pd_result`` into one selection of rows per bin.

    Args:
        bins: Ordered bin edges; each consecutive pair delimits one bin.
        pd_result: Data object with an ``x`` attribute that supports
            boolean-mask indexing (``pd_result[mask]``).

    Returns:
        A list with ``len(bins) - 1`` entries, where entry ``i`` holds the
        rows whose ``x`` lies strictly between ``bins[i]`` and
        ``bins[i + 1]``. Empty when fewer than two edges are given.
    """
    results = []
    for lower_edge, upper_edge in zip(bins, bins[1:]):
        # Both comparisons are strict, so a row whose x equals an edge is
        # not placed in any bin.
        in_bin = (pd_result.x > lower_edge) & (pd_result.x < upper_edge)
        # NOTE(review): the full row selection is collected; if only one
        # result column was meant to be kept per bin, confirm which.
        results.append(pd_result[in_bin])
    return results
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment