Skip to content

Instantly share code, notes, and snippets.

@robcarver17
Last active January 21, 2022 00:34
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save robcarver17/fe1fab08ba1e78b9550a9671ac4ae8b4 to your computer and use it in GitHub Desktop.
Save robcarver17/fe1fab08ba1e78b9550a9671ac4ae8b4 to your computer and use it in GitHub Desktop.
Bin plotting
import matplotlib
matplotlib.use("TkAgg")
import matplotlib.pyplot as plt
import scipy.stats as stats
import numpy as np
def plot_results_for_bin_size(size, pd_result, centre_on_mean=False):
    """Plot the mean of ``y`` within each ``x`` bin and t-test the bins.

    Bin edges come from ``get_bins_for_size``; the ``y`` values falling in
    each bin come from ``calculate_results_for_bins``.  The per-bin mean of
    ``y`` is plotted against the midpoint of each bin on the current
    matplotlib axes, then ``print_t_stats`` is run on the binned values.

    Args:
        size: Bin-count argument forwarded to ``get_bins_for_size``.
        pd_result: Object exposing ``x`` and ``y`` columns (presumably a
            pandas DataFrame -- confirm against callers).
        centre_on_mean: Forwarded to ``get_bins_for_size``.

    Returns:
        Whatever ``print_t_stats`` returns for the binned ``y`` values.
    """
    bin_edges = get_bins_for_size(size, pd_result, centre_on_mean=centre_on_mean)
    binned_y = calculate_results_for_bins(bin_edges, pd_result)

    mean_y_per_bin = [bin_values.mean() for bin_values in binned_y]
    # One midpoint per pair of adjacent edges, so it lines up with binned_y.
    bin_midpoints = [
        np.mean([upper_edge, lower_edge])
        for lower_edge, upper_edge in zip(bin_edges[:-1], bin_edges[1:])
    ]

    plt.plot(bin_midpoints, mean_y_per_bin)
    return print_t_stats(binned_y)
def print_t_stats(results):
    """Print and collect two-sample t-tests comparing the per-bin samples.

    Each bin is tested against the bin immediately before it, and finally the
    last bin is tested against the first.  Every test is
    ``scipy.stats.ttest_ind`` with ``axis=0`` and ``equal_var=True``.

    Args:
        results: Ordered sequence of per-bin samples, e.g. the list produced
            by ``calculate_results_for_bins``.

    Returns:
        A list of ``ttest_ind`` results: one per adjacent pair of bins (in
        bin order), followed by the final-versus-first comparison.  An empty
        list when ``results`` is empty.
    """
    # With no bins there is nothing to compare; return early instead of
    # letting results[-1] below raise an IndexError.
    if len(results) == 0:
        return []

    t_results = []

    print("For each bin:")
    for previous_bin, current_bin in zip(results[:-1], results[1:]):
        # Later bin first, so a positive statistic means the later bin is higher.
        t_stat = stats.ttest_ind(current_bin, previous_bin, axis=0, equal_var=True)
        t_results.append(t_stat)
        print(t_stat)

    print("Comparing final and first bins:")
    t_stat = stats.ttest_ind(results[-1], results[0], axis=0, equal_var=True)
    t_results.append(t_stat)
    print(t_stat)

    return t_results
def get_bins_for_size(size, pd_result, centre_on_mean=False):
    """Build bin edges for ``pd_result.x``, split around a centre value.

    The observations on each side of the centre are cut at evenly spaced
    quantile levels by ``quantile_in_range``.  The edges are returned ordered
    from the lowest observations to the highest, with the centre itself as
    the shared edge between the two halves.

    Args:
        size: Number of quantile bins on each side of the centre.
        pd_result: Object exposing an ``x`` column (presumably a pandas
            DataFrame -- confirm against callers).
        centre_on_mean: When true the centre is the mean of ``x``; otherwise
            the centre is 0.

    Returns:
        A list of edges: the lower-half quantile points (without the last),
        the centre, then the upper-half quantile points (without the first).
    """
    centre = pd_result.x.mean() if centre_on_mean else 0

    # Each half reaches 0.001 past the centre; quantile_in_range filters with
    # strict inequalities, so this presumably keeps observations sitting at
    # the centre inside both halves.
    # The lower half is bounded from ABOVE (max=) and the upper half from
    # BELOW (min=).  The bounds were previously swapped, which returned the
    # upper-half edges first and produced non-monotonic, partly empty bins.
    lower_quantiles = quantile_in_range(size, pd_result, max=centre + 0.001)
    upper_quantiles = quantile_in_range(size, pd_result, min=centre - 0.001)

    # Drop the innermost point of each half: the centre replaces both.
    return lower_quantiles[:-1] + [centre] + upper_quantiles[1:]
def quantile_in_range(size, pd_result, min=-9999., max=9999.):
    """Return quantile points of ``pd_result.x`` restricted to ``(min, max)``.

    Only values strictly greater than ``min`` and strictly less than ``max``
    are kept; with the defaults that means values strictly between -9999 and
    9999.  The quantile levels come from ``get_quantile_ranges(size)``.

    Args:
        size: Forwarded to ``get_quantile_ranges``.
        pd_result: Object exposing an ``x`` column that supports boolean
            masking and ``.quantile`` (presumably a pandas DataFrame --
            confirm against callers).
        min: Exclusive lower bound on ``x``.  (Shadows the builtin, but the
            name is part of the keyword interface.)
        max: Exclusive upper bound on ``x``.  (Same caveat as ``min``.)

    Returns:
        A list with one quantile value of the restricted ``x`` per level.
    """
    x_values = pd_result.x
    inside_bounds = (x_values > min) & (x_values < max)
    restricted = x_values[inside_bounds]
    return [restricted.quantile(level) for level in get_quantile_ranges(size)]
def get_quantile_ranges(size):
    """Return ``size + 1`` evenly spaced quantile levels from 0 to 1 inclusive.

    Args:
        size: Number of equal-width quantile intervals; must be a positive
            whole number.

    Returns:
        A numpy array ``[0, 1/size, 2/size, ..., 1]``.

    Raises:
        ValueError: If ``size`` is not a positive whole number.
    """
    n_intervals = int(size)
    if n_intervals < 1 or n_intervals != size:
        raise ValueError(
            "size must be a positive whole number, got {!r}".format(size)
        )
    # linspace fixes the number of points and hits both endpoints exactly,
    # which avoids relying on a padded stop value with a fractional arange step.
    return np.linspace(0.0, 1.0, n_intervals + 1)
def calculate_results_for_bins(bins, pd_result):
    """Collect the ``y`` values whose ``x`` falls inside each bin.

    A bin is the open interval between two adjacent entries of ``bins``.
    Both comparisons are strict, so a row whose ``x`` equals an edge is not
    placed in any bin.

    Args:
        bins: Sequence of bin edges; ``len(bins) - 1`` bins are produced.
        pd_result: Object exposing ``x`` and ``y`` columns and supporting
            boolean-mask row selection (presumably a pandas DataFrame --
            confirm against callers).

    Returns:
        A list with one ``y`` selection per bin, in the order of ``bins``.
    """
    x_values = pd_result.x
    return [
        pd_result[(x_values > lower_edge) & (x_values < upper_edge)].y
        for lower_edge, upper_edge in zip(bins[:-1], bins[1:])
    ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment