Skip to content

Instantly share code, notes, and snippets.

@larsoner
Created October 21, 2021 19:08
Show Gist options
  • Save larsoner/5be1ae1e7c2dcb8cf479d599d287bd4e to your computer and use it in GitHub Desktop.
Save larsoner/5be1ae1e7c2dcb8cf479d599d287bd4e to your computer and use it in GitHub Desktop.
"""Hotelling T2 example.
Following the notation in
https://en.wikipedia.org/wiki/Hotelling%27s_T-squared_distribution#Two-sample_statistic
"""
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
rng = np.random.RandomState(42)
n_x = 10
n_y = 8
p = 3
assert p >= 2
x, y = rng.rand(n_x, p) - 0.5, rng.rand(n_y, p) - 0.5
x[:, :2] += [0.2, 0.1] # toward upper right
y[:, :2] += [-0.1, -0.15] # toward lower left
# Just plot the XY plane, where the difference is (could make it different in
# 3D but 2D is easier to visualize)
fig, ax = plt.subplots(figsize=(4, 4))
for data, color in ((x, 'tab:blue'), (y, 'tab:orange')):
x_ = y_ = np.zeros(len(data))
u, v = data[:, :2].T
kwargs = dict(units='xy', angles='xy', scale_units='xy', scale=1.)
ax.quiver(x_, y_, u, v, color=color, lw=1, alpha=0.2, **kwargs)
ax.quiver(0, 0, np.mean(u), np.mean(v), color=color, lw=2)
ax.set(xlim=[-1, 1], ylim=[-1, 1])
# Separate t-tests
for ci, coord in enumerate('XYZ'):
t, p = stats.ttest_ind(x[:, ci], y[:, ci])
print(f'{coord} p={p:0.4f}')
# Hotelling T2
# Unbiased pooled covariance matrix
sigma_x, sigma_y = np.cov(x.T, ddof=1), np.cov(y.T, ddof=1)
sigma = ((n_x - 1) * sigma_x + (n_y - 1) * sigma_y) / (n_x + n_y - 2)
delta = np.mean(x, axis=0) - np.mean(y, axis=0)
t2 = (n_x * n_y) / (n_x + n_y) * (delta @ np.linalg.inv(sigma) @ delta)
# A scaled version of this is F distributed
df1, df2 = p, n_x + n_y - 1 - p
F = t2 * df2 / ((n_x + n_y + 2) * p)
p = 1 - stats.f.cdf(F, df1, df2)
print(f'\nH p={p:0.4f}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment