Skip to content

Instantly share code, notes, and snippets.

@dengemann
Last active December 16, 2015 11:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save dengemann/5427106 to your computer and use it in GitHub Desktop.
Save dengemann/5427106 to your computer and use it in GitHub Desktop.
repeated measures ANOVA
# Author: d.engemann@fz-juelich.de
#
# License: BSD (3-clause)
# XXX clafify licensing
import numpy as np
from collections import namedtuple
from scipy.signal import detrend
from scipy import stats
aov_result = namedtuple('result', 'effect name df1 df2 fval pval')
def _code_effects(ii, width=0):
"""Aux function assisting to code effects using binary count pattern"""
ii += 1 # assume we get counts from an itarator
coding = []
while ii > 0:
coding.insert(0, np.float(np.remainder(ii, 2)))
ii = np.floor(ii / 2.)
new_length = width - len(coding)
if new_length < 0:
raise RuntimeError('Given `pad`, `ii`is too large')
return np.r_[np.zeros((new_length)), coding].astype('i4')
def repanova(data, factor_levels, factor_labels=None,
alpha=0.5, correction='greenhouse_gyser'):
""" Simple repeated measures ANOVA based on MATLAB code by Rik Henson
and pyvttble code by Roger Lew.
data : ndarray
the data in a replications X within subject factors structure
first factor repeats slowest.
factor_levels : list-like
The number of levels per factor.
factor_labels : list-like
The factor names.
alpha : float
The significance threshold.
correction : str | None.
The correction method to be employed. If None, nos sphericity
correction will be applied
"""
n_replications = len(data)
if factor_labels is None:
factor_labels = ['%d' for i in range(factor_levels)]
n_factors = len(factor_levels)
n_conditions = np.prod(factor_levels)
n_effects = 2 ** n_factors - 1
if data.shape[1] != n_conditions:
raise RuntimeError('data has %d conditions; design only %d' % (
data.shape[1], n_conditions))
# for each factor, create effect components (k * 2 structures)
sc, sy, = [], []
for n_levels in factor_levels:
sc.append([np.ones([n_levels, 1]),
detrend(np.eye(n_levels), type='constant')])
sy.append([np.ones([n_levels, 1]) / n_levels, np.eye(n_levels)])
y_res, b_, epsilon, efs, f_val, dfs, cdfs, p_val, sig = \
[{} for k in range(9)]
results = []
for idx, effect in enumerate(range(n_effects)):
coding = _code_effects(effect, width=n_factors)
c_, cy = [k[0][coding[n_factors - 1]] for k in sc, sy]
for ff in range(1, n_factors):
c_ind = coding[n_factors - (ff + 1)]
c_ = np.kron(c_, sc[ff][c_ind])
cy = np.kron(cy, sy[ff][c_ind])
y_ = np.dot(data, c_)
y_res[effect] = np.mean(np.dot(data, cy), axis=0)
df1 = np.linalg.matrix_rank(c_)
df2 = df1 * (n_replications - 1)
b_[effect] = np.mean(y_, axis=0)
ss = np.sum(y_ * b_[effect].T)
mse = (np.sum(np.diag(np.dot(y_.T, y_))) - ss) / df2
mss = ss / df1
if correction == 'greenhouse_gyser':
v_ = np.cov(y_, rowvar=False) # sample covariance
epsilon[effect] = np.trace(v_) ** 2 \
/ (df1 * np.trace(np.dot(v_.T, v_)))
else:
epsilon[effect] = 1
efs[effect] = n_effects - 1 - (np.where(coding == 1)[0] + 1)
f_val[effect] = mss / mse
dfs[effect] = df1, df2
cdfs[effect] = epsilon[effect] * dfs[effect]
p_val[effect] = stats.f(*dfs[effect]).sf(f_val[effect])
# XXX think about more suitable representation
sig[effect] = [None, None]
if p_val[effect] < alpha:
sig[effect][0] = effect
if p_val[effect] < alpha / n_effects:
sig[effect][1] = effect
if idx > 1: # XXX improve this.
ename = ' * '.join(factor_labels)
else:
ename = factor_labels[idx]
results.append(aov_result(effect, ename, cdfs[effect][0],
cdfs[effect][1], f_val[effect], p_val[effect]))
return results
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment