@kingjr
Last active November 26, 2018 17:04
for david
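"""Compare two ways of asking which features of a stimulus Y are encoded in
brain recordings X: David's partial-correlation approach and JR's
cross-validated regression approach. Both are run on simulated data in which
only the first two features of Y actually cause brain activity.
"""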

import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import KFold


def correlation(x, y):
    """Pearson correlations between the columns of x and the columns of y."""
    a = (x - x.mean(0)) / x.std(0)
    b = (y - y.mean(0)) / y.std(0)
    return a.T @ b / x.shape[0]
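
# Shape note (an editorial gloss, not in the original gist): with x of shape
# (n, p) and y of shape (n, q), the z-scored columns are correlated pairwise,
# so the result is a (p, q) matrix whose (i, j) entry is the Pearson r
# between x[:, i] and y[:, j].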


def partial_correlation(x, y, z):
    """Correlation between x and y after regressing z out of both."""
    p_x = RidgeCV().fit(z, x).predict(z)
    p_y = RidgeCV().fit(z, y).predict(z)
    return correlation(x - p_x, y - p_y)
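
# Note: because the nuisance regression uses RidgeCV rather than plain least
# squares, this is a regularized variant of partial correlation; it matches
# the textbook definition only in the limit where the selected penalty is
# negligible.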


def make_data(n_samples=200, n_features=50, n_sources=50, n_sensors=50,
              snr=1., seed=0):
    """Simulate sensor recordings X caused by the first two features of Y."""
    np.random.seed(seed)

    # Define covarying stimulus features from an interesting covariance matrix
    c = np.sin(np.linspace(-np.pi, np.pi, n_features))
    Cy = np.array([np.roll(c, i) for i in range(n_features)])
    Cy += np.random.randn(n_features, n_features) / 2.
    Cy = Cy.dot(Cy.T) / n_features  # symmetric positive semi-definite
    Y = np.random.multivariate_normal(np.zeros(n_features), Cy, n_samples)

    # Define encoding operator: features of Y represented in brain activity
    E = np.zeros((n_sources, n_features))
    E[:, :2] = np.random.randn(n_sources, 2)  # only the first two features cause a response
    N = np.random.randn(n_samples, n_sources)  # brain noise
    S = E.dot(Y.T).T + N / snr  # brain activity

    # Define forward operator: observations of brain activity are a linear
    # mixture of the underlying noisy sources
    F = np.random.randn(n_sensors, n_sources)  # forward model
    X = F.dot(S.T).T  # sensor recordings
    return X, Y
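
# Reading of the generative model above (shapes follow the defaults; this is
# an editorial note, not part of the original gist): Y is (n_samples,
# n_features); S = Y @ E.T + N / snr is the source activity, driven only by
# Y[:, :2]; X = S @ F.T mixes the sources into the sensors. Because Cy
# correlates the columns of Y, every feature correlates with X even though
# only two of them cause it.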


if __name__ == "__main__":
    for n_samples, snr in ((200, 1.), (1000, .1)):
        x, y = make_data(n_samples=n_samples, snr=snr)
        n_samples, n_sensors = x.shape
        n_samples, n_features = y.shape
        fig, (ax0, ax1) = plt.subplots(2)

        # David's approach: partial correlation of each feature with the
        # sensors, controlling for all remaining features
        results = np.zeros(n_features)
        for i in range(n_features):
            y_i = y[:, i].reshape(-1, 1)
            y_not_i = np.delete(y, i, axis=1)
            results[i] = np.sum(np.abs(partial_correlation(x, y_i, y_not_i)))
        ax0.bar(range(n_features), results)
        ax0.set_title('David: n=%i, snr=%.3f' % (n_samples, snr))
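
        # In this simulation only features 0 and 1 cause brain activity, so
        # an ideal summary would show just the first two bars standing out.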

        # JR's approach: fit a cross-validated ridge model from sensors to
        # features, then regress the held-out predictions onto the true
        # features and keep the diagonal coefficients
        n_splits = 20
        cv = KFold(n_splits, shuffle=True, random_state=0)
        betas = np.zeros((n_splits, n_features))
        for split, (train, test) in enumerate(cv.split(x, y)):
            ols = RidgeCV()
            ols.fit(x[train], y[train])
            y_hat = ols.predict(x[test])
            ols = RidgeCV()
            ols.fit(y[test], y_hat)
            betas[split] = np.diag(ols.coef_)
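
        # The diagonal of coef_ gives, for each feature i, the unique weight
        # of the true y[:, i] in reconstructing its own prediction; variance
        # shared with other features is absorbed by the off-diagonal terms.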

        # Plot the mean diagonal betas across splits
        ax1.bar(range(n_features), betas.mean(0))
        ax1.set_title('JR: n=%i, snr=%.3f' % (n_samples, snr))
        plt.tight_layout()
    plt.show()
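
# Running the script (assumes numpy, scikit-learn and matplotlib are
# installed) produces one figure per (n_samples, snr) setting, with David's
# partial-correlation bars on top and JR's diagonal betas below.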