Last active
November 26, 2018 17:04
-
-
Save kingjr/23a53e6d822f3d830e035bb2abfd9735 to your computer and use it in GitHub Desktop.
for david
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from sklearn.linear_model import RidgeCV | |
from matplotlib import pyplot as plt | |
from sklearn.linear_model import LinearRegression | |
from sklearn.model_selection import cross_val_predict, KFold | |
def correlation(x, y): | |
a = (x - x.mean(0)) / x.std(0) | |
b = (y - y.mean(0)) / y.std(0) | |
return a.T @ b / x.shape[0] | |
def partial_correlation(x, y, z): | |
p_x = RidgeCV().fit(z, x).predict(z) | |
p_y = RidgeCV().fit(z, y).predict(z) | |
return correlation(x - p_x, y - p_y) | |
def make_data(n_samples=200, n_features=50, n_sources=50, n_sensors=50, | |
snr=1., seed=0): | |
np.random.seed(seed) | |
# Define covarying stimulus features | |
# Make interesting covariance matrix | |
c = np.sin(np.linspace(-np.pi, np.pi, n_features)) | |
Cy = np.array([np.roll(c, i) for i in range(n_features)]) | |
Cy += np.random.randn(n_features, n_features) / 2. | |
Cy = Cy.dot(Cy.T) / n_features # sym pos-semidefin | |
Y = np.random.multivariate_normal(np.zeros(n_features), Cy, n_samples) | |
# Define encoding operator: | |
E = np.zeros((n_sources, n_features)) # Features of Y represented in the brain activity | |
E[:, :2] = np.random.randn(n_sources, 2) # Only first two features cause brain response | |
N = np.random.randn(n_samples, n_sources) # Brain noise | |
S = E.dot(Y.T).T + N / snr # Brain activity | |
# Define forward operators: observations of brain activity are linear | |
# mixture of underlying noisy sources: | |
F = np.random.randn(n_sensors, n_sources) # Forward model | |
X = F.dot(S.T).T # Sensor recordings | |
return X, Y | |
if __name__ == "__main__": | |
for n_samples, snr in ((200, 1.), (1000, .1)): | |
x, y = make_data(n_samples=n_samples, snr=snr) | |
n_samples, n_sensors = x.shape | |
n_samples, n_features = y.shape | |
fig, (ax0, ax1) = plt.subplots(2) | |
# David's | |
results = np.zeros(y.shape[1]) | |
for i in range(n_features): | |
y_i = y[:, i].reshape(-1, 1) | |
y_not_i = np.delete(y, i, axis=1) | |
results[i] = np.sum(np.abs(partial_correlation(x, y_i, y_not_i))) | |
ax0.bar(range(n_features), results) | |
ax0.set_title('David: n=%i, snr=%.3f' % (n_samples, snr)) | |
# JR's | |
n_splits = 20 | |
cv = KFold(n_splits, shuffle=True, random_state=0) | |
betas = np.zeros((n_splits, n_features)) | |
for split, (train, test) in enumerate(cv.split(x, y)): | |
ols = RidgeCV() | |
ols.fit(x[train], y[train]) | |
y_hat = ols.predict(x[test]) | |
ols = RidgeCV() | |
ols.fit(y[test], y_hat) | |
betas[split] = np.diag(ols.coef_) | |
# Plot diagonal beta | |
ax1.bar(range(n_features), betas.mean(0)) | |
ax1.set_title('JR: n=%i, snr=%.3f' % (n_samples, snr)) | |
plt.tight_layout() | |
plt.show() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment