@kingjr
Last active November 26, 2018 17:04
for david
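"""Compare two ways of asking which features of a stimulus Y are encoded in
brain recordings X: David's partial-correlation approach and JR's
cross-validated regression approach. Both are run on simulated data in which
only the first two features of Y actually cause brain activity.
"""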

import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import KFold


def correlation(x, y):
    """Pearson correlations between the columns of x and the columns of y."""
    a = (x - x.mean(0)) / x.std(0)
    b = (y - y.mean(0)) / y.std(0)
    return a.T @ b / x.shape[0]
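
# Shape note (an editorial gloss, not in the original gist): with x of shape
# (n, p) and y of shape (n, q), the z-scored columns are correlated pairwise,
# so the result is a (p, q) matrix whose (i, j) entry is the Pearson r
# between x[:, i] and y[:, j].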


def partial_correlation(x, y, z):
    """Correlation between x and y after regressing z out of both."""
    p_x = RidgeCV().fit(z, x).predict(z)
    p_y = RidgeCV().fit(z, y).predict(z)
    return correlation(x - p_x, y - p_y)
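
# Note: because the nuisance regression uses RidgeCV rather than plain least
# squares, this is a regularized variant of partial correlation; it matches
# the textbook definition only in the limit where the selected penalty is
# negligible.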


def make_data(n_samples=200, n_features=50, n_sources=50, n_sensors=50,
              snr=1., seed=0):
    """Simulate sensor recordings X caused by the first two features of Y."""
    np.random.seed(seed)

    # Define covarying stimulus features from an interesting covariance matrix
    c = np.sin(np.linspace(-np.pi, np.pi, n_features))
    Cy = np.array([np.roll(c, i) for i in range(n_features)])
    Cy += np.random.randn(n_features, n_features) / 2.
    Cy = Cy.dot(Cy.T) / n_features  # symmetric positive semi-definite
    Y = np.random.multivariate_normal(np.zeros(n_features), Cy, n_samples)

    # Define encoding operator: features of Y represented in brain activity
    E = np.zeros((n_sources, n_features))
    E[:, :2] = np.random.randn(n_sources, 2)  # only the first two features cause a response
    N = np.random.randn(n_samples, n_sources)  # brain noise
    S = E.dot(Y.T).T + N / snr  # brain activity

    # Define forward operator: observations of brain activity are a linear
    # mixture of the underlying noisy sources
    F = np.random.randn(n_sensors, n_sources)  # forward model
    X = F.dot(S.T).T  # sensor recordings
    return X, Y
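
# Reading of the generative model above (shapes follow the defaults; this is
# an editorial note, not part of the original gist): Y is (n_samples,
# n_features); S = Y @ E.T + N / snr is the source activity, driven only by
# Y[:, :2]; X = S @ F.T mixes the sources into the sensors. Because Cy
# correlates the columns of Y, every feature correlates with X even though
# only two of them cause it.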


if __name__ == "__main__":
    for n_samples, snr in ((200, 1.), (1000, .1)):
        x, y = make_data(n_samples=n_samples, snr=snr)
        n_samples, n_sensors = x.shape
        n_samples, n_features = y.shape
        fig, (ax0, ax1) = plt.subplots(2)

        # David's approach: partial correlation of each feature with the
        # sensors, controlling for all remaining features
        results = np.zeros(n_features)
        for i in range(n_features):
            y_i = y[:, i].reshape(-1, 1)
            y_not_i = np.delete(y, i, axis=1)
            results[i] = np.sum(np.abs(partial_correlation(x, y_i, y_not_i)))
        ax0.bar(range(n_features), results)
        ax0.set_title('David: n=%i, snr=%.3f' % (n_samples, snr))
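
        # In this simulation only features 0 and 1 cause brain activity, so
        # an ideal summary would show just the first two bars standing out.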

        # JR's approach: fit a cross-validated ridge model from sensors to
        # features, then regress the held-out predictions onto the true
        # features and keep the diagonal coefficients
        n_splits = 20
        cv = KFold(n_splits, shuffle=True, random_state=0)
        betas = np.zeros((n_splits, n_features))
        for split, (train, test) in enumerate(cv.split(x, y)):
            ols = RidgeCV()
            ols.fit(x[train], y[train])
            y_hat = ols.predict(x[test])
            ols = RidgeCV()
            ols.fit(y[test], y_hat)
            betas[split] = np.diag(ols.coef_)
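
        # The diagonal of coef_ gives, for each feature i, the unique weight
        # of the true y[:, i] in reconstructing its own prediction; variance
        # shared with other features is absorbed by the off-diagonal terms.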

        # Plot the mean diagonal betas across splits
        ax1.bar(range(n_features), betas.mean(0))
        ax1.set_title('JR: n=%i, snr=%.3f' % (n_samples, snr))
        plt.tight_layout()
    plt.show()
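
# Running the script (assumes numpy, scikit-learn and matplotlib are
# installed) produces one figure per (n_samples, snr) setting, with David's
# partial-correlation bars on top and JR's diagonal betas below.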