Skip to content

Instantly share code, notes, and snippets.

@kingjr
Last active February 22, 2019 23:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kingjr/705cb16407b9a583a811441274da7231 to your computer and use it in GitHub Desktop.
Save kingjr/705cb16407b9a583a811441274da7231 to your computer and use it in GitHub Desktop.
linear_or_categorical.py
"""Fit sigmoid with parallel seeds to find global minimum.
"""
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from scipy.stats import pearsonr, wilcoxon
from sklearn.base import BaseEstimator
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from pandas import DataFrame
from tqdm import tqdm
class SigmoidRegression(BaseEstimator):
def __init__(self, n_epochs=5000, n_seeds=100, lr=1e-3):
self.n_epochs = n_epochs
self.n_seeds = n_seeds
self.lr = lr
def _sigmoid(self, X, a, b, c, d):
exp = np.exp
if isinstance(X, torch.Tensor):
exp = torch.exp
return a / (1 + exp(-c * (X - d))) + b
def _checkX(self, X):
x = np.squeeze(X)
assert x.ndim == 1
return x
def _to_tensor(self, z):
if isinstance(z, list):
z = torch.tensor(z, dtype=torch.float)
elif isinstance(z, np.ndarray):
z = torch.from_numpy(z).float()
return z
def fit(self, X, y):
self.loss_ = list()
x = self._to_tensor(self._checkX(X))
y = self._to_tensor(y)
X = x.unsqueeze(1)
Y = y.unsqueeze(1) * torch.ones(len(y), self.n_seeds)
abcd = nn.Parameter(torch.randn(4, self.n_seeds)).float()
a, b, c, d = abcd
optimizer = torch.optim.Adam([abcd, ], lr=self.lr)
loss = nn.MSELoss()
for epoch in range(self.n_epochs):
optimizer.zero_grad()
Y_hat = self._sigmoid(X, a, b, c, d)
loss(Y_hat, Y).backward()
optimizer.step()
losses = list()
for a, b, c, d in abcd.transpose(0, 1):
y_hat = self._sigmoid(x, a, b, c, d)
losses.append(loss(y_hat, y).detach().numpy())
abcd = abcd[:, np.argmin(losses)]
self.coef_ = abcd.detach().numpy()
return self
def predict(self, X):
x = self._checkX(X)
return self._sigmoid(x, *self.coef_)
def r_scorer(y_true, y_pred):
return pearsonr(y_true, y_pred)[0]
def linear_or_categorical(X, Y, cv=7):
"""Model comparison: linear or categorical effects of X onto Y
The trials are first averaged per level of evidence within each
subject. An angular mean is applied if X is complex (relevant
for experiment 1.).
Two models are then fit to predict, within each subject, the
response (e.g. classifier's prediction) from the level of evidence.
The linear model is simply a linear regression with a bias:
y = a*x + b
The categorical model is a sigmoid regression, with all biases:
y = a / (1 + e^(-c * (x-d))) + b
The models are fit in cv, and compared on their ability to predict
the test trials (as measured with a correlation r).
The resulting r are compared across subject with a wilcoxon to know:
- whether the linear model predicts the response Y above chance.
- whether the sigmoidal model predicts Y above chance
- whether the sigmoidal models better predicts Y than the linear
Parameters
----------
X : list of list, shape(n_subjects, n_trials)
The single trial level of evidence.
Y : list of list, shape(n_subjects, n_trials)
The single trial response.
Results
-------
results : DataFrame
To retrieve the mean and sem values
pval_linear : float
pval_categorical : float
pval_strictcat : float
What we should report as truly categorical
"""
assert len(X) == len(Y)
results = list()
models = dict(linear=LinearRegression(),
categorical=SigmoidRegression())
cv = StratifiedKFold(cv, shuffle=True) if isinstance(cv, int) else cv
for name, model in models.items():
for subject, (x, y) in enumerate(zip(tqdm(X), Y)):
z = LabelEncoder().fit_transform(x)
scores = list()
for train, test in cv.split(y, z):
split = np.zeros(len(x))
split[train] = 1
data = DataFrame(dict(x=x, y=y, z=z, train=split))
# Average per level of evidence
train = data.query('train==True')
test = data.query('train==False')
x_train = train.groupby('z')['x'].mean().values
y_train = train.groupby('z')['y'].mean().values
x_test = test.groupby('z')['x'].mean().values
y_test = test.groupby('z')['y'].mean().values
# deal with angular prediction (for behavior of Exp 1 only)
if np.any(np.iscomplex(y_train)):
y_train = np.angle(y_train)
y_test = np.angle(y_test)
# fit
model.fit(x_train[:, None], y_train)
# predict
y_hat = model.predict(x_test[:, None])
# score
r, _ = pearsonr(y_hat, y_test)
scores.append(r)
results.append(dict(r=np.mean(scores, 0),
subject=subject,
model=name))
# Compare linear versus categorical predictions across subjects
results = DataFrame(results)
# Can a linear predict above chance?
linear = results.query('model=="linear"')['r'].values
p_linear = wilcoxon(linear)[1]
# Can a categorical predict above chance?
categorical = results.query('model=="categorical"')['r'].values
p_categorical = wilcoxon(categorical)[1]
# Can a categorical predict above chance better than a linear model?
diff = categorical - linear
diff = np.nan_to_num(diff * (diff >= 0.), 0)
p_strict = wilcoxon(diff)[1]
return linear, categorical, p_linear, p_categorical, p_strict
if __name__ == '__main__':
x = [0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.]
y = [-0.311, 0.0815, -0.309, -0.117, -0.389,
0.525, 0.495, 0.334, 0.486, 0.791, 0.349]
model = SigmoidRegression()
model.fit(x, y)
plt.scatter(x, y)
x = np.linspace(0, 1)
plt.plot(x, model.predict(x))
# test linear_categorical
X = list()
Y = list()
for subject in range(15):
# mimic evidence at each trials
X.append(np.around(np.linspace(0, 1, 100), 1))
# nonlinear response (e.g. classifier predict_proba) at each trial
Y.append(1. * (np.linspace(0, 1, 100) > .5))
# add noise to response
Y[-1] += np.random.randn(len(Y[-1]))
# compute results
linear, categ, p_lin, p_cat, p_strict = linear_or_categorical(X, Y, cv=2)
assert p_cat < 0.01
assert p_strict < 0.01 # strictly categorical
X = list()
Y = list()
for subject in range(15):
X.append(np.around(np.linspace(0, 1, 100), 1))
Y.append(1.*X[-1])
Y[-1] += np.random.randn(len(Y[-1]))
linear, categ, p_lin, p_cat, p_strict = linear_or_categorical(X, Y, cv=2)
assert p_lin < 0.01
assert p_cat < 0.01
assert p_strict > 0.01
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment