Created
August 18, 2014 21:25
-
-
Save dhalpern/c241f89a68bc799a71bb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Simulation hyperparameters for the melioration/maximization Q-learning study.
alpha = 0.1    # learning rate for the weight-vector update
tau = 500      # softmax (Gibbs) temperature
theta = 650    # optimistic initial Q-value for every (state, arm) pair
nArms = 2      # two-armed task: arm 1 = long-term, arm 2 = short-term
nPlays = 500   # trials per simulated subject
gamma = 0.1    # discount factor
nUnits = 14    # feature units: [sP/10, 11-unit one-hot state, arm-1 reward, arm-2 reward]
# Record of one simulated subject's run (Julia 0.x `type` = mutable struct).
type Results
    choices::Array{Float64,1}   # chosen arm per trial (1.0 = long-term, 2.0 = short-term)
    rewards::Array{Float64,1}   # reward received per trial
end
function simulateQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
    # Simulate one subject on the two-armed task using linear function
    # approximation Q(s, a) = dot(wA[a], features(s)), softmax action
    # selection at temperature `tau`, and a Q-learning (greedy next-action)
    # update with learning rate `alpha` and discount `gamma`.
    #
    # Returns a Results(choices, rewards) record of the run.
    wA = [ones(nUnits) for x = 1:nArms]   # one weight vector per arm
    rewards = zeros(nPlays)
    choices = zeros(nPlays)
    i = zeros(nUnits)       # feature vector of the current state
    prevI = zeros(nUnits)   # feature vector of the state the action was taken in
    sP = 0                  # next state (0..10)
    s = 0                   # current state (0..10); qSA row index is s + 1
    qSA = ones(nStates, nArms) * theta
    for t = 1:nPlays
        # Softmax (Gibbs) selection: sample an arm proportional to exp(Q/tau).
        denom = 0
        for a = 1:nArms
            denom += exp(qSA[s + 1, a]/tau)
        end
        r = rand()
        arm = 1
        gibbsSum = exp(qSA[s + 1, arm]/tau)/denom
        while gibbsSum < r
            arm += 1
            gibbsSum += exp(qSA[s + 1, arm]/tau)/denom
        end
        choices[t] = arm
        # Snapshot the acting state's features BEFORE mutating `i`.
        # (Bug fix: the original `prevI = i` aliased the array, so the
        # update below saw the NEW state's features, not the previous ones.)
        prevI = copy(i)
        # Get reward: arm 1 (long-term) pushes the state up, arm 2
        # (short-term) pays more now but pushes the state down.
        if arm == 1
            reward = 400 + 1000 * s/10
            if sP < 10
                sP += 1
            end
            i[13] = 0
            i[14] = reward/1900
        else
            reward = 900 + 1000 * s/10
            if sP > 0
                sP -= 1
            end
            i[13] = reward/1900
            i[14] = 0
        end
        # Remaining features: scaled state plus a one-hot state code in 2..12.
        i[1] = sP/10
        for k = 2:12
            i[k] = 0
        end
        i[sP + 2] = 1
        rewards[t] = reward
        # Greedy action in the next state (Q-learning target).
        maxA = 1
        if qSA[sP + 1, 1] <= qSA[sP + 1, 2]
            maxA = 2
        end
        # TD error and gradient step on the chosen arm's weights.
        delta = reward + gamma * qSA[sP + 1, maxA] - qSA[s + 1, arm]
        wA[arm] = wA[arm] .+ (alpha * delta * prevI)
        # Refresh the cached Q of state s from ITS features (prevI), not the
        # new state's (the original used `i`, another symptom of the alias).
        qSA[s + 1, arm] = dot(wA[arm], prevI)
        s = sP
    end
    return Results(choices, rewards)
end
function softmax(qSA, s, tau)
    # Sample an arm index from the Gibbs/softmax distribution over the
    # Q-values of state `s` (0-based; row s + 1 of qSA) at temperature `tau`.
    #
    # The arm count is taken from qSA itself (bug fix: the original read the
    # global `nArms`, making the helper silently depend on script state).
    nA = size(qSA, 2)
    denom = 0
    for a = 1:nA
        denom += exp(qSA[s + 1, a]/tau)
    end
    # Inverse-CDF sampling over the normalized Gibbs probabilities.
    r = rand()
    arm = 1
    gibbsSum = exp(qSA[s + 1, arm]/tau)/denom
    while gibbsSum < r
        arm += 1
        gibbsSum += exp(qSA[s + 1, arm]/tau)/denom
    end
    return arm
end
function simulateMixQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
    # Like simulateQ, but action selection is "sticky": each trial the arm is
    # resampled from the softmax with probability 0.6, otherwise the previous
    # arm is repeated (the first arm is drawn uniformly at random).
    #
    # Returns a Results(choices, rewards) record of the run.
    wA = [ones(nUnits) for x = 1:nArms]   # one weight vector per arm
    rewards = zeros(nPlays)
    choices = zeros(nPlays)
    i = zeros(nUnits)       # feature vector of the current state
    prevI = zeros(nUnits)   # feature vector of the state the action was taken in
    sP = 0                  # next state (0..10)
    s = 0                   # current state (0..10)
    qSA = ones(nStates, nArms) * theta
    arm = rand(1:2)
    for t = 1:nPlays
        # With probability 0.6 re-choose via softmax; otherwise repeat.
        if rand() > .4
            arm = softmax(qSA, s, tau)
        end
        choices[t] = arm
        # Snapshot the acting state's features BEFORE mutating `i`.
        # (Bug fix: `prevI = i` aliased the array, so the weight update saw
        # the NEW state's features instead of the previous ones.)
        prevI = copy(i)
        # Get reward: arm 1 long-term (state up), arm 2 short-term (state down).
        if arm == 1
            reward = 400 + 1000 * s/10
            if sP < 10
                sP += 1
            end
            i[13] = 0
            i[14] = reward/1900
        else
            reward = 900 + 1000 * s/10
            if sP > 0
                sP -= 1
            end
            i[13] = reward/1900
            i[14] = 0
        end
        # Scaled state plus one-hot state code in units 2..12.
        i[1] = sP/10
        for k = 2:12
            i[k] = 0
        end
        i[sP + 2] = 1
        rewards[t] = reward
        # Greedy action in the next state (Q-learning target).
        maxA = 1
        if qSA[sP + 1, 1] <= qSA[sP + 1, 2]
            maxA = 2
        end
        # TD error and gradient step on the chosen arm's weights.
        delta = reward + gamma * qSA[sP + 1, maxA] - qSA[s + 1, arm]
        wA[arm] = wA[arm] .+ (alpha * delta * prevI)
        # Cache Q(s, arm) from state s's own features (was `i` via the alias).
        qSA[s + 1, arm] = dot(wA[arm], prevI)
        s = sP
    end
    return Results(choices, rewards)
end
function fitQ(params, choices, rewards, theta, nArms, nStates, nUnits=14)
    # Negative log likelihood of an observed (choices, rewards) sequence
    # under the linear-approximation Q-learner.
    #
    # params = [tau, alpha, gamma]; theta is the initial Q-value.
    # nUnits defaults to the 14-unit feature layout used throughout this
    # script (bug fix: the original silently read the global `nUnits`).
    wA = [ones(nUnits) for x = 1:nArms]
    ll = 0
    sP = 0
    s = 0
    i = zeros(nUnits)
    prevI = zeros(nUnits)
    qSA = ones(nStates, nArms) * theta
    for ci = 1:length(choices)
        # Results stores choices as Float64; use an integer for indexing.
        arm = convert(Int, choices[ci])
        reward = rewards[ci]
        # Softmax probability of the observed choice at temperature params[1].
        denom = 0
        for a = 1:nArms
            denom += exp(qSA[s + 1, a]/params[1])
        end
        prob = exp(qSA[s + 1, arm]/params[1])/denom
        # Snapshot the acting state's features BEFORE mutating `i`.
        # (Bug fix: `prevI = i` aliased the array.)
        prevI = copy(i)
        # Replay the state transition implied by the observed choice.
        # NOTE(review): the 14-unit layout (indices 13/14, one-hot 2..12,
        # state cap 10) is hard-coded; revisit if nUnits/nStates change.
        if arm == 1
            if sP < 10
                sP += 1
            end
            i[13] = 0
            i[14] = reward/1900
        else
            if sP > 0
                sP -= 1
            end
            i[13] = reward/1900
            i[14] = 0
        end
        i[1] = sP/10
        for k = 2:12
            i[k] = 0
        end
        i[sP + 2] = 1
        # Greedy next action for the Q-learning target.
        maxA = 1
        if qSA[sP + 1, 1] <= qSA[sP + 1, 2]
            maxA = 2
        end
        delta = reward + params[3] * qSA[sP + 1, maxA] - qSA[s + 1, arm]
        wA[arm] = wA[arm] .+ (params[2] * delta * prevI)
        # Cache Q(s, arm) from state s's own features (was `i` via the alias).
        qSA[s + 1, arm] = dot(wA[arm], prevI)
        s = sP
        # Accumulate the log likelihood of the observed choice.
        ll += log(prob)
    end
    # Return the NEGATIVE log likelihood (to be minimized).
    return -ll
end
# Parameter-recovery analysis: for each (tau, alpha, gamma) on a grid,
# simulate nSubs subjects with simulateQ, refit each with nStarts random
# optimizer restarts, and record how far the best-fit parameters land from
# the generating ones. Uses the old DataFrames string-column API and the
# old Optim result fields (o.f_minimum / o.minimum).
nTaus = 10
nAlphas = 10
nGammas = 10
nStarts = 50    # random restarts per subject
nSubs = 10      # simulated subjects per grid cell
mTau = 1900     # upper bound of the tau grid / restart sampling range
mAlpha = 1
mGamma = 1
#Distances to Best Fit
distances = DataFrame()
distances["Taus"] = zeros(nTaus * nAlphas * nGammas)
distances["Alphas"] = zeros(nTaus * nAlphas * nGammas)
distances["Gammas"] = zeros(nTaus * nAlphas * nGammas)
distances["Mean Distance"] = zeros(nTaus * nAlphas * nGammas)
distances["Distance Variance"] = zeros(nTaus * nAlphas * nGammas)
distances["Tau Distance"] = zeros(nTaus * nAlphas * nGammas)
distances["Tau Variance"] = zeros(nTaus * nAlphas * nGammas)
distances["Alpha Distance"] = zeros(nTaus * nAlphas * nGammas)
distances["Alpha Variance"] = zeros(nTaus * nAlphas * nGammas)
distances["Gamma Distance"] = zeros(nTaus * nAlphas * nGammas)
distances["Gamma Variance"] = zeros(nTaus * nAlphas * nGammas)
index = 1
for tau = mTau/nTaus:mTau/nTaus:mTau
    for alpha = mAlpha/nAlphas:mAlpha/nAlphas:mAlpha
        for gamma = mGamma/nGammas:mGamma/nGammas:mGamma
            minTaus = zeros(nSubs)
            minAlphas = zeros(nSubs)
            minGammas = zeros(nSubs)
            for i = 1:nSubs
                sim = simulateQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
                # Best NLL over restarts. NOTE(review): assumes every fit
                # beats 1000; kept (rather than Inf) to preserve the original
                # selection behavior.
                minLL = 1000
                for j = 1:nStarts
                    sTau = rand() * mTau
                    sAlpha = rand() * mAlpha
                    sGamma = rand() * mGamma
                    o = optimize(p -> fitQ(p, sim.choices, sim.rewards, theta, nArms, nStates), [sTau, sAlpha, sGamma])
                    if o.f_minimum < minLL
                        minLL = o.f_minimum
                        (minTaus[i], minAlphas[i], minGammas[i]) = o.minimum
                    end
                end
            end
            # Per-cell recovery statistics (generating value minus fits).
            distances["Taus"][index] = tau
            distances["Alphas"][index] = alpha
            distances["Gammas"][index] = gamma
            allTDist = tau - minTaus
            allADist = alpha - minAlphas
            allGDist = gamma - minGammas
            allDists = sqrt(allTDist.^2 + allADist.^2 + allGDist.^2)
            distances["Mean Distance"][index] = mean(allDists)
            distances["Distance Variance"][index] = var(allDists)
            distances["Tau Distance"][index] = mean(allTDist)
            distances["Tau Variance"][index] = var(allTDist)
            distances["Alpha Distance"][index] = mean(allADist)
            distances["Alpha Variance"][index] = var(allADist)
            distances["Gamma Distance"][index] = mean(allGDist)
            distances["Gamma Variance"][index] = var(allGDist)
            index += 1
        end
    end
end
#Confusion Matrix for Maximizing Parameters
# NOTE(review): `classifier` is built in a LATER cell of this gist — the
# cells appear to be notebook-ordered, so run order matters here.
m = classifier[:(Max .== 1), :]
confusion = DataFrame()
confusion["Taus"] = zeros(length(m[1]))
confusion["Alphas"] = zeros(length(m[1]))
confusion["Gammas"] = zeros(length(m[1]))
confusion["Median Fit Taus"] = zeros(length(m[1]))
confusion["Median Fit Alphas"] = zeros(length(m[1]))
confusion["Median Fit Gammas"] = zeros(length(m[1]))
confusion["Mean Fit Taus"] = zeros(length(m[1]))
confusion["Mean Fit Alphas"] = zeros(length(m[1]))
confusion["Mean Fit Gammas"] = zeros(length(m[1]))
# Per-row vectors of each subject's best-fit parameters.
confusion["MinTaus"] = [zeros(nSubs) for i = 1:length(m[1])]
confusion["MinAlphas"] = [zeros(nSubs) for i = 1:length(m[1])]
confusion["MinGammas"] = [zeros(nSubs) for i = 1:length(m[1])]
for index = 1:length(m[1])
    # NOTE(review): these keys ("Taus"/"Alphas"/"Gammas") do not match the
    # classifier cell below, which creates "Tau"/"Alpha"/"Gamma" columns —
    # confirm which classifier frame this cell was meant to consume.
    tau = m["Taus"][index]
    alpha = m["Alphas"][index]
    gamma = m["Gammas"][index]
    minTaus = zeros(nSubs)
    minAlphas = zeros(nSubs)
    minGammas = zeros(nSubs)
    for i = 1:nSubs
        # Simulate with the sticky-softmax variant, then recover parameters
        # by multistart minimization of the fitQ NLL.
        sim = simulateMixQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
        minLL = 1000   # NOTE(review): assumes the best NLL is below 1000
        for j = 1:nStarts
            sTau = rand() * mTau
            sAlpha = rand() * mAlpha
            sGamma = rand() * mGamma
            o = Optim.optimize(p -> fitQ(p, sim.choices, sim.rewards, theta, nArms, nStates), [sTau, sAlpha, sGamma])
            if o.f_minimum < minLL
                minLL = o.f_minimum
                (minTaus[i], minAlphas[i], minGammas[i]) = o.minimum
            end
        end
    end
    # Record generating parameters and summary statistics of the fits.
    confusion["Taus"][index] = tau
    confusion["Alphas"][index] = alpha
    confusion["Gammas"][index] = gamma
    confusion["Median Fit Taus"][index] = median(minTaus)
    confusion["Median Fit Alphas"][index] = median(minAlphas)
    confusion["Median Fit Gammas"][index] = median(minGammas)
    confusion["Mean Fit Taus"][index] = mean(minTaus)
    confusion["Mean Fit Alphas"][index] = mean(minAlphas)
    confusion["Mean Fit Gammas"][index] = mean(minGammas)
    confusion["MinTaus"][index] = minTaus
    confusion["MinAlphas"][index] = minAlphas
    confusion["MinGammas"][index] = minGammas
end
#Learning curves for maximizing parameters
# NOTE(review): reads m["Taus"] while the classifier cell below creates a
# "Tau" column — confirm which classifier frame this cell ran against.
m = classifier[:(Max .== 1), :]
learncurves = DataFrame()
learncurves["Taus"] = m["Taus"]
learncurves["Alphas"] = m["Alphas"]
learncurves["Gammas"] = m["Gammas"]
learncurves["Percent LT by trial"] = [zeros(nPlays) for i = 1:length(m[1])]
for index = 1:length(m[1])
    tau = learncurves["Taus"][index]
    alpha = learncurves["Alphas"][index]
    gamma = learncurves["Gammas"][index]
    for i = 1:nSubs
        sim = simulateQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
        # choices are 1 (long-term) or 2 (short-term), so summing
        # (choices - 1) counts short-term picks per trial across subjects.
        learncurves["Percent LT by trial"][index] += (sim.choices - 1)
    end
    # Fraction short-term per trial, then flip to fraction long-term.
    learncurves["Percent LT by trial"][index] /= nSubs
    learncurves["Percent LT by trial"][index] = 1 - learncurves["Percent LT by trial"][index]
end
#Fits from each subject
# NOTE(review): this cell OVERWRITES rows of `confusion` filled earlier, and
# reads m["Tau"]/m["Alpha"]/m["Gamma"] where earlier cells read m["Taus"]
# etc. — the column names cannot both be right; confirm which classifier
# frame `m` was sliced from when this cell last ran.
for index = 1:length(m[1])
    tau = m["Tau"][index]
    alpha = m["Alpha"][index]
    gamma = m["Gamma"][index]
    minTaus = zeros(nSubs)
    minAlphas = zeros(nSubs)
    minGammas = zeros(nSubs)
    for i = 1:nSubs
        # Simulate with plain softmax Q-learning, then recover parameters by
        # multistart minimization of the fitQ NLL.
        sim = simulateQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
        minLL = 1000   # NOTE(review): assumes the best NLL is below 1000
        for j = 1:nStarts
            sTau = rand() * mTau
            sAlpha = rand() * mAlpha
            sGamma = rand() * mGamma
            o = Optim.optimize(p -> fitQ(p, sim.choices, sim.rewards, theta, nArms, nStates), [sTau, sAlpha, sGamma])
            if o.f_minimum < minLL
                minLL = o.f_minimum
                (minTaus[i], minAlphas[i], minGammas[i]) = o.minimum
            end
        end
    end
    # Store generating parameters and per-subject fit summaries.
    confusion["Taus"][index] = tau
    confusion["Alphas"][index] = alpha
    confusion["Gammas"][index] = gamma
    confusion["Mean Fit Taus"][index] = mean(minTaus)
    confusion["Mean Fit Alphas"][index] = mean(minAlphas)
    confusion["Mean Fit Gammas"][index] = mean(minGammas)
    confusion["MinTaus"][index] = minTaus
    confusion["MinAlphas"][index] = minAlphas
    confusion["MinGammas"][index] = minGammas
end
#Learning curves for fits
# Re-simulate each fitted subject at its recovered parameters and build a
# per-trial long-term-choice curve per confusion row.
fitcurves = DataFrame()
taus = confusion["MinTaus"]       # per-row vector of per-subject fit taus
alphas = confusion["MinAlphas"]
gammas = confusion["MinGammas"]
fitcurves["Taus"] = confusion["Taus"]
fitcurves["Alphas"] = confusion["Alphas"]
fitcurves["Gammas"] = confusion["Gammas"]
fitcurves["Percent LT by trial"] = [zeros(nPlays) for i = 1:length(taus)]
for index = 1:length(taus)
    for i = 1:length(taus[index])
        tau = taus[index][i]
        alpha = alphas[index][i]
        gamma = gammas[index][i]
        sim = simulateQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
        # choices are 1 (long-term) or 2 (short-term): summing (choices - 1)
        # counts short-term picks per trial.
        fitcurves["Percent LT by trial"][index] += (sim.choices - 1)
    end
    # NOTE(review): assumes length(taus[index]) == nSubs.
    fitcurves["Percent LT by trial"][index] /= nSubs
    # Bug fix: the original inverted learncurves' column here instead of
    # fitcurves' own accumulated curve (copy-paste slip).
    fitcurves["Percent LT by trial"][index] = 1 - fitcurves["Percent LT by trial"][index]
end
# Grid of NLL values over (tau, alpha, gamma) for a single simulated subject.
# NOTE(review): `sim` is not created in this cell — it is whatever simulation
# ran last; confirm the intended subject before interpreting the surface.
nTaus = 10
nAlphas = 10
nGammas = 10
mTau = 1000
mAlpha = 1
mGamma = 1
#All possible log likelihoods
fits = DataFrame()
fits["Tau"] = zeros(nTaus * nAlphas * nGammas)
fits["Alpha"] = zeros(nTaus * nAlphas * nGammas)
fits["Gamma"] = zeros(nTaus * nAlphas * nGammas)
fits["LL"] = zeros(nTaus * nAlphas * nGammas)
index = 1
for tau = mTau/nTaus:mTau/nTaus:mTau
    for alpha = mAlpha/nAlphas:mAlpha/nAlphas:mAlpha
        for gamma = mGamma/nGammas:mGamma/nGammas:mGamma
            fits["Tau"][index] = tau
            fits["Alpha"][index] = alpha
            # Bug fix: the column is named "Gamma"; the original indexed the
            # nonexistent "Gammas" column.
            fits["Gamma"][index] = gamma
            # Bug fix: fitQ takes (params, choices, rewards, theta, nArms,
            # nStates); the original 4-argument call would raise MethodError.
            fits["LL"][index] = fitQ([tau, alpha, gamma], sim.choices, sim.rewards, theta, nArms, nStates)
            index += 1
        end
    end
end
# Parameter grid and task settings for the classification run.
nTaus = 10
nAlphas = 10
nGammas = 10
mTau = 1900
mAlpha = 1
mGamma = 1
nArms = 2
nPlays = 500
nStates = 11
nUnits = 14
nSubs = 100   # more subjects per cell than the recovery analysis above
#Classify parameter combinations as maximizing or meliorating
classifier = DataFrame()
classifier["Tau"] = zeros(nTaus * nAlphas * nGammas)
classifier["Alpha"] = zeros(nTaus * nAlphas * nGammas)
classifier["Gamma"] = zeros(nTaus * nAlphas * nGammas)
classifier["nMax"] = zeros(nTaus * nAlphas * nGammas)
classifier["nMel"] = zeros(nTaus * nAlphas * nGammas)
classifier["Max"] = zeros(nTaus * nAlphas * nGammas)
classifier["Mel"] = zeros(nTaus * nAlphas * nGammas)
classifier["Rand"] = zeros(nTaus * nAlphas * nGammas)
index = 1
for tau = mTau/nTaus:mTau/nTaus:mTau
    for alpha = mAlpha/nAlphas:mAlpha/nAlphas:mAlpha
        for gamma = mGamma/nGammas:mGamma/nGammas:mGamma
            classifier["Tau"][index] = tau
            classifier["Alpha"][index] = alpha
            classifier["Gamma"][index] = gamma
            nMax = 0
            nMel = 0
            for i = 1:nSubs
                sim = simulateQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
                # Mean of (choices - 1) over the final trials: 0 = always
                # long-term (maximizing), 1 = always short-term (meliorating).
                aveChoice = mean(sim.choices[400:nPlays] - 1)
                if aveChoice <= .4
                    nMax += 1
                elseif aveChoice >= .6
                    nMel += 1
                end
            end
            classifier["nMax"][index] = nMax
            classifier["nMel"][index] = nMel
            # Label the cell when at least 60 of the 100 subjects agree;
            # otherwise call it random.
            if nMax >= 60
                classifier["Max"][index] = 1
            elseif nMel >= 60
                classifier["Mel"][index] = 1
            else
                classifier["Rand"][index] = 1
            end
            index += 1
        end
    end
end
#Classify maximizing parameter fits
# Re-run the maximizing/meliorating classification, this time at each row's
# MEAN recovered parameters from the confusion analysis.
classifier = DataFrame()
l = length(confusion[1])   # old DataFrames API: column 1's length = row count
classifier["Tau"] = confusion["Mean Fit Taus"]
classifier["Alpha"] = confusion["Mean Fit Alphas"]
classifier["Gamma"] = confusion["Mean Fit Gammas"]
classifier["nMax"] = zeros(l)
classifier["nMel"] = zeros(l)
classifier["Max"] = zeros(l)
classifier["Mel"] = zeros(l)
classifier["Rand"] = zeros(l)
for i = 1:l
    tau = classifier["Tau"][i]
    alpha = classifier["Alpha"][i]
    gamma = classifier["Gamma"][i]
    nMax = 0
    nMel = 0
    for index = 1:nSubs
        sim = simulateQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
        # Mean of (choices - 1) over the final trials: 0 = maximizing,
        # 1 = meliorating.
        aveChoice = mean(sim.choices[400:nPlays] - 1)
        if aveChoice <= .4
            nMax += 1
        elseif aveChoice >= .6
            nMel += 1
        end
    end
    classifier["nMax"][i] = nMax
    classifier["nMel"][i] = nMel
    # NOTE(review): the 60 threshold is hard-coded for nSubs == 100 (60%);
    # it does not scale if nSubs changes.
    if nMax >= 60
        classifier["Max"][i] = 1
    elseif nMel >= 60
        classifier["Mel"][i] = 1
    else
        classifier["Rand"][i] = 1
    end
end
#Classify maximizing parameter fits for each subject
# NOTE(review): `taus`/`alphas`/`gammas` were bound to confusion["MinTaus"]
# etc., whose ENTRIES are per-subject vectors — so `tau = taus[i]` below is a
# vector, and simulateQ would receive vector parameters. This cell was likely
# meant to iterate over flattened per-subject fits; confirm before reuse.
classifier = DataFrame()
l = length(taus)
classifier["Tau"] = taus
classifier["Alpha"] = alphas
classifier["Gamma"] = gammas
classifier["nMax"] = zeros(l)
classifier["nMel"] = zeros(l)
classifier["Max"] = zeros(l)
classifier["Mel"] = zeros(l)
classifier["Rand"] = zeros(l)
for i = 1:l
    tau = taus[i]
    alpha = alphas[i]
    gamma = gammas[i]
    nMax = 0
    nMel = 0
    for index = 1:nSubs
        sim = simulateQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
        # Mean of (choices - 1) over the final trials: 0 = maximizing,
        # 1 = meliorating.
        aveChoice = mean(sim.choices[400:nPlays] - 1)
        if aveChoice <= .4
            nMax += 1
        elseif aveChoice >= .6
            nMel += 1
        end
    end
    classifier["nMax"][i] = nMax
    classifier["nMel"][i] = nMel
    # NOTE(review): 60 threshold hard-coded for nSubs == 100.
    if nMax >= 60
        classifier["Max"][i] = 1
    elseif nMel >= 60
        classifier["Mel"][i] = 1
    else
        classifier["Rand"][i] = 1
    end
end
# Rows whose simulated behavior was neither clearly maximizing nor meliorating.
r = classifier[:(Rand .== 1), :]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment