Skip to content

Instantly share code, notes, and snippets.

@dhalpern
Created August 18, 2014 21:25
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dhalpern/c241f89a68bc799a71bb to your computer and use it in GitHub Desktop.
Save dhalpern/c241f89a68bc799a71bb to your computer and use it in GitHub Desktop.
# Global model and task parameters (defaults; several sections below re-bind
# these before re-running the analyses).
alpha = 0.1    # learning rate
tau = 500      # softmax temperature
theta = 650    # optimistic initial Q value for every (state, arm)
nArms = 2      # bandit arms: 1 = long-term, 2 = short-term
nPlays = 500   # trials per simulated subject
gamma = 0.1    # discount factor
nStates = 11   # latent states 0..10 (stored in rows 1..11)
nUnits = 14    # feature-vector length for the linear approximator
# Per-simulation output: the arm chosen and the reward received on each trial.
# (Old-style `type` declaration — this gist predates Julia 1.0's `struct`.)
type Results
# chosen arm per trial, stored as Float64 (1.0 or 2.0)
choices::Array{Float64,1}
# reward received per trial
rewards::Array{Float64,1}
end
# Simulate one agent playing the two-armed task with Q-learning and linear
# function approximation over nUnits features.
#
# Arm 1 ("long-term") pays less now but pushes the latent state sP up,
# raising future rewards; arm 2 ("short-term") pays more now but pushes sP
# down. Actions are sampled by softmax (Gibbs) with temperature tau.
#
# Arguments:
#   theta  - initial Q value for every (state, arm) pair
#   alpha  - learning rate
#   tau    - softmax temperature
#   gamma  - discount factor
#   nArms, nPlays, nStates, nUnits - problem sizes (the body hard-codes
#       feature indices 1..14 and states 0..10, so nStates = 11 and
#       nUnits = 14 are effectively required)
# Returns a Results holding the chosen arm and reward for each trial.
function simulateQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
wA = [ones(nUnits) for x = 1:nArms]   # one weight vector per arm
rewards = zeros(nPlays)
choices = zeros(nPlays)
i = zeros(nUnits)       # feature vector of the next state sP (mutated in place)
prevI = zeros(nUnits)   # snapshot of the current state's features
sP = 0
s = 0
qSA = ones(nStates, nArms) * theta   # cached Q(s, a), optimistic init
for t = 1:nPlays
# Softmax (Gibbs) action selection over the current state's Q row.
# (renamed from `sum`, which shadowed Base.sum; `t` avoids shadowing pi)
expSum = 0.0
for a = 1:nArms
expSum += exp(qSA[s + 1, a]/tau)
end
r = rand()
arm = 1
gibbsSum = exp(qSA[s + 1, arm]/tau)/expSum
while gibbsSum < r
arm += 1
gibbsSum += exp(qSA[s + 1, arm]/tau)/expSum
end
choices[t] = arm
# BUG FIX: the original did `prevI = i`, which only aliases the array —
# the in-place updates of `i` below then clobbered prevI, so the weight
# update used the *new* state's features. Take a real copy.
prevI = copy(i)
#Get reward
if arm == 1
#Long-term: smaller immediate payoff, state moves up (capped at 10)
reward = 400 + 1000 * s/10
if sP < 10
sP += 1
end
i[13] = 0
i[14] = reward/1900
else
#Short-term: larger immediate payoff, state moves down (floored at 0)
reward = 900 + 1000 * s/10
if sP > 0
sP -= 1
end
i[13] = reward/1900
i[14] = 0
end
i[1] = sP/10      # scaled state unit
for k = 2:12      # clear the one-hot state units
i[k] = 0.0
end
i[sP + 2] = 1     # one-hot unit for the new state
rewards[t] = reward
# Greedy action a' in the next state (ties go to arm 2).
maxA = 1
if qSA[sP + 1, 1] <= qSA[sP + 1, 2]
maxA = 2
end
#Update w (semi-gradient Q-learning step on the linear approximator)
delta = reward + gamma * qSA[sP + 1, maxA] - qSA[s + 1, arm]
wA[arm] = wA[arm] .+ (alpha * delta * prevI)
# NOTE(review): this refreshes Q(s, arm) with the *next* state's features
# `i`; dot(wA[arm], prevI) may have been intended — confirm.
qSA[s + 1, arm] = dot(wA[arm], i)
s = sP
end
return Results(choices, rewards)
end
# Softmax (Gibbs) action selection: sample an arm with probability
# proportional to exp(Q(s, arm)/tau).
#
# Arguments:
#   qSA   - Q-value matrix indexed [state + 1, arm]
#   s     - current state (0-based; row s + 1 is read)
#   tau   - temperature; larger tau -> closer to uniform
#   nArms - number of arms. Defaults to the number of columns of qSA;
#           the original read a global `nArms`, silently coupling this
#           function to script state. The default preserves old behavior.
# Returns the sampled arm index (1-based).
function softmax(qSA, s, tau, nArms = size(qSA, 2))
total = 0.0   # renamed from `sum`, which shadowed Base.sum
for a = 1:nArms
total += exp(qSA[(s + 1), a]/tau)
end
# Inverse-CDF sampling over the normalized Gibbs probabilities.
r = rand()
arm = 1
gibbsSum = exp(qSA[s + 1, arm]/tau)/total
while gibbsSum < r
arm += 1
gibbsSum += exp(qSA[s + 1, arm]/tau)/total
end
return arm
end
# Like simulateQ, but with "sticky" action selection: on each trial the agent
# re-samples its arm from the softmax only with probability 0.6 (rand() > .4),
# otherwise it repeats its previous choice. The first arm is uniform random.
# Arguments and return value are identical to simulateQ.
function simulateMixQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
wA = [ones(nUnits) for x = 1:nArms]   # one weight vector per arm
rewards = zeros(nPlays)
choices = zeros(nPlays)
i = zeros(nUnits)       # feature vector of the next state sP (mutated in place)
prevI = zeros(nUnits)   # snapshot of the current state's features
sP = 0
s = 0
qSA = ones(nStates, nArms) * theta   # cached Q(s, a), optimistic init
arm = rand(1:2)   # initial arm chosen uniformly
for t = 1:nPlays
if rand() > .4
arm = softmax(qSA, s, tau)   # resample with probability 0.6
end
choices[t] = arm
# BUG FIX: the original did `prevI = i`, which only aliases the array —
# the in-place updates of `i` below then clobbered prevI, so the weight
# update used the *new* state's features. Take a real copy.
prevI = copy(i)
#Get reward
if arm == 1
#Long-term: smaller immediate payoff, state moves up (capped at 10)
reward = 400 + 1000 * s/10
if sP < 10
sP += 1
end
i[13] = 0
i[14] = reward/1900
else
#Short-term: larger immediate payoff, state moves down (floored at 0)
reward = 900 + 1000 * s/10
if sP > 0
sP -= 1
end
i[13] = reward/1900
i[14] = 0
end
i[1] = sP/10      # scaled state unit
for k = 2:12      # clear the one-hot state units
i[k] = 0.0
end
i[sP + 2] = 1     # one-hot unit for the new state
rewards[t] = reward
# Greedy action a' in the next state (ties go to arm 2).
maxA = 1
if qSA[sP + 1, 1] <= qSA[sP + 1, 2]
maxA = 2
end
#Update w (semi-gradient Q-learning step)
delta = reward + gamma * qSA[sP + 1, maxA] - qSA[s + 1, arm]
wA[arm] = wA[arm] .+ (alpha * delta * prevI)
# NOTE(review): refreshes Q(s, arm) with the next state's features `i`,
# matching simulateQ; dot(wA[arm], prevI) may have been intended — confirm.
qSA[s + 1, arm] = dot(wA[arm], i)
s = sP
end
return Results(choices, rewards)
end
# Negative log likelihood of an observed choice/reward sequence under the
# linear-approximation Q-learning model (same model as simulateQ).
#
# Arguments:
#   params  - [tau, alpha, gamma] candidate parameters being fit
#   choices - observed arm per trial (Float64 values 1.0 or 2.0)
#   rewards - observed reward per trial
#   theta   - initial Q value for every (state, arm) pair
#   nArms, nStates - problem sizes
#   nUnits  - feature-vector length; defaults to 14. The original read a
#             global `nUnits` — 14 is forced anyway by the hard-coded
#             feature indices below, so the default preserves behavior.
# Returns -sum_t log P(choice_t), suitable for minimization.
function fitQ(params, choices, rewards, theta, nArms, nStates, nUnits = 14)
wA = [ones(nUnits) for x = 1:nArms]
ll = 0.0                  # was `ll = 0` (Int) — keep it Float64 throughout
sP = 0
s = 0
i = zeros(nUnits)         # feature vector of the next state sP (mutated in place)
prevI = zeros(nUnits)     # snapshot of the current state's features
qSA = ones(nStates, nArms) * theta
for ci = 1:length(choices)
expSum = 0.0              # renamed from `sum`, which shadowed Base.sum
arm = convert(Int, choices[ci])   # choices holds Float64s; need an integer index
reward = rewards[ci]
# softmax normalizer at temperature params[1]
for a = 1:nArms
expSum += exp(qSA[s + 1, a]/params[1])
end
# probability of picking arm
prob = exp(qSA[s + 1, arm]/params[1])/expSum
# BUG FIX: the original did `prevI = i`, which only aliases the array —
# the in-place updates of `i` below then clobbered prevI, so the weight
# update used the *new* state's features. Take a real copy.
prevI = copy(i)
# fix for more states
if arm == 1
if sP < 10
sP += 1
end
i[13] = 0
i[14] = reward/1900
else
if sP > 0
sP -= 1
end
i[13] = reward/1900
i[14] = 0
end
i[1] = sP/10      # scaled state unit
for k = 2:12      # clear the one-hot state units
i[k] = 0.0
end
i[sP + 2] = 1     # one-hot unit for the new state
# pick a' (greedy next action, ties go to arm 2)
maxA = 1
if qSA[sP + 1, 1] <= qSA[sP + 1, 2]
maxA = 2
end
delta = reward + params[3] * qSA[sP + 1, maxA] - qSA[s + 1, arm]
wA[arm] = wA[arm] .+ (params[2] * delta * prevI)
# NOTE(review): refreshes Q(s, arm) with the next state's features `i`,
# matching simulateQ; dot(wA[arm], prevI) may have been intended — confirm.
qSA[s + 1, arm] = dot(wA[arm], i)
s = sP
# accumulate log likelihood (negated on return)
ll += log(prob)
end
return -ll
end
# Grid resolution and parameter upper bounds for the recovery analysis.
nTaus = 10
nAlphas = 10
nGammas = 10
nStarts = 50   # random optimizer restarts per subject's fit
nSubs = 10     # simulated subjects per grid cell
mTau = 1900
mAlpha = 1
mGamma = 1
#Distances to Best Fit
# One row per (tau, alpha, gamma) grid cell, filled by the loop below.
# NOTE(review): string column indexing is an old DataFrames.jl API.
distances = DataFrame()
distances["Taus"] = zeros(nTaus * nAlphas * nGammas)
distances["Alphas"] = zeros(nTaus * nAlphas * nGammas)
distances["Gammas"] = zeros(nTaus * nAlphas * nGammas)
distances["Mean Distance"] = zeros(nTaus * nAlphas * nGammas)
distances["Distance Variance"] = zeros(nTaus * nAlphas * nGammas)
distances["Tau Distance"] = zeros(nTaus * nAlphas * nGammas)
distances["Tau Variance"] = zeros(nTaus * nAlphas * nGammas)
distances["Alpha Distance"] = zeros(nTaus * nAlphas * nGammas)
distances["Alpha Variance"] = zeros(nTaus * nAlphas * nGammas)
distances["Gamma Distance"] = zeros(nTaus * nAlphas * nGammas)
distances["Gamma Variance"] = zeros(nTaus * nAlphas * nGammas)
index = 1   # running row index for the grid loop below
# Parameter recovery: for each grid point, simulate nSubs subjects with
# simulateQ, refit each one via nStarts random restarts of Optim's
# optimizer, and record how far the best fit landed from the generating
# parameters.
for tau = mTau/nTaus:mTau/nTaus:mTau
for alpha = mAlpha/nAlphas:mAlpha/nAlphas:mAlpha
for gamma = mGamma/nGammas:mGamma/nGammas:mGamma
minTaus = zeros(nSubs)
minAlphas = zeros(nSubs)
minGammas = zeros(nSubs)
for i = 1:nSubs
sim = simulateQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
minLL = 1000   # NOTE(review): assumes at least one restart fits below NLL = 1000
minTau = 0     # NOTE(review): these three scalars are initialized but never read
minAlpha = 0
minGamma = 0
for j = 1:nStarts
# uniform random starting point within the parameter bounds
sTau = rand() * mTau
sAlpha = rand() * mAlpha
sGamma = rand() * mGamma
o = optimize(p -> fitQ(p, sim.choices, sim.rewards, theta, nArms, nStates), [sTau, sAlpha, sGamma])
# old Optim.jl field API (later versions use minimum(o)/Optim.minimizer(o))
if o.f_minimum < minLL
minLL = o.f_minimum
(minTaus[i], minAlphas[i], minGammas[i]) = o.minimum
end
end
end
distances["Taus"][index] = tau
distances["Alphas"][index] = alpha
distances["Gammas"][index] = gamma
# scalar-minus-vector broadcasts element-wise under old Julia
allTDist = tau - minTaus
allADist = alpha - minAlphas
allGDist = gamma - minGammas
# Euclidean distance in (tau, alpha, gamma) space per subject;
# note tau's scale (up to 1900) dominates the unscaled alpha/gamma
allDists = sqrt(allTDist.^2 + allADist.^2 + allGDist.^2)
distances["Mean Distance"][index] = mean(allDists)
distances["Distance Variance"][index] = var(allDists)
distances["Tau Distance"][index] = mean(allTDist)
distances["Tau Variance"][index] = var(allTDist)
distances["Alpha Distance"][index] = mean(allADist)
distances["Alpha Variance"][index] = var(allADist)
distances["Gamma Distance"][index] = mean(allGDist)
distances["Gamma Variance"][index] = var(allGDist)
index += 1
end
end
end
#Confusion Matrix for Maximizing Parameters
# Take the grid cells previously classified as "maximizing", simulate fresh
# subjects with the sticky-choice model (simulateMixQ), refit them, and
# record the recovered parameters per cell.
# NOTE(review): depends on `classifier` being defined by a later section —
# this gist reads like a set of REPL snippets, not a top-to-bottom script.
m = classifier[:(Max .== 1), :]
confusion = DataFrame()
confusion["Taus"] = zeros(length(m[1]))
confusion["Alphas"] = zeros(length(m[1]))
confusion["Gammas"] = zeros(length(m[1]))
confusion["Median Fit Taus"] = zeros(length(m[1]))
confusion["Median Fit Alphas"] = zeros(length(m[1]))
confusion["Median Fit Gammas"] = zeros(length(m[1]))
confusion["Mean Fit Taus"] = zeros(length(m[1]))
confusion["Mean Fit Alphas"] = zeros(length(m[1]))
confusion["Mean Fit Gammas"] = zeros(length(m[1]))
# one vector of per-subject best fits per grid cell
confusion["MinTaus"] = [zeros(nSubs) for i = 1:length(m[1])]
confusion["MinAlphas"] = [zeros(nSubs) for i = 1:length(m[1])]
confusion["MinGammas"] = [zeros(nSubs) for i = 1:length(m[1])]
for index = 1:length(m[1])
# NOTE(review): the classifier table defines columns "Tau"/"Alpha"/"Gamma"
# (singular); the plural names used here may be stale — confirm which
# classifier definition was live when this ran.
tau = m["Taus"][index]
alpha = m["Alphas"][index]
gamma = m["Gammas"][index]
minTaus = zeros(nSubs)
minAlphas = zeros(nSubs)
minGammas = zeros(nSubs)
for i = 1:nSubs
sim = simulateMixQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
minLL = 1000   # NOTE(review): assumes at least one restart fits below NLL = 1000
for j = 1:nStarts
# uniform random starting point within the parameter bounds
sTau = rand() * mTau
sAlpha = rand() * mAlpha
sGamma = rand() * mGamma
o = Optim.optimize(p -> fitQ(p, sim.choices, sim.rewards, theta, nArms, nStates), [sTau, sAlpha, sGamma])
# old Optim.jl field API
if o.f_minimum < minLL
minLL = o.f_minimum
(minTaus[i], minAlphas[i], minGammas[i]) = o.minimum
end
end
end
confusion["Taus"][index] = tau
confusion["Alphas"][index] = alpha
confusion["Gammas"][index] = gamma
confusion["Median Fit Taus"][index] = median(minTaus)
confusion["Median Fit Alphas"][index] = median(minAlphas)
confusion["Median Fit Gammas"][index] = median(minGammas)
confusion["Mean Fit Taus"][index] = mean(minTaus)
confusion["Mean Fit Alphas"][index] = mean(minAlphas)
confusion["Mean Fit Gammas"][index] = mean(minGammas)
confusion["MinTaus"][index] = minTaus
confusion["MinAlphas"][index] = minAlphas
confusion["MinGammas"][index] = minGammas
end
#Learning curves for maximizing parameters
# For each "maximizing" grid cell, average across nSubs fresh simulations
# the per-trial probability of choosing the long-term arm (arm 1).
m = classifier[:(Max .== 1), :]
learncurves = DataFrame()
learncurves["Taus"] = m["Taus"]
learncurves["Alphas"] = m["Alphas"]
learncurves["Gammas"] = m["Gammas"]
learncurves["Percent LT by trial"] = [zeros(nPlays) for i = 1:length(m[1])]
for index = 1:length(m[1])
tau = learncurves["Taus"][index]
alpha = learncurves["Alphas"][index]
gamma = learncurves["Gammas"][index]
for i = 1:nSubs
sim = simulateQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
# choices - 1 is the per-trial indicator of the short-term arm (arm 2)
learncurves["Percent LT by trial"][index] += (sim.choices - 1)
end
learncurves["Percent LT by trial"][index] /= nSubs
# flip the short-term fraction into the long-term fraction
learncurves["Percent LT by trial"][index] = 1 - learncurves["Percent LT by trial"][index]
end
#Fits from each subject
# Same refitting procedure as the confusion-matrix section above, but
# simulating with simulateQ instead of simulateMixQ.
# NOTE(review): this OVERWRITES rows of `confusion` built by the earlier
# section (and leaves its "Median Fit *" columns stale); also uses the
# singular column names m["Tau"]/... where the earlier section used
# m["Taus"]/... — one of the two must be wrong for any given classifier.
for index = 1:length(m[1])
tau = m["Tau"][index]
alpha = m["Alpha"][index]
gamma = m["Gamma"][index]
minTaus = zeros(nSubs)
minAlphas = zeros(nSubs)
minGammas = zeros(nSubs)
for i = 1:nSubs
sim = simulateQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
minLL = 1000   # NOTE(review): assumes at least one restart fits below NLL = 1000
for j = 1:nStarts
# uniform random starting point within the parameter bounds
sTau = rand() * mTau
sAlpha = rand() * mAlpha
sGamma = rand() * mGamma
o = Optim.optimize(p -> fitQ(p, sim.choices, sim.rewards, theta, nArms, nStates), [sTau, sAlpha, sGamma])
# old Optim.jl field API
if o.f_minimum < minLL
minLL = o.f_minimum
(minTaus[i], minAlphas[i], minGammas[i]) = o.minimum
end
end
end
confusion["Taus"][index] = tau
confusion["Alphas"][index] = alpha
confusion["Gammas"][index] = gamma
confusion["Mean Fit Taus"][index] = mean(minTaus)
confusion["Mean Fit Alphas"][index] = mean(minAlphas)
confusion["Mean Fit Gammas"][index] = mean(minGammas)
confusion["MinTaus"][index] = minTaus
confusion["MinAlphas"][index] = minAlphas
confusion["MinGammas"][index] = minGammas
end
#Learning curves for fits
# Re-simulate with each subject's fitted parameters and average the
# per-trial probability of choosing the long-term arm per grid cell.
fitcurves = DataFrame()
taus = confusion["MinTaus"]      # per-cell vectors of per-subject fitted taus
alphas = confusion["MinAlphas"]
gammas = confusion["MinGammas"]
fitcurves["Taus"] = confusion["Taus"]
fitcurves["Alphas"] = confusion["Alphas"]
fitcurves["Gammas"] = confusion["Gammas"]
fitcurves["Percent LT by trial"] = [zeros(nPlays) for i = 1:length(taus)]
for index = 1:length(taus)
for i = 1:length(taus[index])
tau = taus[index][i]
alpha = alphas[index][i]
gamma = gammas[index][i]
sim = simulateQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
# choices - 1 is the per-trial indicator of the short-term arm (arm 2)
fitcurves["Percent LT by trial"][index] += (sim.choices - 1)
end
fitcurves["Percent LT by trial"][index] /= nSubs
# BUG FIX: the original read from learncurves here (copy-paste from the
# previous section), overwriting this row with the wrong table's curve.
fitcurves["Percent LT by trial"][index] = 1 - fitcurves["Percent LT by trial"][index]
end
# Grid sizes and bounds for the exhaustive likelihood surface.
nTaus = 10
nAlphas = 10
nGammas = 10
mTau = 1000
mAlpha = 1
mGamma = 1
#All possible log likelihoods
# Evaluate the fit NLL over the whole (tau, alpha, gamma) grid for one
# simulated subject (`sim` must already exist from an earlier section).
fits = DataFrame()
fits["Tau"] = zeros(nTaus * nAlphas * nGammas)
fits["Alpha"] = zeros(nTaus * nAlphas * nGammas)
fits["Gamma"] = zeros(nTaus * nAlphas * nGammas)
fits["LL"] = zeros(nTaus * nAlphas * nGammas)
index = 1
for tau = mTau/nTaus:mTau/nTaus:mTau
for alpha = mAlpha/nAlphas:mAlpha/nAlphas:mAlpha
for gamma = mGamma/nGammas:mGamma/nGammas:mGamma
fits["Tau"][index] = tau
fits["Alpha"][index] = alpha
# BUG FIX: was fits["Gammas"] — that column does not exist (created above
# as "Gamma"), so every iteration would fail / miswrite.
fits["Gamma"][index] = gamma
# BUG FIX: fitQ takes (params, choices, rewards, theta, nArms, nStates);
# the original call dropped the last two arguments.
fits["LL"][index] = fitQ([tau, alpha, gamma], sim.choices, sim.rewards, theta, nArms, nStates)
index += 1
end
end
end
# Re-bind sizes for the behavioral classification run (nSubs is now 100).
nTaus = 10
nAlphas = 10
nGammas = 10
mTau = 1900
mAlpha = 1
mGamma = 1
nArms = 2
nPlays = 500
nStates = 11
nUnits = 14
nSubs = 100
#Classify parameter combinations as maximizing or meliorating
# One row per (tau, alpha, gamma) cell; Max/Mel/Rand are one-hot flags.
classifier = DataFrame()
classifier["Tau"] = zeros(nTaus * nAlphas * nGammas)
classifier["Alpha"] = zeros(nTaus * nAlphas * nGammas)
classifier["Gamma"] = zeros(nTaus * nAlphas * nGammas)
classifier["nMax"] = zeros(nTaus * nAlphas * nGammas)
classifier["nMel"] = zeros(nTaus * nAlphas * nGammas)
classifier["Max"] = zeros(nTaus * nAlphas * nGammas)
classifier["Mel"] = zeros(nTaus * nAlphas * nGammas)
classifier["Rand"] = zeros(nTaus * nAlphas * nGammas)
index = 1   # running row index for the grid loop below
# For each grid cell, simulate nSubs subjects and classify the cell by how
# often late-trial behavior (trials 400..nPlays) favored the long-term arm:
# mean choice <= .4 counts as maximizing, >= .6 as meliorating.
for tau = mTau/nTaus:mTau/nTaus:mTau
for alpha = mAlpha/nAlphas:mAlpha/nAlphas:mAlpha
for gamma = mGamma/nGammas:mGamma/nGammas:mGamma
classifier["Tau"][index] = tau
classifier["Alpha"][index] = alpha
classifier["Gamma"][index] = gamma
nMax = 0
nMel = 0
for i = 1:nSubs
sim = simulateQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
# mean of (choices - 1) over the last 101 trials: 0 = always long-term
aveChoice = mean(sim.choices[400:nPlays] - 1)
if aveChoice <= .4
nMax += 1
elseif aveChoice >= .6
nMel += 1
end
end
classifier["nMax"][index] = nMax
classifier["nMel"][index] = nMel
# cell label: 60% of subjects must agree, else "Rand"
# (the literal 60 assumes nSubs = 100 as set above)
if nMax >= 60
classifier["Max"][index] = 1
elseif nMel >= 60
classifier["Mel"][index] = 1
else
classifier["Rand"][index] = 1
end
index += 1
end
end
end
#Classify maximizing parameter fits
# Re-run the same behavioral classification, but on the MEAN fitted
# parameters recovered in the confusion-matrix section, to see whether the
# recovered parameters still produce maximizing behavior.
# NOTE(review): this REBINDS `classifier`, discarding the grid table the
# confusion section was built from.
classifier = DataFrame()
l = length(confusion[1])
classifier["Tau"] = confusion["Mean Fit Taus"]
classifier["Alpha"] = confusion["Mean Fit Alphas"]
classifier["Gamma"] = confusion["Mean Fit Gammas"]
classifier["nMax"] = zeros(l)
classifier["nMel"] = zeros(l)
classifier["Max"] = zeros(l)
classifier["Mel"] = zeros(l)
classifier["Rand"] = zeros(l)
for i = 1:l
tau = classifier["Tau"][i]
alpha = classifier["Alpha"][i]
gamma = classifier["Gamma"][i]
nMax = 0
nMel = 0
for index = 1:nSubs
sim = simulateQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
# mean of (choices - 1) over trials 400..nPlays: 0 = always long-term
aveChoice = mean(sim.choices[400:nPlays] - 1)
if aveChoice <= .4
nMax += 1
elseif aveChoice >= .6
nMel += 1
end
end
classifier["nMax"][i] = nMax
classifier["nMel"][i] = nMel
# the literal 60 assumes nSubs = 100 (60% agreement threshold)
if nMax >= 60
classifier["Max"][i] = 1
elseif nMel >= 60
classifier["Mel"][i] = 1
else
classifier["Rand"][i] = 1
end
end
#Classify maximizing parameter fits for each subject
# Same classification again, but intended per individual fitted subject.
# NOTE(review): `taus`/`alphas`/`gammas` were bound earlier to
# confusion["MinTaus"] etc., which are VECTORS of per-subject fits per cell —
# so `tau = taus[i]` below is a vector, not a scalar, and simulateQ would
# not accept it. Presumably these names were re-bound to flattened scalars
# in the REPL before this ran — confirm.
classifier = DataFrame()
l = length(taus)
classifier["Tau"] = taus
classifier["Alpha"] = alphas
classifier["Gamma"] = gammas
classifier["nMax"] = zeros(l)
classifier["nMel"] = zeros(l)
classifier["Max"] = zeros(l)
classifier["Mel"] = zeros(l)
classifier["Rand"] = zeros(l)
for i = 1:l
tau = taus[i]
alpha = alphas[i]
gamma = gammas[i]
nMax = 0
nMel = 0
for index = 1:nSubs
sim = simulateQ(theta, alpha, tau, gamma, nArms, nPlays, nStates, nUnits)
# mean of (choices - 1) over trials 400..nPlays: 0 = always long-term
aveChoice = mean(sim.choices[400:nPlays] - 1)
if aveChoice <= .4
nMax += 1
elseif aveChoice >= .6
nMel += 1
end
end
classifier["nMax"][i] = nMax
classifier["nMel"][i] = nMel
# the literal 60 assumes nSubs = 100 (60% agreement threshold)
if nMax >= 60
classifier["Max"][i] = 1
elseif nMel >= 60
classifier["Mel"][i] = 1
else
classifier["Rand"][i] = 1
end
end
# rows that settled on neither behavior (old DataFrames expression indexing)
r = classifier[:(Rand .== 1), :]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment