Created
March 13, 2013 20:51
-
-
Save tpott/5156034 to your computer and use it in GitHub Desktop.
One of these has a semantic error that is fixed in the other. Can you find which one??
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def value_iteration_A(probs, gamma, rewards, oldV): | |
newV = oldV.copy() | |
nstates, nactions = probs.shape[0], probs.shape[-1] | |
for s in range(nstates): | |
weightedV = np.zeros([nactions], dtype='float64') | |
for a in range(nactions): | |
weightedV[a] = np.dot(probs[s,:,a], oldV) | |
newV[s] = rewards[s] + gamma * weightedV.max() | |
return newV | |
def value_iteration_B(probs, gamma, rewards, oldV): | |
newV = oldV.copy() | |
nstates, nactions = probs.shape[0], probs.shape[-1] | |
for s in range(nstates): | |
weightedV = np.zeros([nactions], dtype='float64') | |
for a in range(nactions): | |
weightedV = np.dot(probs[s,:,a], oldV) | |
newV[s] = rewards[s] + gamma * weightedV.max() | |
return newV |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment