Skip to content

Instantly share code, notes, and snippets.

@RafaelAPB
Created December 11, 2018 18:45
Show Gist options
  • Save RafaelAPB/34cb763dacad3613edbae879a43d572c to your computer and use it in GitHub Desktop.
Save RafaelAPB/34cb763dacad3613edbae879a43d572c to your computer and use it in GitHub Desktop.
# Model-based learning of Q from sampled transitions.
#
# At every step an action is chosen with select_action, a next state is
# sampled from P[a][current_state], and running averages of the observed
# transitions (P_3) and costs (C) are updated for the visited (state, action)
# pair.  Q for that pair is then recomputed from the current estimates, and
# the distance ||optimalQ - Q|| is recorded so convergence can be plotted.
#
# Relies on names defined earlier in the file: np, plt, nX, nA, P, cost,
# gamma, optimalQ and select_action.

# Initializations
iterations = []
distance = []
n = 5000

# Stores
P_3 = [np.identity(nX) for _ in range(nA)]  # estimated transitions, one nX x nX matrix per action
C = np.zeros([nX, nA])                      # estimated cost per (state, action)
Q = np.zeros([nX, nA])                      # estimated Q-values per (state, action)
N = np.zeros([nX, nA])                      # visit counts per (state, action)

current_state = np.random.randint(0, nX)

for i in range(n):
    # Third argument is passed through to select_action unchanged
    # (presumably the exploration rate -- confirm against its definition).
    a = select_action(Q, current_state, 0.1)

    # Sample the successor state index from the distribution P[a][current_state].
    nextState = np.random.choice(nX, p=P[a][current_state])

    # One-hot encoding of the observed successor state.
    temp = np.zeros(nX)
    temp[nextState] = 1

    # Incremental-mean step size: 1 / (number of visits including this one).
    step = 1 / (1 + N[current_state, a])

    # Move the running averages towards the new observation.
    P_3[a][current_state] += step * (temp - P_3[a][current_state])
    C[current_state, a] += step * (cost[current_state, a] - C[current_state, a])

    # Recompute Q for the visited pair from the current estimates.
    Q[current_state, a] = (
        C[current_state, a]
        + gamma * P_3[a][current_state].dot(np.min(Q, axis=1))
    )

    # Register the visit.
    N[current_state, a] += 1

    # Final case (zero cost): restart from a random state.
    if cost[current_state, a] == 0:
        current_state = np.random.randint(0, nX)
    else:
        current_state = nextState

    iterations.append(i)
    distance.append(np.linalg.norm(optimalQ - Q))

# Plot how far the learned Q is from optimalQ at every iteration.
plt.scatter(iterations, distance, label="Model-based", s=7)
plt.xlabel("Iterations")
plt.ylabel("||Q*-Q||")
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment