"""Tabular Q-learning on a 16-state grid world.

States 0..15 are laid out row-major on a 4x4 grid. Each state has up to four
moves, indexed 0..3, which lead to ``s - 4``, ``s + 4``, ``s - 1`` and
``s + 1`` respectively (see ``n_s``). An episode starts in a random state from
``i_s`` and follows the greedy policy until it reaches ``GOAL_STATE``.

Run as a script to train for ``N_EPISODES`` episodes, print the normalised
Q table and save the raw table to ``Q Mat2.npy``.
"""
import random

import numpy as np

# reward[s][a]: immediate reward for taking action a in state s.
# Every move into states 0, 4, 7 or 10 costs -10; the moves into state 3 pay
# +10 (from state 2) and +100 (from state 7).  Entries for moves that leave
# the grid are never read.
reward = np.array([
    [0, -10, 0, -1],
    [0, -1, -10, -1],
    [0, -1, -1, 10],
    [0, -10, -1, -1],
    [-10, -1, 0, -1],
    [-1, -1, -10, -1],
    [1, -10, -1, -10],
    [100, -1, -1, 0],
    [-10, -1, 0, -1],
    [-1, -1, -1, -10],
    [-1, -1, -1, -1],
    [-10, -1, -10, 0],
    [-1, 0, 0, -1],
    [-1, 0, -1, -1],
    [-10, 0, -1, -1],
    [-1, 0, -1, 0],
])

# n_s[s][a]: state reached by taking action a in state s, or -1 when the move
# would leave the grid.
n_s = np.array([
    [-1, 4, -1, 1],
    [-1, 5, 0, 2],
    [-1, 6, 1, 3],
    [-1, 7, 2, -1],
    [0, 8, -1, 5],
    [1, 9, 4, 6],
    [2, 10, 5, 7],
    [3, 11, 6, -1],
    [4, 12, -1, 9],
    [5, 13, 8, 10],
    [6, 14, 9, 11],
    [7, 15, 10, -1],
    [8, -1, -1, 13],
    [9, -1, 12, 14],
    [10, -1, 13, 15],
    [11, -1, 14, -1],
])

# action[s]: the legal action indices in state s.
# Derived from n_s rather than typed by hand: the hand-written table listed
# moves for states 0 and 13 whose n_s entry is -1, and that -1 was then used
# as an index (i.e. the last row), silently teleporting the agent to state 15.
# A plain list of lists is used because the rows have different lengths, which
# np.array() rejects on current NumPy releases.
action = [
    [a for a, nxt in enumerate(row) if nxt >= 0]
    for row in n_s.tolist()
]

# Q[s][a]: learned action values, updated in place during training.
# To resume from a previously saved table instead: Q = np.load('Q Mat.npy')
Q = np.zeros((16, 4))

# Candidate start states (every state except 0, 3, 4, 7 and 10).
i_s = np.array([[1, 2, 5, 6, 8, 9, 11, 12, 13, 14, 15]])

Gamma = 0.7   # discount factor
lrA = 0.7     # learning rate
lrB = 0.01    # floor for a learning-rate decay schedule; decay is disabled

GOAL_STATE = 3   # episodes end when this state is reached
N_EPISODES = 9


def greedy_action(q, state):
    """Return the legal action with the highest Q value in *state*.

    Ties go to the lowest action index, because ``max`` returns the first
    maximal element.
    """
    return max(action[state], key=lambda a: q[state][a])


def run_episode(q, start_state, *, gamma=Gamma, lr=lrA, goal=GOAL_STATE,
                max_steps=None):
    """Follow the greedy policy from *start_state* to *goal*, updating *q*.

    Args:
        q: Q table indexed as ``q[state][action]``; modified in place.
        start_state: state the episode begins in.
        gamma: discount factor applied to the best next-state value.
        lr: learning rate applied to each temporal-difference error.
        goal: state that ends the episode.
        max_steps: optional safety cap on the number of moves; ``None``
            means no cap.

    Returns:
        ``(total_td_error, path)``: the sum of the temporal-difference errors
        of every move (each measured before its update), and the list of
        visited states including the start and final state.

    Raises:
        RuntimeError: if *max_steps* moves were made without reaching *goal*.
    """
    state = int(start_state)
    path = [state]
    total_td_error = 0.0
    steps = 0
    while state != goal:
        if max_steps is not None and steps >= max_steps:
            raise RuntimeError(
                f"goal state {goal} not reached within {max_steps} steps"
            )
        act = greedy_action(q, state)
        nxt = int(n_s[state][act])
        best_next = max(q[nxt][a] for a in action[nxt])
        td_error = reward[state][act] + gamma * best_next - q[state][act]
        total_td_error += td_error
        q[state][act] += lr * td_error
        state = nxt
        path.append(state)
        steps += 1
    return total_td_error, path


def main():
    """Train ``Q`` for ``N_EPISODES`` episodes, then report and save it."""
    path = []
    for episode in range(1, N_EPISODES + 1):
        start = random.choice(i_s[0])
        total, path = run_episode(Q, start)
        # The value labelled "Reward" is the episode's summed TD error.
        print("Episode ", episode, " Reward ", total)
    # Path taken in the last episode.
    print(path)

    peak = np.amax(Q)
    # Scale so the largest entry is 1; skip scaling if the table is all zero.
    scaled = np.divide(Q, peak) if peak != 0 else Q
    print("The Final Q Matrix is: \n", scaled)
    np.save('Q Mat2', Q)


if __name__ == "__main__":
    main()