Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
def retrain(self, batch_size):
    """Fit the online Q-network on a random minibatch of replayed transitions.

    Each entry of ``self.expirience_replay`` is unpacked as
    ``(state, action, reward, next_state, terminated)``. For every sampled
    transition, the online network's current prediction for ``state`` is
    used as the regression target, with only the entry for the taken
    ``action`` replaced:

    * ``reward`` when the transition ended the episode;
    * ``reward + self.gamma * max(target_network.predict(next_state))``
      otherwise.

    The online network is then fitted on that single (state, target) pair.

    Args:
        batch_size: Number of transitions to draw from the replay buffer.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``batch_size``
            exceeds the number of stored transitions.
    """
    minibatch = random.sample(self.expirience_replay, batch_size)

    for state, action, reward, next_state, terminated in minibatch:
        # Add a leading axis of size 1 so each sample is passed to the
        # networks as a batch of one (presumably Keras-style models --
        # confirm against where the networks are built).
        state = np.expand_dims(
            np.asarray(state).astype(np.float64), axis=0
        )
        next_state = np.expand_dims(
            np.asarray(next_state).astype(np.float64), axis=0
        )

        # Start from the current prediction so that only the taken
        # action's value contributes a non-zero error.
        target = self.q_network.predict(state)

        if terminated:
            # No future return after a terminal transition.
            target[0][action] = reward
        else:
            # Bootstrap from the separate target network's best value.
            t = self.target_network.predict(next_state)
            target[0][action] = reward + self.gamma * np.amax(t)

        self.q_network.fit(state, target, epochs=1, verbose=0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment