def retrain(self, batch_size):
    """Fit the online Q-network on a random minibatch of stored transitions.

    Draws ``batch_size`` transitions from ``self.expirience_replay`` without
    replacement. For each transition, the value predicted by
    ``self.q_network`` for the taken action is overwritten with:

    * ``reward`` when the transition is terminal;
    * ``reward + self.gamma * max(self.target_network.predict(next_state))``
      otherwise.

    ``self.q_network`` is then fitted on that single ``(state, target)``
    pair, so one call performs ``batch_size`` separate fit steps.

    Args:
        batch_size: Number of transitions to sample from the replay buffer.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``batch_size`` is
            negative or larger than the number of stored transitions.
    """
    minibatch = random.sample(self.expirience_replay, batch_size)

    for state, action, reward, next_state, terminated in minibatch:
        # Start from the network's own estimates so that only the entry for
        # the taken action differs from the current prediction.
        # NOTE(review): the [0] indexing below assumes predict() returns a
        # batch containing a single row of action values -- confirm against
        # the state shape stored by the caller.
        target = self.q_network.predict(state)

        if terminated:
            # No successor state: the target is just the observed reward.
            target[0][action] = reward
        else:
            # Bootstrap from the separate target network's best next value.
            t = self.target_network.predict(next_state)
            target[0][action] = reward + self.gamma * np.amax(t)

        self.q_network.fit(state, target, epochs=1, verbose=0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment