Skip to content

Instantly share code, notes, and snippets.

@horoiwa
Created May 10, 2020 06:58
Show Gist options
  • Save horoiwa/5806def06017ba1bfc66550b6e469506 to your computer and use it in GitHub Desktop.
Save horoiwa/5806def06017ba1bfc66550b6e469506 to your computer and use it in GitHub Desktop.
経験再生
import collections
from dataclasses import dataclass

import numpy as np
@dataclass
class Experience:
    """One transition record stored for experience replay.

    Attributes:
        state: Array describing the state the action was taken from.
        action: The action taken, stored as an int.
        reward: The reward recorded for this transition.
        next_state: Array describing the state that followed the action.
        done: Flag recorded with the transition (presumably marks the end
            of an episode; confirm against the code that fills the buffer).
    """

    state: np.ndarray
    action: int
    reward: float
    next_state: np.ndarray
    done: bool
class DQNAgent:
    """Excerpt of a DQN agent showing only its experience-replay buffer.

    The remaining parts of the agent are elided in this snippet.
    """

    def __init__(self):
        """Create the agent with an empty, bounded replay buffer."""
        # A deque with maxlen drops its oldest entry once 20000 items are held.
        self.experiences = collections.deque(maxlen=20000)
        # (omitted: other initialisation)

    # (omitted: other methods)

    def get_minibatch(self, batch_size):
        """Draw a random minibatch of stored experiences without replacement.

        Args:
            batch_size: Number of distinct experiences to sample.

        Returns:
            A tuple ``(states, actions, rewards, next_states, dones)`` of
            lists, each of length ``batch_size``; position ``k`` in every
            list comes from the same sampled experience.

        Raises:
            ValueError: If ``batch_size`` exceeds the number of stored
                experiences (sampling is done without replacement).
        """
        buffer_size = len(self.experiences)
        # Fail early with a clear message instead of NumPy's generic one.
        if batch_size > buffer_size:
            raise ValueError(
                f"batch_size ({batch_size}) exceeds the number of stored "
                f"experiences ({buffer_size})"
            )

        indices = np.random.choice(buffer_size, size=batch_size, replace=False)
        selected_experiences = [self.experiences[i] for i in indices]

        states = [exp.state for exp in selected_experiences]
        actions = [exp.action for exp in selected_experiences]
        rewards = [exp.reward for exp in selected_experiences]
        next_states = [exp.next_state for exp in selected_experiences]
        dones = [exp.done for exp in selected_experiences]

        return (states, actions, rewards, next_states, dones)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment