djbyrne/bolts_rl_key_comp.py

## bolts_rl_key_comp.py
LitRLModel(pl.LightningModule):

  def __init__(self, env, ...):

    # Environemnt
    self.env = gym.make(env)
    self.env.seed(123)

    self.obs_shape = self.env.observation_space.shape
    self.n_actions = self.env.action_space.n

    # Agent
    self.agent = ValueAgent(self.net, self.n_actions)

  # Dataset
  def train_dataloader(self) -> DataLoader:
      self.dataset = ExperienceSourceDataset(self.train_batch)
      return DataLoader(dataset=self.dataset, batch_size=self.batch_size)

  # Train Batch
  def train_batch(self) -> Tuple:
     # keep taking steps during training
     while True:
            # take a step in the environment
            action = self.agent(self.state, self.device)
            next_state, reward, done, _ = self.env.step(action[0])

            # add results to the batch
            ...

            # when the batch is ready, yield to the dataset
            yield batch
	LitRLModel(pl.LightningModule):

	def __init__(self, env, ...):

	# Environemnt
	self.env = gym.make(env)
	self.env.seed(123)

	self.obs_shape = self.env.observation_space.shape
	self.n_actions = self.env.action_space.n

	# Agent
	self.agent = ValueAgent(self.net, self.n_actions)

	# Dataset
	def train_dataloader(self) -> DataLoader:
	self.dataset = ExperienceSourceDataset(self.train_batch)
	return DataLoader(dataset=self.dataset, batch_size=self.batch_size)

	# Train Batch
	def train_batch(self) -> Tuple:
	# keep taking steps during training
	while True:
	# take a step in the environment
	action = self.agent(self.state, self.device)
	next_state, reward, done, _ = self.env.step(action[0])

	# add results to the batch
	...

	# when the batch is ready, yield to the dataset
	yield batch