Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Training loop: for each episode, step the environment with actions chosen
# by the agent, hand every transition to `agent.store`, and call
# `agent.retrain` once enough entries have been collected.
# NOTE(review): the nesting below was reconstructed from a paste that had lost
# its indentation -- confirm that the retrain step is meant to run on every
# timestep rather than once per episode.
for e in tqdm(range(num_of_episodes)):
    # Reset the environment and preprocess the first observation exactly
    # once. Every later `state` is the already-processed `next_state` of the
    # previous step, so it must not be passed through the processor again.
    state = img_processor.process_env_state(enviroment.reset())

    # Initialize per-episode variables.
    reward = 0
    terminated = False

    for timestep in range(timesteps_per_episode):
        # Ask the agent for an action given the current (processed) state.
        action = agent.act(state)

        # Apply the action, then preprocess the resulting observation.
        next_state, reward, terminated, info = enviroment.step(action)
        next_state = img_processor.process_env_state(next_state)

        # Hand the full transition to the agent and advance to the next state.
        agent.store(state, action, reward, next_state, terminated)
        state = next_state

        if terminated:
            # Episode finished: run the agent's target-model alignment step
            # and move on to the next episode.
            agent.alighn_target_model()
            break

        # Retrain only once the agent holds more than `batch_size` entries.
        if len(agent.expirience_replay) > batch_size:
            agent.retrain(batch_size)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment