# NMZivkovic/training.py

## training.py
# Main training loop: run `num_of_episodes` episodes, each capped at
# `timesteps_per_episode` environment steps.
#
# Relies on names defined elsewhere in the file: `tqdm`, `enviroment`,
# `img_processor`, `agent`, `num_of_episodes`, `timesteps_per_episode`
# and `batch_size`.
for e in tqdm(range(num_of_episodes)):
    # Reset the environment and preprocess the initial observation once.
    # Every later `state` is carried over from `next_state`, which is already
    # preprocessed, so preprocessing must not be repeated inside the step
    # loop (doing so would feed the agent a twice-processed state).
    state = img_processor.process_env_state(enviroment.reset())

    # Per-episode defaults, so both names are defined even if the step loop
    # below runs zero times.
    reward = 0
    terminated = False

    for timestep in range(timesteps_per_episode):
        # Ask the agent for an action given the current (preprocessed) state.
        action = agent.act(state)

        # Apply the action and preprocess the resulting observation.
        next_state, reward, terminated, info = enviroment.step(action)
        next_state = img_processor.process_env_state(next_state)

        # Hand the transition to the agent (presumably appended to its
        # experience replay -- confirm against the agent implementation).
        agent.store(state, action, reward, next_state, terminated)

        state = next_state

        if terminated:
            # Episode is over: call the agent's target-model alignment hook
            # and move on to the next episode without retraining this step.
            agent.alighn_target_model()
            break

        # Retrain only once the replay buffer holds more than one batch.
        if len(agent.expirience_replay) > batch_size:
            agent.retrain(batch_size)