@cyoon1729
Created November 23, 2019 16:54
"""
# Initialize alpha & associated variables as such in the beginning:
self.alpha = alpha
self.target_entropy = -torch.prod(torch.Tensor(self.env.action_space.shape).to(self.device)).item()
self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
"""
# Sample new actions from the current policy and get their log-probabilities.
new_means, new_stds, new_zs, new_log_pis = self.policy_net.sample(states)
# Temperature loss: detach the entropy term so gradients flow only into
# log_alpha, not back through the policy's log-probabilities.
alpha_loss = (self.log_alpha * (-new_log_pis - self.target_entropy).detach()).mean()
# Gradient step on log_alpha, then recover alpha = exp(log_alpha) > 0.
self.alpha_optim.zero_grad()
alpha_loss.backward()
self.alpha_optim.step()
self.alpha = self.log_alpha.exp()
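Since the loss is linear in `log_alpha` (the entropy term is detached), its gradient is just the mean of `-log_pi - target_entropy`. A torch-free sketch of one plain-SGD step makes the feedback loop visible; `alpha_step` and its arguments are illustrative names, not part of the gist:

```python
import math

def alpha_step(log_alpha, log_pis, target_entropy, lr=1e-3):
    # d(alpha_loss)/d(log_alpha) = mean(-log_pi - target_entropy),
    # because the detached entropy term is a constant w.r.t. log_alpha.
    grad = sum(-lp - target_entropy for lp in log_pis) / len(log_pis)
    log_alpha = log_alpha - lr * grad  # one SGD step (the gist uses an optimizer)
    return log_alpha, math.exp(log_alpha)
```

When the policy's entropy (`-mean(log_pi)`) falls below the target, the gradient is negative, so `log_alpha` and hence `alpha` grow, strengthening the entropy bonus; when entropy exceeds the target, `alpha` shrinks.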