Removed old comments

Dominik Moritz Roth 2022-06-25 21:56:07 +02:00
parent b8488c531b
commit 80741776d2


@@ -247,7 +247,6 @@ class TRL_PG(OnPolicyAlgorithm):
 # p = self.policy(rollout_data.observations)
 # proj_p = self.projection(self.policy, p, b_q = (b_old_mean, b_old_std), self._global_step)
 # new_logpacs = self.policy.log_probability(proj_p, b_actions)
-# log_prob == new_pogpacs (i think)
 # src of evaluate_actions:
 # pol = self.policy
@@ -279,8 +278,6 @@ class TRL_PG(OnPolicyAlgorithm):
 values = self.policy.value_net(latent_vf)
 entropy = proj_p.entropy()
-# log_prob = p.log_prob(actions)
 values = values.flatten()
 # Normalize advantage
 advantages = rollout_data.advantages
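
Note on the deleted comments (not part of the commit diff itself): they traced, by hand, how the policy's evaluate_actions step would look once a trust-region projection is inserted between the raw policy distribution and the log-probability/entropy terms used in the policy-gradient loss. The sketch below reconstructs that flow from the comments; the signatures of policy(...), projection(...) and log_probability(...) are assumptions modelled on the deleted lines, not the repository's confirmed API.

    def project_and_evaluate(policy, projection, rollout_data, actions,
                             old_mean, old_std, global_step):
        # Distribution of the current policy for the rollout observations
        p = policy(rollout_data.observations)
        # Project it back towards the behaviour policy (old_mean, old_std)
        # so the update stays inside the trust region
        proj_p = projection(policy, p, q=(old_mean, old_std), step=global_step)
        # Log-probabilities and entropy are taken from the *projected*
        # distribution, matching the deleted "# log_prob == new_pogpacs" note
        log_prob = policy.log_probability(proj_p, actions)
        entropy = proj_p.entropy()
        return proj_p, log_prob, entropy

    def normalize_advantages(advantages, eps=1e-8):
        # "Normalize advantage" step from the second hunk's context;
        # the exact epsilon value is an assumption
        return (advantages - advantages.mean()) / (advantages.std() + eps)

Taking entropy and log-probabilities from proj_p rather than p is the point of the projection: the gradient then only sees distributions already constrained to the trust region around (old_mean, old_std).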