From 5f7cfd2e10d95445a3d095e909753f2aad6b3110 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Sat, 25 Jun 2022 15:37:45 +0200
Subject: [PATCH] Note about Code-Src

---
 sb3_trl/trl_pg/trl_pg.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sb3_trl/trl_pg/trl_pg.py b/sb3_trl/trl_pg/trl_pg.py
index ef1b62c..34b2c83 100644
--- a/sb3_trl/trl_pg/trl_pg.py
+++ b/sb3_trl/trl_pg/trl_pg.py
@@ -230,6 +230,7 @@ class TRL_PG(OnPolicyAlgorithm):
                 # values, log_prob, entropy = self.policy.evaluate_actions(rollout_data.observations, actions)

                 # src in TRL reference code:
+                # Stolen from Fabian's Code (Public Version):
                 # p = self.policy(rollout_data.observations)
                 # proj_p = self.projection(self.policy, p, b_q = (b_old_mean, b_old_std), self._global_step)
                 # new_logpacs = self.policy.log_probability(proj_p, b_actions)