From d6c05668cb20f7966932401d82e663da92924e82 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Tue, 22 Aug 2023 01:01:13 +0200 Subject: [PATCH] fixes --- sbBrix/common/policies.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sbBrix/common/policies.py b/sbBrix/common/policies.py index 61d5e38..9e06244 100644 --- a/sbBrix/common/policies.py +++ b/sbBrix/common/policies.py @@ -817,6 +817,7 @@ class Actor(BasePolicy): self.mu, self.log_std = self.action_dist.proba_distribution_net( latent_dim=last_layer_dim, return_log_std=True, **dist_kwargs ) + self._remember_log_std = log_std_init # Avoid numerical issues by limiting the mean of the Gaussian # to be in [-clip_mean, clip_mean] if clip_mean > 0.0: