From 02e4ed15102879fa10075333b0296a2123a6d3eb Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 27 Aug 2022 15:19:00 +0200 Subject: [PATCH] Added support for parallel envs --- .../distributions/distributions.py | 29 +++++++------------ metastable_baselines/ppo/policies.py | 2 +- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/metastable_baselines/distributions/distributions.py b/metastable_baselines/distributions/distributions.py index 22831d4..06e6807 100644 --- a/metastable_baselines/distributions/distributions.py +++ b/metastable_baselines/distributions/distributions.py @@ -22,13 +22,6 @@ from stable_baselines3.common.distributions import DiagGaussianDistribution from ..misc.tensor_ops import fill_triangular from ..misc.tanhBijector import TanhBijector -# TODO: Integrate and Test what I currently have before adding more complexity -# TODO: Support Squashed Dists (tanh) -# TODO: Contextual Cov -# TODO: - Hybrid -# TODO: Contextual SDE (Scalar + Diag + Full) -# TODO: (SqrtInducedCov (Scalar + Diag + Full)) - class Strength(Enum): NONE = 0 @@ -220,17 +213,13 @@ class UniversalGaussianDistribution(SB3_Distribution): return mean_actions, chol def _sqrt_to_chol(self, cov_sqrt): - vec = False - nobatch = False - if len(cov_sqrt.shape) <= 2: - vec = True - if len(cov_sqrt.shape) == 1: - nobatch = True + vec = self.cov_strength != Strength.FULL + batch_dims = len(cov_sqrt.shape) - 2 + 1*vec if vec: cov_sqrt = th.diag_embed(cov_sqrt) - if nobatch: + if batch_dims == 0: cov = th.mm(cov_sqrt.mT, cov_sqrt) cov += th.eye(cov.shape[-1])*(self.epsilon) else: @@ -533,8 +522,12 @@ class CholNet(nn.Module): # S[i,j] e (0, pi) where i = 2..n, j = 2..i # We already ensure S > 0 in _chol_from_flat_sphe_chol # We ensure < pi by applying tanh*pi to all applicable elements - batch = (len(sphe_chol.shape) == 3) - batch_size = sphe_chol.shape[0] + vec = self.cov_strength != Strength.FULL + batch_dims = len(sphe_chol.shape) - 2 + 1*vec + batch = batch_dims != 0 + batch_shape = sphe_chol.shape[:batch_dims] + batch_shape_scalar = batch_shape + (1,) + S = sphe_chol n = sphe_chol.shape[-1] L = th.zeros_like(sphe_chol) @@ -542,13 +535,13 @@ class CholNet(nn.Module): #t = 1 t = th.Tensor([1])[0] if batch: - t = t.expand((batch_size, 1)) + t = t.expand(batch_shape_scalar) #s = '' for j in range(i+1): #maybe_cos = 1 maybe_cos = th.Tensor([1])[0] if batch: - maybe_cos = maybe_cos.expand((batch_size, 1)) + maybe_cos = maybe_cos.expand(batch_shape_scalar) #s_maybe_cos = '' if i != j and j < n-1 and i < n: if batch: diff --git a/metastable_baselines/ppo/policies.py b/metastable_baselines/ppo/policies.py index 273aa79..4f66ee2 100644 --- a/metastable_baselines/ppo/policies.py +++ b/metastable_baselines/ppo/policies.py @@ -101,7 +101,7 @@ class ActorCriticPolicy(BasePolicy): optimizer_class: Type[th.optim.Optimizer] = th.optim.Adam, optimizer_kwargs: Optional[Dict[str, Any]] = None, dist_kwargs: Optional[Dict[str, Any]] = None, - sqrt_induced_gaussian=False, + sqrt_induced_gaussian: bool = False, ): if optimizer_kwargs is None: