Removed old TODOs

2022-08-28 12:07:19 +02:00 · 2022-08-28 12:07:19 +02:00 · 4080ad8135
commit 4080ad8135
parent eb881559d6
3 changed files with 7 additions and 9 deletions
--- a/metastable_baselines/distributions/distributions.py
+++ b/metastable_baselines/distributions/distributions.py
@@ -200,7 +200,6 @@ class UniversalGaussianDistribution(SB3_Distribution):

        assert std_init >= 0.0, "std can not be initialized to a negative value."

-        # TODO: Implement SDE
        self.latent_sde_dim = latent_sde_dim

        mean_actions = nn.Linear(latent_dim, self.action_dim)
@@ -348,7 +347,6 @@ class UniversalGaussianDistribution(SB3_Distribution):

    def get_noise(self, latent_sde: th.Tensor) -> th.Tensor:
        latent_sde = latent_sde if self.learn_features else latent_sde.detach()
-        # # TODO: Good idea?
        latent_sde = th.nn.functional.normalize(latent_sde, dim=-1)
        # Default case: only one exploration matrix
        if len(latent_sde) == 1 or len(latent_sde) != len(self.exploration_matrices):
@@ -579,7 +577,6 @@ class CholNet(nn.Module):
                                                                        dim2=-1)).diag_embed() + chol.triu(1)

    def string(self):
-        # TODO
        return '<CholNet />'


--- a/metastable_baselines/ppo/policies.py
+++ b/metastable_baselines/ppo/policies.py
@@ -79,8 +79,6 @@ class ActorCriticPolicy(BasePolicy):
        excluding the learning rate, to pass to the optimizer
    """

-    # TODO: Allow passing of dist_kwargs into dist
-
    def __init__(
        self,
        observation_space: gym.spaces.Space,
--- a/test.py
+++ b/test.py
@@ -15,18 +15,21 @@ import columbus

 from metastable_baselines.distributions import Strength, ParametrizationType, EnforcePositiveType, ProbSquashingType

+import torch as th
+
 root_path = '.'


 def main(env_name='ColumbusCandyland_Aux10-v0', timesteps=1_000_000, showRes=True, saveModel=True, n_eval_episodes=0):
    env = gym.make(env_name)
    use_sde = False
+    # th.autograd.set_detect_anomaly(True)
    ppo = PPO(
        MlpPolicyPPO,
        env,
-        projection=BaseProjectionLayer(), # KLProjectionLayer(trust_region_coeff=0.01),
-        policy_kwargs={'dist_kwargs': {'neural_strength': Strength.NONE, 'cov_strength': Strength.DIAG, 'parameterization_type':
-                       ParametrizationType.NONE, 'enforce_positive_type': EnforcePositiveType.ABS, 'prob_squashing_type': ProbSquashingType.NONE}},
+        projection=BaseProjectionLayer(),  # KLProjectionLayer(trust_region_coeff=0.01),
+        policy_kwargs={'dist_kwargs': {'neural_strength': Strength.NONE, 'cov_strength': Strength.FULL, 'parameterization_type':
+                       ParametrizationType.CHOL, 'enforce_positive_type': EnforcePositiveType.ABS, 'prob_squashing_type': ProbSquashingType.NONE}},
        verbose=0,
        tensorboard_log=root_path+"/logs_tb/" +
        env_name+"/ppo"+(['', '_sde'][use_sde])+"/",
@@ -37,7 +40,7 @@ def main(env_name='ColumbusCandyland_Aux10-v0', timesteps=1_000_000, showRes=Tru
        ent_coef=0.1,  # 0.1
        vf_coef=0.5,
        use_sde=use_sde,  # False
-        clip_range=0.2 # 1  # 0.2,
+        clip_range=None  # 1  # 0.2,
    )
    # trl_frob = PPO(
    #    MlpPolicy,