From 4ec5c65cf2e320cd34b1051ff578a4f55c399de0 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Mon, 7 Nov 2022 13:23:55 +0100
Subject: [PATCH] Tiny fix for other envs

---
 metastable_baselines/ppo/policies.py |  2 +
 test.py                              | 66 ++++++++++++++--------
 2 files changed, 35 insertions(+), 33 deletions(-)

diff --git a/metastable_baselines/ppo/policies.py b/metastable_baselines/ppo/policies.py
index 5d03868..74e0ae6 100644
--- a/metastable_baselines/ppo/policies.py
+++ b/metastable_baselines/ppo/policies.py
@@ -136,6 +136,8 @@ class ActorCriticPolicy(BasePolicy):
         self.normalize_images = normalize_images
         self.log_std_init = log_std_init
         # Keyword arguments for gSDE distribution
+        if dist_kwargs == None:
+            dist_kwargs = {}
         if use_sde:
             add_dist_kwargs = {
                 'use_sde': True,
diff --git a/test.py b/test.py
index b074e54..1ce3ba6 100755
--- a/test.py
+++ b/test.py
@@ -24,49 +24,49 @@ def main(env_name='ColumbusCandyland_Aux10-v0', timesteps=1_000_000, showRes=Tru
     env = gym.make(env_name)
     use_sde = True
     # th.autograd.set_detect_anomaly(True)
-    sac = SAC(
-        MlpPolicySAC,
-        env,
+    #sac = SAC(
+    #    MlpPolicySAC,
+    #    env,
         # KLProjectionLayer(trust_region_coeff=0.01),
         #projection=WassersteinProjectionLayer(trust_region_coeff=0.01),
-        policy_kwargs={'dist_kwargs': {'neural_strength': Strength.NONE, 'cov_strength': Strength.DIAG, 'parameterization_type':
-                                       ParametrizationType.NONE, 'enforce_positive_type': EnforcePositiveType.ABS, 'prob_squashing_type': ProbSquashingType.NONE}},
-        verbose=0,
-        tensorboard_log=root_path+"/logs_tb/" +
-        env_name+"/sac"+(['', '_sde'][use_sde])+"/",
-        learning_rate=3e-4,  # 3e-4,
-        gamma=0.99,
+    #    policy_kwargs={'dist_kwargs': {'neural_strength': Strength.NONE, 'cov_strength': Strength.DIAG, 'parameterization_type':
+    #                                   ParametrizationType.NONE, 'enforce_positive_type': EnforcePositiveType.ABS, 'prob_squashing_type': ProbSquashingType.NONE}},
+    #    verbose=0,
+    #    tensorboard_log=root_path+"/logs_tb/" +
+    #    env_name+"/sac"+(['', '_sde'][use_sde])+"/",
+    #    learning_rate=3e-4,  # 3e-4,
+    #    gamma=0.99,
         #gae_lambda=0.95,
         #normalize_advantage=True,
         #ent_coef=0.1,  # 0.1
         #vf_coef=0.5,
-        use_sde=use_sde,  # False
-        sde_sample_freq=8,
+    #    use_sde=use_sde,  # False
+    #    sde_sample_freq=8,
         #clip_range=None  # 1  # 0.2,
+    #)
+    trl_frob = PPO(
+        MlpPolicyPPO,
+        env,
+        projection=FrobeniusProjectionLayer(),
+        verbose=0,
+        tensorboard_log=root_path+"/logs_tb/"+env_name +
+        "/trl_frob"+(['', '_sde'][use_sde])+"/",
+        learning_rate=3e-4,
+        gamma=0.99,
+        gae_lambda=0.95,
+        normalize_advantage=True,
+        ent_coef=0.03,  # 0.1
+        vf_coef=0.5,
+        use_sde=use_sde,
+        clip_range=2,  # 0.2
     )
-    # trl_frob = PPO(
-    #     MlpPolicy,
-    #     env,
-    #     projection=FrobeniusProjectionLayer(),
-    #     verbose=0,
-    #     tensorboard_log=root_path+"/logs_tb/"+env_name +
-    #     "/trl_frob"+(['', '_sde'][use_sde])+"/",
-    #     learning_rate=3e-4,
-    #     gamma=0.99,
-    #     gae_lambda=0.95,
-    #     normalize_advantage=True,
-    #     ent_coef=0.03,  # 0.1
-    #     vf_coef=0.5,
-    #     use_sde=use_sde,
-    #     clip_range=2,  # 0.2
-    # )
-    print('SAC:')
-    testModel(sac, timesteps, showRes,
-              saveModel, n_eval_episodes)
-    # print('TRL_frob:')
-    # testModel(trl_frob, timesteps, showRes,
+    #print('SAC:')
+    #testModel(sac, timesteps, showRes,
+    #          saveModel, n_eval_episodes)
+    print('TRL_frob:')
+    testModel(trl_frob, timesteps, showRes,
+              saveModel, n_eval_episodes)
 
 
 def full(env_name='ColumbusCandyland_Aux10-v0', timesteps=200_000, saveModel=True, n_eval_episodes=4):