From 4ec5c65cf2e320cd34b1051ff578a4f55c399de0 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Mon, 7 Nov 2022 13:23:55 +0100
Subject: [PATCH] Tiny fix for other envs

---
 metastable_baselines/ppo/policies.py |  2 +
 test.py                              | 66 ++++++++++++++--------
 2 files changed, 35 insertions(+), 33 deletions(-)

diff --git a/metastable_baselines/ppo/policies.py b/metastable_baselines/ppo/policies.py
index 5d03868..74e0ae6 100644
--- a/metastable_baselines/ppo/policies.py
+++ b/metastable_baselines/ppo/policies.py
@@ -136,6 +136,8 @@ class ActorCriticPolicy(BasePolicy):
         self.normalize_images = normalize_images
         self.log_std_init = log_std_init
         # Keyword arguments for gSDE distribution
+        if dist_kwargs == None:
+            dist_kwargs = {}
         if use_sde:
             add_dist_kwargs = {
                 'use_sde': True,
diff --git a/test.py b/test.py
index b074e54..1ce3ba6 100755
--- a/test.py
+++ b/test.py
@@ -24,49 +24,49 @@ def main(env_name='ColumbusCandyland_Aux10-v0', timesteps=1_000_000, showRes=Tru
     env = gym.make(env_name)
     use_sde = True
     # th.autograd.set_detect_anomaly(True)
-    sac = SAC(
-        MlpPolicySAC,
-        env,
+    #sac = SAC(
+    #    MlpPolicySAC,
+    #    env,
         # KLProjectionLayer(trust_region_coeff=0.01),
         #projection=WassersteinProjectionLayer(trust_region_coeff=0.01),
-        policy_kwargs={'dist_kwargs': {'neural_strength': Strength.NONE, 'cov_strength': Strength.DIAG, 'parameterization_type':
-                                       ParametrizationType.NONE, 'enforce_positive_type': EnforcePositiveType.ABS, 'prob_squashing_type': ProbSquashingType.NONE}},
-        verbose=0,
-        tensorboard_log=root_path+"/logs_tb/" +
-        env_name+"/sac"+(['', '_sde'][use_sde])+"/",
-        learning_rate=3e-4,  # 3e-4,
-        gamma=0.99,
+    #    policy_kwargs={'dist_kwargs': {'neural_strength': Strength.NONE, 'cov_strength': Strength.DIAG, 'parameterization_type':
+    #                                   ParametrizationType.NONE, 'enforce_positive_type': EnforcePositiveType.ABS, 'prob_squashing_type': ProbSquashingType.NONE}},
+    #    verbose=0,
+    #    tensorboard_log=root_path+"/logs_tb/" +
+    #    env_name+"/sac"+(['', '_sde'][use_sde])+"/",
+    #    learning_rate=3e-4,  # 3e-4,
+    #    gamma=0.99,
         #gae_lambda=0.95,
         #normalize_advantage=True,
         #ent_coef=0.1,  # 0.1
         #vf_coef=0.5,
-        use_sde=use_sde,  # False
-        sde_sample_freq=8,
+    #    use_sde=use_sde,  # False
+    #    sde_sample_freq=8,
         #clip_range=None  # 1  # 0.2,
+    #)
+    trl_frob = PPO(
+        MlpPolicyPPO,
+        env,
+        projection=FrobeniusProjectionLayer(),
+        verbose=0,
+        tensorboard_log=root_path+"/logs_tb/"+env_name +
+        "/trl_frob"+(['', '_sde'][use_sde])+"/",
+        learning_rate=3e-4,
+        gamma=0.99,
+        gae_lambda=0.95,
+        normalize_advantage=True,
+        ent_coef=0.03,  # 0.1
+        vf_coef=0.5,
+        use_sde=use_sde,
+        clip_range=2,  # 0.2
     )
-    # trl_frob = PPO(
-    #     MlpPolicy,
-    #     env,
-    #     projection=FrobeniusProjectionLayer(),
-    #     verbose=0,
-    #     tensorboard_log=root_path+"/logs_tb/"+env_name +
-    #     "/trl_frob"+(['', '_sde'][use_sde])+"/",
-    #     learning_rate=3e-4,
-    #     gamma=0.99,
-    #     gae_lambda=0.95,
-    #     normalize_advantage=True,
-    #     ent_coef=0.03,  # 0.1
-    #     vf_coef=0.5,
-    #     use_sde=use_sde,
-    #     clip_range=2,  # 0.2
-    # )
-    print('SAC:')
-    testModel(sac, timesteps, showRes,
-              saveModel, n_eval_episodes)
-    # print('TRL_frob:')
-    # testModel(trl_frob, timesteps, showRes,
+    #print('SAC:')
+    #testModel(sac, timesteps, showRes,
+    #          saveModel, n_eval_episodes)
+    print('TRL_frob:')
+    testModel(trl_frob, timesteps, showRes,
+              saveModel, n_eval_episodes)
 
 
 def full(env_name='ColumbusCandyland_Aux10-v0', timesteps=200_000, saveModel=True, n_eval_episodes=4):