diff --git a/test.py b/test.py
old mode 100644
new mode 100755
index 1a3ee7d..e5cdaab
--- a/test.py
+++ b/test.py
@@ -1,6 +1,8 @@
+#!/bin/python3
 import gym
 from gym.envs.registration import register
 import numpy as np
+import os
 import time
 import datetime
 
@@ -11,32 +13,34 @@ from stable_baselines3.common.policies import ActorCriticCnnPolicy, ActorCriticP
 from sb3_trl.trl_pg import TRL_PG
 import columbus
 
+#root_path = os.getcwd()
+root_path = '.'
 
-def main(env_name='ColumbusEasierObstacles-v0'):
+def main(env_name='ColumbusStateWithBarriers-v0'):
     env = gym.make(env_name)
     ppo_latent_sde = PPO(
         "MlpPolicy",
         env,
         verbose=0,
-        tensorboard_log="./logs_tb/"+env_name+"/ppo_latent_sde/",
-        use_sde=True,
+        tensorboard_log=root_path+"/logs_tb/"+env_name+"/ppo_latent_sde/",
+        #use_sde=True,
         sde_sample_freq=30*15,
-        ent_coef=0.0016/1.25, #0.0032
-        vf_coef=0.00025/2, #0.0005
-        gamma=0.99, # 0.95
-        learning_rate=0.005/5 # 0.015
-    )
-    sac_latent_sde = SAC(
-        "MlpPolicy",
-        env,
-        verbose=0,
-        tensorboard_log="./logs_tb/"+env_name+"/sac_latent_sde/",
-        use_sde=True,
-        sde_sample_freq=30*15,
-        ent_coef=0.0016, #0.0032
-        gamma=0.99, # 0.95
-        learning_rate=0.001 # 0.015
+        #ent_coef=0.0016/1.25, #0.0032
+        #vf_coef=0.00025/2, #0.0005
+        #gamma=0.99, # 0.95
+        #learning_rate=0.005/5 # 0.015
     )
+    #sac_latent_sde = SAC(
+    #    "MlpPolicy",
+    #    env,
+    #    verbose=0,
+    #    tensorboard_log=root_path+"/logs_tb/"+env_name+"/sac_latent_sde/",
+    #    use_sde=True,
+    #    sde_sample_freq=30*15,
+    #    ent_coef=0.0016, #0.0032
+    #    gamma=0.99, # 0.95
+    #    learning_rate=0.001 # 0.015
+    #)
     #trl = TRL_PG(
     #    "MlpPolicy",
     #    env,
@@ -44,10 +48,10 @@ def main(env_name='ColumbusEasierObstacles-v0'):
     #    tensorboard_log="./logs_tb/"+env_name+"/trl_pg/",
     #)
 
-    #print('PPO_LATENT_SDE:')
-    #testModel(ppo_latent_sde, 1000000, showRes = True, saveModel=True, n_eval_episodes=3)
-    print('SAC_LATENT_SDE:')
-    testModel(ppo_latent_sde, 250000, showRes = True, saveModel=True, n_eval_episodes=0)
+    print('PPO_LATENT_SDE:')
+    testModel(ppo_latent_sde, 25000, showRes = True, saveModel=True, n_eval_episodes=3)
+    #print('SAC_LATENT_SDE:')
+    #testModel(sac_latent_sde, 250000, showRes = True, saveModel=True, n_eval_episodes=0)
 
     #print('TRL_PG:')
     #testModel(trl)
@@ -58,7 +62,8 @@ def testModel(model, timesteps=150000, showRes=False, saveModel=False, n_eval_ep
 
     if saveModel:
         now = datetime.datetime.now().strftime('%d.%m.%Y-%H:%M')
-        model.save('models/'+model.tensorboard_log.replace('./logs_tb/','').replace('/','_')+now+'.zip')
+        loc = root_path+'/models/'+model.tensorboard_log.replace(root_path+'/logs_tb/','').replace('/','_')+now+'.zip'
+        model.save(loc)
 
     if n_eval_episodes:
         mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=n_eval_episodes, deterministic=False)