Fixed model storage location bug

Dominik Moritz Roth 2022-06-22 13:00:40 +02:00
parent 41d4e94dbe
commit 0e17b4c07e

test.py (Normal file → Executable file)

@@ -1,6 +1,8 @@
 #!/bin/python3
 import gym
 from gym.envs.registration import register
 import numpy as np
+import os
 import time
 import datetime
@@ -11,32 +13,34 @@ from stable_baselines3.common.policies import ActorCriticCnnPolicy, ActorCriticP
 from sb3_trl.trl_pg import TRL_PG
 import columbus
+#root_path = os.getcwd()
+root_path = '.'
-def main(env_name='ColumbusEasierObstacles-v0'):
+def main(env_name='ColumbusStateWithBarriers-v0'):
     env = gym.make(env_name)
     ppo_latent_sde = PPO(
         "MlpPolicy",
         env,
         verbose=0,
-        tensorboard_log="./logs_tb/"+env_name+"/ppo_latent_sde/",
-        use_sde=True,
+        tensorboard_log=root_path+"/logs_tb/"+env_name+"/ppo_latent_sde/",
+        #use_sde=True,
         sde_sample_freq=30*15,
-        ent_coef=0.0016/1.25, #0.0032
-        vf_coef=0.00025/2, #0.0005
-        gamma=0.99, # 0.95
-        learning_rate=0.005/5 # 0.015
-    )
-    sac_latent_sde = SAC(
-        "MlpPolicy",
-        env,
-        verbose=0,
-        tensorboard_log="./logs_tb/"+env_name+"/sac_latent_sde/",
-        use_sde=True,
-        sde_sample_freq=30*15,
-        ent_coef=0.0016, #0.0032
-        gamma=0.99, # 0.95
-        learning_rate=0.001 # 0.015
+        #ent_coef=0.0016/1.25, #0.0032
+        #vf_coef=0.00025/2, #0.0005
+        #gamma=0.99, # 0.95
+        #learning_rate=0.005/5 # 0.015
     )
+    #sac_latent_sde = SAC(
+    #    "MlpPolicy",
+    #    env,
+    #    verbose=0,
+    #    tensorboard_log=root_path+"/logs_tb/"+env_name+"/sac_latent_sde/",
+    #    use_sde=True,
+    #    sde_sample_freq=30*15,
+    #    ent_coef=0.0016, #0.0032
+    #    gamma=0.99, # 0.95
+    #    learning_rate=0.001 # 0.015
+    #)
     #trl = TRL_PG(
     #    "MlpPolicy",
     #    env,
@@ -44,10 +48,10 @@ def main(env_name='ColumbusEasierObstacles-v0'):
     #    tensorboard_log="./logs_tb/"+env_name+"/trl_pg/",
     #)
-    #print('PPO_LATENT_SDE:')
-    #testModel(ppo_latent_sde, 1000000, showRes = True, saveModel=True, n_eval_episodes=3)
-    print('SAC_LATENT_SDE:')
-    testModel(ppo_latent_sde, 250000, showRes = True, saveModel=True, n_eval_episodes=0)
+    print('PPO_LATENT_SDE:')
+    testModel(ppo_latent_sde, 25000, showRes = True, saveModel=True, n_eval_episodes=3)
+    #print('SAC_LATENT_SDE:')
+    #testModel(sac_latent_sde, 250000, showRes = True, saveModel=True, n_eval_episodes=0)
     #print('TRL_PG:')
     #testModel(trl)
@@ -58,7 +62,8 @@ def testModel(model, timesteps=150000, showRes=False, saveModel=False, n_eval_ep
     if saveModel:
         now = datetime.datetime.now().strftime('%d.%m.%Y-%H:%M')
-        model.save('models/'+model.tensorboard_log.replace('./logs_tb/','').replace('/','_')+now+'.zip')
+        loc = root_path+'/models/'+model.tensorboard_log.replace(root_path+'/logs_tb/','').replace('/','_')+now+'.zip'
+        model.save(loc)
     if n_eval_episodes:
         mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=n_eval_episodes, deterministic=False)