Testing Observables

This commit is contained in:
Dominik Moritz Roth 2022-08-22 15:05:42 +02:00
parent c6a58b15dd
commit 5c39be5ead

10
test.py
View File

@ -24,8 +24,8 @@ def main(env_name='ColumbusCandyland_Aux10-v0', timesteps=1_000_000, showRes=Tru
ppo = PPO( ppo = PPO(
MlpPolicyPPO, MlpPolicyPPO,
env, env,
projection=KLProjectionLayer(trust_region_coeff=0.01), projection=BaseProjectionLayer(), # KLProjectionLayer(trust_region_coeff=0.01),
policy_kwargs={'dist_kwargs': {'neural_strength': Strength.SCALAR, 'cov_strength': Strength.DIAG, 'parameterization_type': policy_kwargs={'dist_kwargs': {'neural_strength': Strength.NONE, 'cov_strength': Strength.DIAG, 'parameterization_type':
ParametrizationType.NONE, 'enforce_positive_type': EnforcePositiveType.ABS, 'prob_squashing_type': ProbSquashingType.NONE}}, ParametrizationType.NONE, 'enforce_positive_type': EnforcePositiveType.ABS, 'prob_squashing_type': ProbSquashingType.NONE}},
verbose=0, verbose=0,
tensorboard_log=root_path+"/logs_tb/" + tensorboard_log=root_path+"/logs_tb/" +
@ -37,7 +37,7 @@ def main(env_name='ColumbusCandyland_Aux10-v0', timesteps=1_000_000, showRes=Tru
ent_coef=0.1, # 0.1 ent_coef=0.1, # 0.1
vf_coef=0.5, vf_coef=0.5,
use_sde=use_sde, # False use_sde=use_sde, # False
clip_range=1 # 0.2, clip_range=0.2 # 1 # 0.2,
) )
# trl_frob = PPO( # trl_frob = PPO(
# MlpPolicy, # MlpPolicy,
@ -145,9 +145,9 @@ def testModel(model, timesteps, showRes=False, saveModel=False, n_eval_episodes=
if __name__ == '__main__': if __name__ == '__main__':
# main('LunarLanderContinuous-v2') main('LunarLanderContinuous-v2')
# main('ColumbusJustState-v0') # main('ColumbusJustState-v0')
# main('ColumbusStateWithBarriers-v0') # main('ColumbusStateWithBarriers-v0')
# full('ColumbusEasierObstacles-v0') # full('ColumbusEasierObstacles-v0')
main('ColumbusSingle-v0') # main('ColumbusSingle-v0')
# full('LunarLanderContinuous-v2') # full('LunarLanderContinuous-v2')