From 2b725ec58b84d6527e993d8c808ceb2d953d1e62 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Fri, 17 Jun 2022 11:29:36 +0200
Subject: [PATCH] Added test.py for testing the algos (and tensorboard
 integration)

---
 run_tensorboard.sh |  2 ++
 test.py            | 66 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)
 create mode 100755 run_tensorboard.sh
 create mode 100644 test.py

diff --git a/run_tensorboard.sh b/run_tensorboard.sh
new file mode 100755
index 0000000..9623ac9
--- /dev/null
+++ b/run_tensorboard.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+tensorboard --logdir logs_tb/test
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..9bfa910
--- /dev/null
+++ b/test.py
@@ -0,0 +1,66 @@
+import gym
+import time
+
+from stable_baselines3 import PPO, A2C
+from stable_baselines3.common.evaluation import evaluate_policy
+
+from sb3_trl.trl_pg import TRL_PG
+
+
+def main():
+    env = gym.make("LunarLander-v2")
+    ppo = PPO(
+        "MlpPolicy",
+        env,
+        verbose=0,
+        tensorboard_log="./logs_tb/test/",
+    )
+    a2c = A2C(
+        "MlpPolicy",
+        env,
+        verbose=0,
+    )
+    trl = TRL_PG(
+        "MlpPolicy",
+        env,
+        verbose=0,
+        tensorboard_log="./logs_tb/test/",
+    )
+
+    print('PPO:')
+    testModel(ppo)
+    print('A2C:')
+    testModel(a2c)
+    print('TRL_PG:')
+    testModel(trl)
+
+
+def testModel(model, timesteps=50000, showRes=False):
+    # Train the model, then report the mean and std of the episode
+    # reward over 16 stochastic evaluation episodes.
+    env = model.get_env()
+    model.learn(timesteps)
+
+    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=16, deterministic=False)
+
+    print(f'Reward: {mean_reward:.3f}±{std_reward:.2f}')
+
+    if showRes:
+        # Watch the trained agent play, throttled to ~30 FPS
+        obs = env.reset()
+        episode_reward = 0.0
+        for _ in range(1000):
+            time.sleep(1 / 30)
+            action, _ = model.predict(obs, deterministic=False)
+            obs, reward, done, info = env.step(action)
+            env.render()
+            episode_reward += reward
+            if done:
+                #print("Reward:", episode_reward)
+                episode_reward = 0.0
+                obs = env.reset()
+        env.reset()
+
+
+if __name__ == '__main__':
+    main()
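
A minimal usage sketch for the two added files, assuming the sb3_trl package
providing TRL_PG is importable and gym's Box2D extra (required by
LunarLander-v2) is installed:

    $ python test.py        # trains and evaluates PPO, A2C, and TRL_PG; PPO and TRL_PG log to ./logs_tb/test/
    $ ./run_tensorboard.sh  # serves those logs via TensorBoard (default port 6006)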