---
# NOTE(review): this file arrived collapsed onto a single line, which is not
# parseable YAML. The nesting below is reconstructed from key names and key
# order; confirm it against the code that loads this config.

# Settings for the `actor` component.
actor:
  hidden_sizes: [64, 64]
  activation_fn: "ReLU"

# Settings for the `critic` component (currently identical to `actor`).
critic:
  hidden_sizes: [64, 64]
  activation_fn: "ReLU"

# PPO hyperparameters.
ppo:
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `3e-4` as a string, not a float.
  learning_rate: 3.0e-4
  n_steps: 2048
  batch_size: 64
  n_epochs: 10
  gamma: 0.99
  gae_lambda: 0.95
  clip_range: 0.2

# NOTE(review): the next four keys are assumed to be top-level run/evaluation
# settings rather than members of `ppo` -- TODO confirm.
total_timesteps: 1000000
eval_interval: 2048  # same value as ppo.n_steps
eval_deterministic: true
eval_episodes: 10

loggers:
  - backend: "wandb"
    logger_name: "ppo"
    experiment_name: "PPO"
    project: "PPO_project"
    # Looks like a placeholder; presumably must be replaced before use.
    entity: "your_entity"
    # NOTE(review): assumed to belong to this logger entry rather than the
    # top level -- TODO confirm.
    push_interval: 10