---
# Training configuration: policy network shape, PPO hyperparameters,
# run/evaluation settings, and logger backends.
#
# NOTE(review): this file had lost its line breaks and indentation, so the
# nesting below is reconstructed from the key names. Confirm the placement of
# the run-level keys and of `push_interval` against the consumer's schema.

policy:
  input_dim: 4
  output_dim: 2
  hidden_sizes: [64, 64]

ppo:
  # Written with a decimal point on purpose: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `3e-4` as a string, not a float.
  learning_rate: 3.0e-4
  n_steps: 2048
  batch_size: 64
  n_epochs: 10
  gamma: 0.99
  gae_lambda: 0.95
  clip_range: 0.2

# NOTE(review): assumed to be top-level run settings rather than members of
# `ppo` -- TODO confirm.
total_timesteps: 1000000
eval_interval: 2048
eval_deterministic: true
eval_episodes: 10
seed: 42

loggers:
  - type: terminal
  - type: wandb
    project: "PPO_project"
    # NOTE(review): "your_entity" looks like a placeholder; replace it with
    # the real value before running.
    entity: "your_entity"
    # NOTE(review): assumed to belong to the wandb logger entry -- TODO confirm.
    push_interval: 10