# fancy_rl/example/config.yaml (originally listed as: 26 lines, 418 B, YAML)

# Policy network settings.
# NOTE(review): field names suggest input/output sizes and hidden-layer
# widths; confirm the exact meaning against the code that loads this file.
policy:
  input_dim: 4
  output_dim: 2
  hidden_sizes: [64, 64]
# PPO hyperparameters, grouped under a single `ppo` mapping.
ppo:
  # Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
  # resolve a bare `3e-4` to a string instead of a float.
  learning_rate: 3.0e-4
  n_steps: 2048
  batch_size: 64
  n_epochs: 10
  gamma: 0.99
  gae_lambda: 0.95
  clip_range: 0.2
# Run-level settings (top-level keys).
seed: 42
total_timesteps: 1000000

# Evaluation settings.
# NOTE(review): the unit of eval_interval is not stated in this file
# (presumably environment steps); confirm against the training loop.
eval_interval: 2048
eval_episodes: 10
eval_deterministic: true
# Logging backends: one list entry per logger, selected by `type`.
loggers:
  - type: terminal
  # The fields below belong to the wandb entry; they must be indented under it
  # or they parse as unrelated top-level keys.
  # NOTE(review): project/entity look like placeholder values; replace them
  # with real ones before use.
  - type: wandb
    project: "PPO_project"
    entity: "your_entity"
    push_interval: 10