reppo/config/sac.yaml
2025-07-15 19:05:07 -04:00

90 lines
2.0 KiB
YAML

# Hydra defaults list: the config groups composed into this config, in order.
# `_self_` comes last, so values set in this file are merged after the groups.
defaults:
  - env: brax
  - experiment_overrides: default
  - trial_spec: default
  - platform: torch
  - _self_
# Algorithm hyperparameters, grouped by concern. Every setting below is nested
# under this key.
hyperparameters:
  # env and run settings (mostly don't touch)
  total_time_steps: 50_000_000
  normalize_env: true
  max_episode_steps: 1000
  eval_interval: 2
  num_eval: 20

  # optimization settings (seem very stable)
  # lr keeps an explicit decimal point: plain YAML 1.1 loaders resolve a bare
  # `3e-4` as a string instead of a float.
  lr: 3.0e-4
  anneal_lr: false
  max_grad_norm: 0.5
  polyak: 1.0  # TODO: maybe ablate?

  # problem discount settings (need tuning)
  gamma: 0.99
  lmbda: 0.95
  lmbda_min: 0.50  # irrelevant if no exploration noise is added

  # batch settings (need tuning for MJX humanoid)
  num_steps: 128
  num_mini_batches: 128
  num_envs: 1024
  num_epochs: 4

  # exploration settings (currently not touched)
  exploration_noise_max: 1.0
  exploration_noise_min: 1.0
  exploration_base_envs: 0

  # critic architecture settings (need to be increased for MJX humanoid)
  critic_hidden_dim: 512
  actor_hidden_dim: 512
  # vmin/vmax are interpolated from the `env` config selected in `defaults`
  vmin: ${env.vmin}
  vmax: ${env.vmax}
  num_bins: 151
  hl_gauss: true
  use_critic_norm: true
  num_critic_encoder_layers: 2
  num_critic_head_layers: 2
  num_critic_pred_layers: 2
  # NOTE(review): key spelling ("simplical") kept as-is; presumably the
  # consumer reads this exact name - confirm before renaming.
  use_simplical_embedding: false

  # actor architecture settings (seem stable)
  use_actor_norm: true
  num_actor_layers: 3
  actor_min_std: 0.0

  # actor & critic loss settings (seem remarkably stable)
  ## kl settings
  kl_start: 0.01
  kl_bound: 0.1  # switched to tighter bounds for MJX
  reduce_kl: true
  reverse_kl: false  # previous default "false"
  update_kl_lagrangian: true
  # options: "full", "clipped", "kl_relu_clipped", "kl_bound_clipped", "value"
  actor_kl_clip_mode: "clipped"
  ## entropy settings
  ent_start: 0.01
  ent_target_mult: 0.5
  update_entropy_lagrangian: true
  ## auxiliary loss settings
  aux_loss_mult: 1.0
  measure_burnin: 3
# Run-level settings: experiment name, seeding, tuning and checkpointing.
name: 'sac'
seed: 0
num_seeds: 1
tune: false
checkpoint_dir: null
# NOTE(review): num_trials presumably only matters when `tune` is true - confirm.
num_trials: 10
tags:
  - 'experimental'
# Weights & Biases logging settings.
wandb:
  mode: "online"  # set to "online" to activate wandb
  entity: "viper_svg"
  project: "online_sac"
# Hydra runtime settings.
hydra:
  job:
    # run each job with its working directory set to the job's output directory
    chdir: true