# Hydra defaults list: entries are composed in the order written, and later
# entries take precedence over earlier ones.
defaults:
  # environment config group; presumably the source of env.vmin / env.vmax,
  # which hyperparameters.vmin / vmax interpolate -- verify against env configs
  - env: brax
  - platform: torch
  # this file's own keys are merged last, so they win over the groups above
  - _self_

# Algorithm hyperparameters. The section comments record how sensitive each
# group has appeared so far (as noted by the original authors).
hyperparameters:
  # env and run settings (mostly don't touch)
  total_time_steps: 50_000_000
  normalize_env: true
  max_episode_steps: 1000
  eval_interval: 2
  num_eval: 20

  # optimization settings (seem very stable)
  # lr keeps an explicit mantissa dot: a bare `3e-4` is loaded as a *string*
  # by YAML 1.1 loaders such as plain PyYAML, while `3.0e-4` is always a float
  lr: 3.0e-4
  anneal_lr: false
  max_grad_norm: 0.5
  polyak: 1.0  # maybe ablate?

  # problem discount settings (need tuning)
  gamma: 0.99
  lmbda: 0.95
  lmbda_min: 0.50  # irrelevant if no exploration noise is added

  # batch settings (need tuning for MJX humanoid)
  num_steps: 128
  num_mini_batches: 128
  num_envs: 1024
  num_epochs: 4

  # exploration settings (currently not touched)
  exploration_noise_max: 1.0
  exploration_noise_min: 1.0
  exploration_base_envs: 0

  # network width and critic architecture settings
  # (need to be increased for MJX humanoid)
  critic_hidden_dim: 512
  actor_hidden_dim: 512
  # value range is interpolated from the selected env config
  vmin: ${env.vmin}
  vmax: ${env.vmax}
  num_bins: 151
  hl_gauss: true
  use_critic_norm: true
  num_critic_encoder_layers: 2
  num_critic_head_layers: 2
  num_critic_pred_layers: 2
  # NOTE(review): key is spelled "simplical" (sic); left unchanged because the
  # consuming code presumably reads this exact name -- confirm before renaming
  use_simplical_embedding: false

  # actor architecture settings (seem stable)
  use_actor_norm: true
  num_actor_layers: 3
  actor_min_std: 0.0

  # actor & critic loss settings (seem remarkably stable)
  ## kl settings
  kl_start: 0.01
  kl_bound: 0.1  # switched to tighter bounds for MJX
  reduce_kl: true
  reverse_kl: false  # previous default "false"
  update_kl_lagrangian: true
  # options: "full", "clipped", "kl_relu_clipped", "kl_bound_clipped", "value"
  actor_kl_clip_mode: "clipped"
  ## entropy settings
  ent_start: 0.01
  ent_target_mult: 0.5
  update_entropy_lagrangian: true
  ## auxiliary loss settings
  aux_loss_mult: 1.0


measure_burnin: 3

# run identification and bookkeeping
name: 'reppo'
seed: 0
num_seeds: 1
tune: false
checkpoint_dir: null
num_trials: 10
tags:
  - 'experimental'

# Weights & Biases logging
wandb:
  # logging is currently on ("online"); presumably forwarded to
  # wandb.init(mode=...), which also accepts "offline" and "disabled" -- confirm
  mode: 'online'
  entity: 'viper_svg'
  project: 'online_sac'

# Hydra runtime settings.
hydra:
  job:
    # have Hydra switch the working directory to the job's output directory
    chdir: true