# Hydra-style experiment configuration for the "reppo" run.
#
# NOTE(review): the nesting below was reconstructed from the section comments
# because the source had lost its line structure. Please confirm that
# everything from `total_time_steps` through `measure_burnin` belongs under
# `hyperparameters` and that `name` onward is top-level.

defaults:
  - env: brax
  - platform: torch
  - _self_

hyperparameters:
  # env and run settings (mostly don't touch)
  total_time_steps: 50_000_000
  normalize_env: true
  max_episode_steps: 1000
  eval_interval: 2
  num_eval: 20

  # optimization settings (seem very stable)
  # Written with an explicit decimal point so that YAML 1.1 loaders also
  # read it as a float rather than a string.
  lr: 3.0e-4
  anneal_lr: false
  max_grad_norm: 0.5
  polyak: 1.0  # maybe ablate?

  # problem discount settings (need tuning)
  gamma: 0.99
  lmbda: 0.95
  lmbda_min: 0.50  # irrelevant if no exploration noise is added

  # batch settings (need tuning for MJX humanoid)
  num_steps: 128
  num_mini_batches: 128
  num_envs: 1024
  num_epochs: 4

  # exploration settings (currently not touched)
  exploration_noise_max: 1.0
  exploration_noise_min: 1.0
  exploration_base_envs: 0

  # critic architecture settings (need to be increased for MJX humanoid)
  critic_hidden_dim: 512
  actor_hidden_dim: 512
  # vmin / vmax are interpolated from the `env` config node.
  vmin: ${env.vmin}
  vmax: ${env.vmax}
  num_bins: 151
  hl_gauss: true
  use_critic_norm: true
  num_critic_encoder_layers: 2
  num_critic_head_layers: 2
  num_critic_pred_layers: 2
  # NOTE(review): "simplical" looks like a misspelling of "simplicial"; the
  # key is left unchanged because renaming it would also require changing
  # whatever code reads it.
  use_simplical_embedding: false

  # actor architecture settings (seem stable)
  use_actor_norm: true
  num_actor_layers: 3
  actor_min_std: 0.0

  # actor & critic loss settings (seem remarkably stable)
  # -- kl settings
  kl_start: 0.01
  kl_bound: 0.1  # switched to tighter bounds for MJX
  reduce_kl: true
  reverse_kl: false  # previous default "false"
  update_kl_lagrangian: true
  # options: "full", "clipped", "kl_relu_clipped", "kl_bound_clipped", "value"
  actor_kl_clip_mode: "clipped"
  # -- entropy settings
  ent_start: 0.01
  ent_target_mult: 0.5
  update_entropy_lagrangian: true
  # -- auxiliary loss settings
  aux_loss_mult: 1.0
  measure_burnin: 3

name: "reppo"
seed: 0
num_seeds: 1
tune: false
checkpoint_dir: null
num_trials: 10
tags: ["experimental"]

wandb:
  mode: "online"  # set to online to activate wandb
  entity: "viper_svg"
  project: "online_sac"

hydra:
  job:
    chdir: true