# Experiment configuration for the run named "ppo".
#
# NOTE(review): this file was found collapsed onto a single line, which YAML
# parsers reject. The block structure below is reconstructed. In particular,
# confirm where the `hyperparameters` mapping ends (assumed: after
# `anneal_lr`) against the code that reads this config.

# Config-group selections. The `_self_` entry suggests a Hydra-style defaults
# list - TODO confirm against the loader.
defaults:
  - env: brax
  - experiment_overrides: default
  - trial_spec: default
  - _self_

hyperparameters:
  # Explicit mantissa dot so YAML 1.1 loaders read a float, not the string
  # "3e-4". Same numeric value as before.
  lr: 3.0e-4
  gamma: 0.99
  lmbda: 0.95
  clip_ratio: 0.2
  value_coef: 0.5
  entropy_coef: 0.0
  # 50 million. Underscore digit separators are only an integer under
  # YAML 1.1, so the value is written without them.
  total_time_steps: 50000000
  num_steps: 64
  num_mini_batches: 32
  num_envs: 2048
  num_epochs: 16
  max_grad_norm: 0.5
  normalize_advantages: true
  normalize_env: true
  anneal_lr: false

# NOTE(review): the keys from here down to `trials` are assumed to be
# top-level; move them under `hyperparameters` if the consumer expects that.
num_eval: 20
max_episode_steps: 1000
name: "ppo"
tags: ["ppo_baseline_retuned"]
seed: 0
num_seeds: 1
tune: false
checkpoint_dir: null
trials: 8

wandb:
  # Set to "online" to activate wandb.
  mode: "online"
  entity: "viper_svg"
  project: "online_sac"