name: DEFAULT

vars:
  foo: bar

slurm:
  name: '{config[name]}'
  partitions:
    - SINGLE
  num_parallel_jobs: 50
  cpus_per_task: 1
  mem-per-cpu: 3000
  time: 1440  # in minutes
  ntasks: 1
  sh_lines: ['']

#runner: sb3
runner: debug

scheduler:
  repetitions: 3
  agents_per_job: 3
  reps_per_agent: 1

wandb:
  project: '{config[name]}'
  sync_tensorboard: True
  monitor_gym: True
  save_code: False

video:
  enable: True
  length: 3000
  frequency: 100

test:
  enable: True
  length: 3000
  frequency: 100  # 32 # 10
  deterministic: Both
  num_envs: 1

env:
  name: BoxPushingDense-v0
  legacy_fancy: True
  normalize_obs: True
  normalize_rew: True
  num_envs: 1
  env_args:
    more_obs: True

algo:
  name: PPO
  total_timesteps: 10000
  policy_name: MlpPolicy
  n_steps: 4096
  vf_coef: 1.0e-5
  learning_rate: 5.0e-5
  batch_size: 512
  action_coef: 0
  ent_coef: 0
  normalize_advantage: False  # True
  pca:
    enable: False
    window: 64
    skip_conditioning: True
    Base_Noise: WHITE
    init_std: 1.0

---
sweep:
  enable: True
  method: random
  metric:
    goal: minimize
    name: score
  parameters:
    lel: lol  # placeholder; see the commented example at the end of this file

---
ablative:
  task:
    add_time_awareness: [True]
    add_normalize_obs: [False]
    env_args:
      more_obs: [True]
  algorithm:
    network:
      #ent_coef: [0, 0.001, 0.003]
      normalize_advantage: [True]
    distribution:
      init_std: [0.5]
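
# Illustrative note (not part of the active config): the `parameters` entry in the
# sweep document above is a placeholder. Assuming the sweep block follows the
# Weights & Biases sweep schema (method / metric / parameters), a filled-in version
# could look like the commented sketch below, reusing hyperparameters that already
# exist in the `algo` block; the ranges and value lists are examples only.
#
# parameters:
#   learning_rate:
#     distribution: log_uniform_values
#     min: 1.0e-5
#     max: 1.0e-3
#   ent_coef:
#     values: [0, 0.001, 0.003]
#   n_steps:
#     values: [2048, 4096]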