---
# Default experiment configuration (cw2-style): cluster resources, training
# budget, environment, and PPO hyperparameters.
name: DEFAULT
project: alpha

slurm:
  name: false
  partition: "single"
  num_parallel_jobs: 64
  cpus-per-task: 1
  mem-per-cpu: 3000
  time: 1440  # in minutes

repetitions: 3
agents_per_job: 3
reps_per_agent: 1
total_timesteps: 10000

video:
  enable: true
  length: 3000
  frequency: 100

test:
  enable: true
  length: 3000
  frequency: 100  # 32 # 10
  deterministic: Both
  num_envs: 1

env:
  name: BoxPushingDense-v0
  legacy_fancy: true
  normalize_obs: true
  normalize_rew: true
  num_envs: 1
  env_args:
    # was `more_obs:True` (missing space after colon), which YAML parses as
    # the single scalar string "more_obs:True" instead of a key/value pair
    more_obs: true

algo:
  name: PPO
  policy_name: MlpPolicy
  n_steps: 4096
  vf_coef: 1.0e-5
  learning_rate: 5.0e-5
  batch_size: 512
  action_coef: 0
  ent_coef: 0
  normalize_advantage: false  # True
  pca:
    enable: false
    window: 64
    skip_conditioning: true
  Base_Noise: WHITE
  init_std: 1.0

---
# Hyperparameter sweep definition (wandb-style: method + metric + parameters).
sweep:
  enable: true
  # trailing commas removed: `random,` / `minimize,` would be parsed as part
  # of the scalar value and rejected by the sweep consumer
  method: random
  metric:
    goal: minimize
    name: score
  parameters:
    lel: lol  # NOTE(review): placeholder — fill in real sweep parameters

---
# Ablation study: per-key lists of values to toggle against the defaults.
ablative:
  task:
    add_time_awareness: [true]
    add_normalize_obs: [false]
    env_args:
      more_obs: [true]
  algorithm:
    network:
      # ent_coef: [0, 0.001, 0.003]
      normalize_advantage: [true]
    distribution:
      init_std: [0.5]