---
# Default experiment configuration (cw2-style): cluster resources, training
# budget, environment, and PPO hyperparameters.
name: DEFAULT
project: alpha

slurm:
  name: false
  partition: "single"
  num_parallel_jobs: 64
  cpus-per-task: 1
  mem-per-cpu: 3000
  time: 1440  # in minutes

repetitions: 3
agents_per_job: 3
reps_per_agent: 1
total_timesteps: 10000

video:
  enable: true
  length: 3000
  frequency: 100

test:
  enable: true
  length: 3000
  frequency: 100  # 32 # 10
  deterministic: Both
  num_envs: 1

env:
  name: BoxPushingDense-v0
  legacy_fancy: true
  normalize_obs: true
  normalize_rew: true
  num_envs: 1
  env_args:
    # was `more_obs:True` (missing space after colon), which YAML parses as
    # the single scalar string "more_obs:True" instead of a key/value pair
    more_obs: true

algo:
  name: PPO
  policy_name: MlpPolicy
  n_steps: 4096
  vf_coef: 1.0e-5
  learning_rate: 5.0e-5
  batch_size: 512
  action_coef: 0
  ent_coef: 0
  normalize_advantage: false  # True
  pca:
    enable: false
    window: 64
    skip_conditioning: true
  Base_Noise: WHITE
  init_std: 1.0

---
# Hyperparameter sweep definition (wandb-style: method + metric + parameters).
sweep:
  enable: true
  # trailing commas removed: `random,` / `minimize,` would be parsed as part
  # of the scalar value and rejected by the sweep consumer
  method: random
  metric:
    goal: minimize
    name: score
  parameters:
    lel: lol  # NOTE(review): placeholder — fill in real sweep parameters

---
# Ablation study: per-key lists of values to toggle against the defaults.
ablative:
  task:
    add_time_awareness: [true]
    add_normalize_obs: [false]
    env_args:
      more_obs: [true]
  algorithm:
    network:
      # ent_coef: [0, 0.001, 0.003]
      normalize_advantage: [true]
    distribution:
      init_std: [0.5]