From e7f73dffc131570ef7129d5ed1bc98a05cf030ab Mon Sep 17 00:00:00 2001 From: allenzren Date: Tue, 24 Dec 2024 02:06:17 -0500 Subject: [PATCH] update batch size in D3IL so it works with the new form of gradient update --- cfg/d3il/finetune/avoid_m1/ft_ppo_diffusion_mlp.yaml | 2 +- cfg/d3il/finetune/avoid_m2/ft_ppo_diffusion_mlp.yaml | 2 +- cfg/d3il/finetune/avoid_m3/ft_ppo_diffusion_mlp.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cfg/d3il/finetune/avoid_m1/ft_ppo_diffusion_mlp.yaml b/cfg/d3il/finetune/avoid_m1/ft_ppo_diffusion_mlp.yaml index 380cb8d..a8db0fc 100644 --- a/cfg/d3il/finetune/avoid_m1/ft_ppo_diffusion_mlp.yaml +++ b/cfg/d3il/finetune/avoid_m1/ft_ppo_diffusion_mlp.yaml @@ -76,7 +76,7 @@ train: reward_scale_running: True reward_scale_const: 1.0 gae_lambda: 0.95 - batch_size: ${eval:'round(${train.n_steps} * ${env.n_envs} / 2)'} + batch_size: ${eval:'round(${train.n_steps} * ${env.n_envs} * ${ft_denoising_steps} / 2)'} update_epochs: 10 vf_coef: 0.5 target_kl: 1 diff --git a/cfg/d3il/finetune/avoid_m2/ft_ppo_diffusion_mlp.yaml b/cfg/d3il/finetune/avoid_m2/ft_ppo_diffusion_mlp.yaml index e6e70f0..4396afb 100644 --- a/cfg/d3il/finetune/avoid_m2/ft_ppo_diffusion_mlp.yaml +++ b/cfg/d3il/finetune/avoid_m2/ft_ppo_diffusion_mlp.yaml @@ -76,7 +76,7 @@ train: reward_scale_running: True reward_scale_const: 1.0 gae_lambda: 0.95 - batch_size: ${eval:'round(${train.n_steps} * ${env.n_envs} / 2)'} + batch_size: ${eval:'round(${train.n_steps} * ${env.n_envs} * ${ft_denoising_steps} / 2)'} update_epochs: 10 vf_coef: 0.5 target_kl: 1 diff --git a/cfg/d3il/finetune/avoid_m3/ft_ppo_diffusion_mlp.yaml b/cfg/d3il/finetune/avoid_m3/ft_ppo_diffusion_mlp.yaml index ccdfd2e..8f030b4 100644 --- a/cfg/d3il/finetune/avoid_m3/ft_ppo_diffusion_mlp.yaml +++ b/cfg/d3il/finetune/avoid_m3/ft_ppo_diffusion_mlp.yaml @@ -76,7 +76,7 @@ train: reward_scale_running: True reward_scale_const: 1.0 gae_lambda: 0.95 - batch_size: ${eval:'round(${train.n_steps} * ${env.n_envs} / 2)'} + batch_size: ${eval:'round(${train.n_steps} * ${env.n_envs} * ${ft_denoising_steps} / 2)'} update_epochs: 10 vf_coef: 0.5 target_kl: 1