diff --git a/agent/eval/eval_agent.py b/agent/eval/eval_agent.py index d982bd5..d6774e3 100644 --- a/agent/eval/eval_agent.py +++ b/agent/eval/eval_agent.py @@ -57,6 +57,7 @@ class EvalAgent: self.horizon_steps = cfg.horizon_steps self.max_episode_steps = cfg.env.max_episode_steps self.reset_at_iteration = cfg.env.get("reset_at_iteration", True) + self.save_full_observations = cfg.env.get("save_full_observations", False) self.furniture_sparse_reward = ( cfg.env.specific.get("sparse_reward", False) if "specific" in cfg.env @@ -85,6 +86,10 @@ class EvalAgent: assert not ( self.n_render <= 0 and self.render_video ), "Need to set n_render > 0 if saving video" + self.traj_plotter = ( + hydra.utils.instantiate(cfg.plotter) + if "plotter" in cfg else None + ) def run(self): pass diff --git a/agent/eval/eval_diffusion_agent.py b/agent/eval/eval_diffusion_agent.py index 577c11b..6833372 100644 --- a/agent/eval/eval_diffusion_agent.py +++ b/agent/eval/eval_diffusion_agent.py @@ -37,6 +37,11 @@ class EvalDiffusionAgent(EvalAgent): prev_obs_venv = self.reset_env_all(options_venv=options_venv) firsts_trajs[0] = 1 reward_trajs = np.zeros((self.n_steps, self.n_envs)) + if self.save_full_observations: # state-only + obs_full_trajs = np.empty((0, self.n_envs, self.obs_dim)) + obs_full_trajs = np.vstack( + (obs_full_trajs, prev_obs_venv["state"][:, -1][None]) + ) # Collect a set of trajectories from env for step in range(self.n_steps): @@ -62,6 +67,13 @@ class EvalDiffusionAgent(EvalAgent): ) reward_trajs[step] = reward_venv firsts_trajs[step + 1] = terminated_venv | truncated_venv + if self.save_full_observations: # state-only + obs_full_venv = np.array( + [info["full_obs"]["state"] for info in info_venv] + ) # n_envs x act_steps x obs_dim + obs_full_trajs = np.vstack( + (obs_full_trajs, obs_full_venv.transpose(1, 0, 2)) + ) # update for next step prev_obs_venv = obs_venv @@ -108,6 +120,16 @@ class EvalDiffusionAgent(EvalAgent): success_rate = 0 log.info("[WARNING] No episode 
completed within the iteration!") + # Plot state trajectories (only in D3IL) + if self.traj_plotter is not None: + self.traj_plotter( + obs_full_trajs=obs_full_trajs, + n_render=self.n_render, + max_episode_steps=self.max_episode_steps, + render_dir=self.render_dir, + itr=0, + ) + # Log loss and save metrics time = timer() log.info( diff --git a/cfg/d3il/eval/avoid_m1/eval_diffusion_mlp.yaml b/cfg/d3il/eval/avoid_m1/eval_diffusion_mlp.yaml new file mode 100644 index 0000000..e3d0653 --- /dev/null +++ b/cfg/d3il/eval/avoid_m1/eval_diffusion_mlp.yaml @@ -0,0 +1,68 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent + +name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/d3il-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: +normalization_path: ${oc.env:DPPO_DATA_DIR}/d3il/avoid_m1/normalization.npz + +seed: 42 +device: cuda:0 +env_name: avoiding-m5 +obs_dim: 4 +action_dim: 2 +denoising_steps: 20 +cond_steps: 1 +horizon_steps: 4 +act_steps: 4 + +n_steps: 25 +render_num: 40 + +plotter: + _target_: env.plot_traj.TrajPlotter + env_type: avoid + normalization_path: ${normalization_path} + +env: + n_envs: 40 + name: ${env_name} + max_episode_steps: 100 + reset_at_iteration: True + save_video: False + best_reward_threshold_for_success: 2 + save_full_observations: True + wrappers: + d3il_lowdim: + normalization_path: ${normalization_path} + multi_step: + n_obs_steps: ${cond_steps} + n_action_steps: ${act_steps} + max_episode_steps: ${env.max_episode_steps} + pass_full_observations: ${env.save_full_observations} + reset_within_step: False + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + # + network_path: ${base_policy_path} + network: + _target_: model.diffusion.mlp_diffusion.DiffusionMLP + time_dim: 16 + mlp_dims: [512, 512, 512] + activation_type: ReLU + 
residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + horizon_steps: ${horizon_steps} + action_dim: ${action_dim} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/d3il/pretrain/avoid_m1/pre_diffusion_mlp.yaml b/cfg/d3il/pretrain/avoid_m1/pre_diffusion_mlp.yaml index d938219..479868d 100644 --- a/cfg/d3il/pretrain/avoid_m1/pre_diffusion_mlp.yaml +++ b/cfg/d3il/pretrain/avoid_m1/pre_diffusion_mlp.yaml @@ -25,12 +25,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 15000 + n_epochs: 5000 batch_size: 16 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 15000 + first_cycle_steps: 5000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/d3il/pretrain/avoid_m1/pre_gaussian_mlp.yaml b/cfg/d3il/pretrain/avoid_m1/pre_gaussian_mlp.yaml index cc9898d..13ea725 100644 --- a/cfg/d3il/pretrain/avoid_m1/pre_gaussian_mlp.yaml +++ b/cfg/d3il/pretrain/avoid_m1/pre_gaussian_mlp.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 10000 + n_epochs: 5000 batch_size: 16 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 5000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/d3il/pretrain/avoid_m1/pre_gmm_mlp.yaml b/cfg/d3il/pretrain/avoid_m1/pre_gmm_mlp.yaml index 24c4fef..0f6d6d0 100644 --- a/cfg/d3il/pretrain/avoid_m1/pre_gmm_mlp.yaml +++ b/cfg/d3il/pretrain/avoid_m1/pre_gmm_mlp.yaml @@ -25,12 +25,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 10000 - batch_size: 32 + n_epochs: 5000 + batch_size: 16 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 5000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/d3il/pretrain/avoid_m2/pre_diffusion_mlp.yaml b/cfg/d3il/pretrain/avoid_m2/pre_diffusion_mlp.yaml index 
1d7479e..cb4aaaf 100644 --- a/cfg/d3il/pretrain/avoid_m2/pre_diffusion_mlp.yaml +++ b/cfg/d3il/pretrain/avoid_m2/pre_diffusion_mlp.yaml @@ -25,12 +25,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 15000 + n_epochs: 5000 batch_size: 16 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 15000 + first_cycle_steps: 5000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/d3il/pretrain/avoid_m2/pre_gaussian_mlp.yaml b/cfg/d3il/pretrain/avoid_m2/pre_gaussian_mlp.yaml index 0828b4d..ed1aa50 100644 --- a/cfg/d3il/pretrain/avoid_m2/pre_gaussian_mlp.yaml +++ b/cfg/d3il/pretrain/avoid_m2/pre_gaussian_mlp.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 10000 + n_epochs: 5000 batch_size: 16 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 5000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/d3il/pretrain/avoid_m2/pre_gmm_mlp.yaml b/cfg/d3il/pretrain/avoid_m2/pre_gmm_mlp.yaml index ae1af2a..0be031c 100644 --- a/cfg/d3il/pretrain/avoid_m2/pre_gmm_mlp.yaml +++ b/cfg/d3il/pretrain/avoid_m2/pre_gmm_mlp.yaml @@ -25,12 +25,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 10000 - batch_size: 32 + n_epochs: 5000 + batch_size: 16 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 5000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/d3il/pretrain/avoid_m3/pre_diffusion_mlp.yaml b/cfg/d3il/pretrain/avoid_m3/pre_diffusion_mlp.yaml index 4bdd65a..580664b 100644 --- a/cfg/d3il/pretrain/avoid_m3/pre_diffusion_mlp.yaml +++ b/cfg/d3il/pretrain/avoid_m3/pre_diffusion_mlp.yaml @@ -25,12 +25,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 15000 + n_epochs: 5000 batch_size: 16 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 15000 + first_cycle_steps: 5000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git 
a/cfg/d3il/pretrain/avoid_m3/pre_gaussian_mlp.yaml b/cfg/d3il/pretrain/avoid_m3/pre_gaussian_mlp.yaml index b2aeaf7..7eb09bd 100644 --- a/cfg/d3il/pretrain/avoid_m3/pre_gaussian_mlp.yaml +++ b/cfg/d3il/pretrain/avoid_m3/pre_gaussian_mlp.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 10000 + n_epochs: 5000 batch_size: 16 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 5000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/d3il/pretrain/avoid_m3/pre_gmm_mlp.yaml b/cfg/d3il/pretrain/avoid_m3/pre_gmm_mlp.yaml index e860bc0..9b908b0 100644 --- a/cfg/d3il/pretrain/avoid_m3/pre_gmm_mlp.yaml +++ b/cfg/d3il/pretrain/avoid_m3/pre_gmm_mlp.yaml @@ -25,12 +25,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 10000 + n_epochs: 5000 batch_size: 32 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 5000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/finetuning.md b/cfg/finetuning.md index b7532f2..6fde4df 100644 --- a/cfg/finetuning.md +++ b/cfg/finetuning.md @@ -1,5 +1,7 @@ ## Fine-tuning experiments +**Update, Nov 20 2024**: In v0.7 we updated the fine-tuning configs as we find sample efficiency can be improved with higher actor learning rate and other hyperparameters. If you would like to replicate the original experimental results from the paper, please use the configs from v0.6. Otherwise we recommend starting with configs from v0.7 for your applications. + ### Comparing diffusion-based RL algorithms (Sec. 5.1) Gym configs are under `cfg/gym/finetune//`, and the naming follows `ft__diffusion_mlp`, e.g., `ft_awr_diffusion_mlp`. `alg_name` is one of `rwr`, `awr`, `dipo`, `idql`, `dql`, `qsm`, `ppo` (DPPO), `ppo_exact` (exact likelihood). They share the same pre-trained checkpoint in each env. 
diff --git a/cfg/furniture/eval/lamp_low/eval_diffusion_mlp.yaml b/cfg/furniture/eval/lamp_low/eval_diffusion_mlp.yaml new file mode 100644 index 0000000..ea712a7 --- /dev/null +++ b/cfg/furniture/eval/lamp_low/eval_diffusion_mlp.yaml @@ -0,0 +1,66 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent + +name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: +normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth + +seed: 42 +device: cuda:0 +env_name: ${env.specific.furniture}_${env.specific.randomness}_dim +obs_dim: 44 +action_dim: 10 +denoising_steps: 100 +cond_steps: 1 +horizon_steps: 8 +act_steps: 8 +use_ddim: True +ddim_steps: 5 + +n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'} +render_num: 0 + +env: + n_envs: 1000 + name: ${env_name} + env_type: furniture + max_episode_steps: 1000 + best_reward_threshold_for_success: 2 + specific: + headless: true + furniture: lamp + randomness: low + normalization_path: ${normalization_path} + obs_steps: ${cond_steps} + act_steps: ${act_steps} + sparse_reward: True + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + randn_clip_value: 3 + # + use_ddim: ${use_ddim} + ddim_steps: ${ddim_steps} + network_path: ${base_policy_path} + network: + _target_: model.diffusion.mlp_diffusion.DiffusionMLP + time_dim: 32 + mlp_dims: [1024, 1024, 1024, 1024, 1024, 1024, 1024] + cond_mlp_dims: [512, 64] + use_layernorm: True # needed for larger MLP + residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + horizon_steps: ${horizon_steps} + action_dim: ${action_dim} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: 
${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/furniture/eval/lamp_low/eval_diffusion_unet.yaml b/cfg/furniture/eval/lamp_low/eval_diffusion_unet.yaml new file mode 100644 index 0000000..6de459a --- /dev/null +++ b/cfg/furniture/eval/lamp_low/eval_diffusion_unet.yaml @@ -0,0 +1,68 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent + +name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: +normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth + +seed: 42 +device: cuda:0 +env_name: ${env.specific.furniture}_${env.specific.randomness}_dim +obs_dim: 44 +action_dim: 10 +denoising_steps: 100 +cond_steps: 1 +horizon_steps: 16 +act_steps: 8 +use_ddim: True +ddim_steps: 5 + +n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'} +render_num: 0 + +env: + n_envs: 1000 + name: ${env_name} + env_type: furniture + max_episode_steps: 1000 + best_reward_threshold_for_success: 2 + specific: + headless: true + furniture: lamp + randomness: low + normalization_path: ${normalization_path} + obs_steps: ${cond_steps} + act_steps: ${act_steps} + sparse_reward: True + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + randn_clip_value: 3 + # + use_ddim: ${use_ddim} + ddim_steps: ${ddim_steps} + network_path: ${base_policy_path} + network: + _target_: model.diffusion.unet.Unet1D + diffusion_step_embed_dim: 16 + dim: 64 + dim_mults: [1, 2, 4] + kernel_size: 5 + n_groups: 8 + smaller_encoder: False + cond_predict_scale: True + groupnorm_eps: 1e-4 # not important + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + action_dim: ${action_dim} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: 
${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/furniture/eval/one_leg_low/eval_diffusion_mlp.yaml b/cfg/furniture/eval/one_leg_low/eval_diffusion_mlp.yaml index 2f8d0ef..3694c70 100644 --- a/cfg/furniture/eval/one_leg_low/eval_diffusion_mlp.yaml +++ b/cfg/furniture/eval/one_leg_low/eval_diffusion_mlp.yaml @@ -7,7 +7,7 @@ _target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps} logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} -base_policy_path: ${oc.env:DPPO_LOG_DIR}/furniture-pretrain/one_leg/one_leg_low_dim_pre_diffusion_mlp_ta8_td100/2024-07-22_20-01-16/checkpoint/state_8000.pt +base_policy_path: normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth seed: 42 diff --git a/cfg/furniture/eval/one_leg_low/eval_diffusion_unet.yaml b/cfg/furniture/eval/one_leg_low/eval_diffusion_unet.yaml new file mode 100644 index 0000000..c1626d9 --- /dev/null +++ b/cfg/furniture/eval/one_leg_low/eval_diffusion_unet.yaml @@ -0,0 +1,68 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent + +name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: +normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth + +seed: 42 +device: cuda:0 +env_name: ${env.specific.furniture}_${env.specific.randomness}_dim +obs_dim: 58 +action_dim: 10 +denoising_steps: 100 +cond_steps: 1 +horizon_steps: 16 +act_steps: 8 +use_ddim: True +ddim_steps: 5 + +n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'} +render_num: 0 + +env: + n_envs: 1000 + name: ${env_name} + 
env_type: furniture + max_episode_steps: 700 + best_reward_threshold_for_success: 1 + specific: + headless: true + furniture: one_leg + randomness: low + normalization_path: ${normalization_path} + obs_steps: ${cond_steps} + act_steps: ${act_steps} + sparse_reward: True + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + randn_clip_value: 3 + # + use_ddim: ${use_ddim} + ddim_steps: ${ddim_steps} + network_path: ${base_policy_path} + network: + _target_: model.diffusion.unet.Unet1D + diffusion_step_embed_dim: 16 + dim: 64 + dim_mults: [1, 2, 4] + kernel_size: 5 + n_groups: 8 + smaller_encoder: False + cond_predict_scale: True + groupnorm_eps: 1e-4 # not important + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + action_dim: ${action_dim} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/furniture/eval/round_table_low/eval_diffusion_mlp.yaml b/cfg/furniture/eval/round_table_low/eval_diffusion_mlp.yaml new file mode 100644 index 0000000..9109faf --- /dev/null +++ b/cfg/furniture/eval/round_table_low/eval_diffusion_mlp.yaml @@ -0,0 +1,66 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent + +name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: +normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth + +seed: 42 +device: cuda:0 +env_name: ${env.specific.furniture}_${env.specific.randomness}_dim +obs_dim: 44 +action_dim: 10 +denoising_steps: 100 +cond_steps: 1 +horizon_steps: 8 +act_steps: 8 +use_ddim: True +ddim_steps: 5 + +n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'} +render_num: 0 
+ +env: + n_envs: 1000 + name: ${env_name} + env_type: furniture + max_episode_steps: 1000 + best_reward_threshold_for_success: 2 + specific: + headless: true + furniture: round_table + randomness: low + normalization_path: ${normalization_path} + obs_steps: ${cond_steps} + act_steps: ${act_steps} + sparse_reward: True + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + randn_clip_value: 3 + # + use_ddim: ${use_ddim} + ddim_steps: ${ddim_steps} + network_path: ${base_policy_path} + network: + _target_: model.diffusion.mlp_diffusion.DiffusionMLP + time_dim: 32 + mlp_dims: [1024, 1024, 1024, 1024, 1024, 1024, 1024] + cond_mlp_dims: [512, 64] + use_layernorm: True # needed for larger MLP + residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + horizon_steps: ${horizon_steps} + action_dim: ${action_dim} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/furniture/eval/round_table_low/eval_diffusion_unet.yaml b/cfg/furniture/eval/round_table_low/eval_diffusion_unet.yaml new file mode 100644 index 0000000..daf050e --- /dev/null +++ b/cfg/furniture/eval/round_table_low/eval_diffusion_unet.yaml @@ -0,0 +1,68 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent + +name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: +normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth + +seed: 42 +device: cuda:0 +env_name: ${env.specific.furniture}_${env.specific.randomness}_dim +obs_dim: 44 +action_dim: 10 +denoising_steps: 100 +cond_steps: 1 +horizon_steps: 16 +act_steps: 8 +use_ddim: True +ddim_steps: 5 
+ +n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'} +render_num: 0 + +env: + n_envs: 1000 + name: ${env_name} + env_type: furniture + max_episode_steps: 1000 + best_reward_threshold_for_success: 2 + specific: + headless: true + furniture: round_table + randomness: low + normalization_path: ${normalization_path} + obs_steps: ${cond_steps} + act_steps: ${act_steps} + sparse_reward: True + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + randn_clip_value: 3 + # + use_ddim: ${use_ddim} + ddim_steps: ${ddim_steps} + network_path: ${base_policy_path} + network: + _target_: model.diffusion.unet.Unet1D + diffusion_step_embed_dim: 16 + dim: 64 + dim_mults: [1, 2, 4] + kernel_size: 5 + n_groups: 8 + smaller_encoder: False + cond_predict_scale: True + groupnorm_eps: 1e-4 # not important + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + action_dim: ${action_dim} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/furniture/pretrain/lamp_low/pre_diffusion_mlp.yaml b/cfg/furniture/pretrain/lamp_low/pre_diffusion_mlp.yaml index a9b4fa6..438d4cf 100644 --- a/cfg/furniture/pretrain/lamp_low/pre_diffusion_mlp.yaml +++ b/cfg/furniture/pretrain/lamp_low/pre_diffusion_mlp.yaml @@ -31,7 +31,7 @@ train: learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 8000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/furniture/pretrain/lamp_low/pre_diffusion_unet.yaml b/cfg/furniture/pretrain/lamp_low/pre_diffusion_unet.yaml index bec9393..61785de 100644 --- a/cfg/furniture/pretrain/lamp_low/pre_diffusion_unet.yaml +++ b/cfg/furniture/pretrain/lamp_low/pre_diffusion_unet.yaml @@ -31,7 +31,7 @@ train: learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 8000 
warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/furniture/pretrain/lamp_low/pre_gaussian_mlp.yaml b/cfg/furniture/pretrain/lamp_low/pre_gaussian_mlp.yaml index e07591d..bd383a9 100644 --- a/cfg/furniture/pretrain/lamp_low/pre_gaussian_mlp.yaml +++ b/cfg/furniture/pretrain/lamp_low/pre_gaussian_mlp.yaml @@ -30,7 +30,7 @@ train: learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/furniture/pretrain/lamp_med/pre_diffusion_mlp.yaml b/cfg/furniture/pretrain/lamp_med/pre_diffusion_mlp.yaml index d6fb48d..f83fab4 100644 --- a/cfg/furniture/pretrain/lamp_med/pre_diffusion_mlp.yaml +++ b/cfg/furniture/pretrain/lamp_med/pre_diffusion_mlp.yaml @@ -31,7 +31,7 @@ train: learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 8000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/furniture/pretrain/lamp_med/pre_diffusion_unet.yaml b/cfg/furniture/pretrain/lamp_med/pre_diffusion_unet.yaml index 4ed0a25..c2c8568 100644 --- a/cfg/furniture/pretrain/lamp_med/pre_diffusion_unet.yaml +++ b/cfg/furniture/pretrain/lamp_med/pre_diffusion_unet.yaml @@ -31,7 +31,7 @@ train: learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 8000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/furniture/pretrain/lamp_med/pre_gaussian_mlp.yaml b/cfg/furniture/pretrain/lamp_med/pre_gaussian_mlp.yaml index 61b3ac1..c58ef3b 100644 --- a/cfg/furniture/pretrain/lamp_med/pre_gaussian_mlp.yaml +++ b/cfg/furniture/pretrain/lamp_med/pre_gaussian_mlp.yaml @@ -30,7 +30,7 @@ train: learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/furniture/pretrain/one_leg_low/pre_diffusion_mlp.yaml 
b/cfg/furniture/pretrain/one_leg_low/pre_diffusion_mlp.yaml index 6c733d7..bfbb4c4 100644 --- a/cfg/furniture/pretrain/one_leg_low/pre_diffusion_mlp.yaml +++ b/cfg/furniture/pretrain/one_leg_low/pre_diffusion_mlp.yaml @@ -31,7 +31,7 @@ train: learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 8000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/furniture/pretrain/one_leg_low/pre_diffusion_unet.yaml b/cfg/furniture/pretrain/one_leg_low/pre_diffusion_unet.yaml index c2a44da..57d0a1f 100644 --- a/cfg/furniture/pretrain/one_leg_low/pre_diffusion_unet.yaml +++ b/cfg/furniture/pretrain/one_leg_low/pre_diffusion_unet.yaml @@ -31,7 +31,7 @@ train: learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 8000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/furniture/pretrain/one_leg_low/pre_gaussian_mlp.yaml b/cfg/furniture/pretrain/one_leg_low/pre_gaussian_mlp.yaml index 5175736..2d43baa 100644 --- a/cfg/furniture/pretrain/one_leg_low/pre_gaussian_mlp.yaml +++ b/cfg/furniture/pretrain/one_leg_low/pre_gaussian_mlp.yaml @@ -30,7 +30,7 @@ train: learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/furniture/pretrain/one_leg_med/pre_diffusion_mlp.yaml b/cfg/furniture/pretrain/one_leg_med/pre_diffusion_mlp.yaml index d06a3a7..551b028 100644 --- a/cfg/furniture/pretrain/one_leg_med/pre_diffusion_mlp.yaml +++ b/cfg/furniture/pretrain/one_leg_med/pre_diffusion_mlp.yaml @@ -31,7 +31,7 @@ train: learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 8000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/furniture/pretrain/one_leg_med/pre_diffusion_unet.yaml b/cfg/furniture/pretrain/one_leg_med/pre_diffusion_unet.yaml index 8a31f26..ccf0e67 100644 --- 
a/cfg/furniture/pretrain/one_leg_med/pre_diffusion_unet.yaml +++ b/cfg/furniture/pretrain/one_leg_med/pre_diffusion_unet.yaml @@ -31,7 +31,7 @@ train: learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 8000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/furniture/pretrain/one_leg_med/pre_gaussian_mlp.yaml b/cfg/furniture/pretrain/one_leg_med/pre_gaussian_mlp.yaml index e7b9e7d..b58dd2b 100644 --- a/cfg/furniture/pretrain/one_leg_med/pre_gaussian_mlp.yaml +++ b/cfg/furniture/pretrain/one_leg_med/pre_gaussian_mlp.yaml @@ -25,12 +25,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 10000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/furniture/pretrain/round_table_low/pre_diffusion_mlp.yaml b/cfg/furniture/pretrain/round_table_low/pre_diffusion_mlp.yaml index a49e1ce..66a23a4 100644 --- a/cfg/furniture/pretrain/round_table_low/pre_diffusion_mlp.yaml +++ b/cfg/furniture/pretrain/round_table_low/pre_diffusion_mlp.yaml @@ -31,7 +31,7 @@ train: learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 8000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/furniture/pretrain/round_table_low/pre_diffusion_unet.yaml b/cfg/furniture/pretrain/round_table_low/pre_diffusion_unet.yaml index 4fd8cf0..63d4df7 100644 --- a/cfg/furniture/pretrain/round_table_low/pre_diffusion_unet.yaml +++ b/cfg/furniture/pretrain/round_table_low/pre_diffusion_unet.yaml @@ -31,7 +31,7 @@ train: learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 8000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/furniture/pretrain/round_table_low/pre_gaussian_mlp.yaml b/cfg/furniture/pretrain/round_table_low/pre_gaussian_mlp.yaml index 
a2252e6..e44488c 100644 --- a/cfg/furniture/pretrain/round_table_low/pre_gaussian_mlp.yaml +++ b/cfg/furniture/pretrain/round_table_low/pre_gaussian_mlp.yaml @@ -30,7 +30,7 @@ train: learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/furniture/pretrain/round_table_med/pre_diffusion_mlp.yaml b/cfg/furniture/pretrain/round_table_med/pre_diffusion_mlp.yaml index b81c31d..a5f2865 100644 --- a/cfg/furniture/pretrain/round_table_med/pre_diffusion_mlp.yaml +++ b/cfg/furniture/pretrain/round_table_med/pre_diffusion_mlp.yaml @@ -31,7 +31,7 @@ train: learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 8000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/furniture/pretrain/round_table_med/pre_diffusion_unet.yaml b/cfg/furniture/pretrain/round_table_med/pre_diffusion_unet.yaml index cee0254..b8a5fec 100644 --- a/cfg/furniture/pretrain/round_table_med/pre_diffusion_unet.yaml +++ b/cfg/furniture/pretrain/round_table_med/pre_diffusion_unet.yaml @@ -31,7 +31,7 @@ train: learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 8000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/furniture/pretrain/round_table_med/pre_gaussian_mlp.yaml b/cfg/furniture/pretrain/round_table_med/pre_gaussian_mlp.yaml index c7d0e30..041b511 100644 --- a/cfg/furniture/pretrain/round_table_med/pre_gaussian_mlp.yaml +++ b/cfg/furniture/pretrain/round_table_med/pre_gaussian_mlp.yaml @@ -30,7 +30,7 @@ train: learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/gym/eval/halfcheetah-v2/eval_diffusion_mlp.yaml b/cfg/gym/eval/halfcheetah-v2/eval_diffusion_mlp.yaml index bfef1e1..b8e0404 100644 --- 
a/cfg/gym/eval/halfcheetah-v2/eval_diffusion_mlp.yaml +++ b/cfg/gym/eval/halfcheetah-v2/eval_diffusion_mlp.yaml @@ -17,10 +17,10 @@ obs_dim: 17 action_dim: 6 denoising_steps: 20 cond_steps: 1 -horizon_steps: 1 -act_steps: 1 +horizon_steps: 4 +act_steps: 4 -n_steps: 1000 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation. +n_steps: 250 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation. render_num: 0 env: diff --git a/cfg/gym/eval/hopper-v2/eval_diffusion_mlp.yaml b/cfg/gym/eval/hopper-v2/eval_diffusion_mlp.yaml index 754ed1e..4c0fcac 100644 --- a/cfg/gym/eval/hopper-v2/eval_diffusion_mlp.yaml +++ b/cfg/gym/eval/hopper-v2/eval_diffusion_mlp.yaml @@ -20,7 +20,7 @@ cond_steps: 1 horizon_steps: 4 act_steps: 4 -n_steps: 500 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation. +n_steps: 250 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation. 
render_num: 0 env: diff --git a/cfg/gym/eval/walker2d-v2/eval_diffusion_mlp.yaml b/cfg/gym/eval/walker2d-v2/eval_diffusion_mlp.yaml new file mode 100644 index 0000000..1b1d2b0 --- /dev/null +++ b/cfg/gym/eval/walker2d-v2/eval_diffusion_mlp.yaml @@ -0,0 +1,61 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent + +name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/gym-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: +normalization_path: ${oc.env:DPPO_DATA_DIR}/gym/${env_name}/normalization.npz + +seed: 42 +device: cuda:0 +env_name: walker2d-medium-v2 +obs_dim: 17 +action_dim: 6 +denoising_steps: 20 +cond_steps: 1 +horizon_steps: 4 +act_steps: 4 + +n_steps: 250 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation. +render_num: 0 + +env: + n_envs: 40 + name: ${env_name} + max_episode_steps: 1000 + reset_at_iteration: False + save_video: False + best_reward_threshold_for_success: 3 # success rate not relevant for gym tasks + wrappers: + mujoco_locomotion_lowdim: + normalization_path: ${normalization_path} + multi_step: + n_obs_steps: ${cond_steps} + n_action_steps: ${act_steps} + max_episode_steps: ${env.max_episode_steps} + reset_within_step: True + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + # + network_path: ${base_policy_path} + network: + _target_: model.diffusion.mlp_diffusion.DiffusionMLP + time_dim: 16 + mlp_dims: [512, 512, 512] + activation_type: ReLU + residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + horizon_steps: ${horizon_steps} + action_dim: ${action_dim} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} 
\ No newline at end of file diff --git a/cfg/gym/pretrain/halfcheetah-medium-v2/pre_diffusion_mlp.yaml b/cfg/gym/pretrain/halfcheetah-medium-v2/pre_diffusion_mlp.yaml index 4c368b0..88ff719 100644 --- a/cfg/gym/pretrain/halfcheetah-medium-v2/pre_diffusion_mlp.yaml +++ b/cfg/gym/pretrain/halfcheetah-medium-v2/pre_diffusion_mlp.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 3000 + n_epochs: 200 batch_size: 128 learning_rate: 1e-3 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 3000 + first_cycle_steps: 200 warmup_steps: 1 min_lr: 1e-4 save_model_freq: 100 diff --git a/cfg/gym/pretrain/halfcheetah-medium-v2/pre_gaussian_mlp.yaml b/cfg/gym/pretrain/halfcheetah-medium-v2/pre_gaussian_mlp.yaml index 53c74b5..050b922 100644 --- a/cfg/gym/pretrain/halfcheetah-medium-v2/pre_gaussian_mlp.yaml +++ b/cfg/gym/pretrain/halfcheetah-medium-v2/pre_gaussian_mlp.yaml @@ -23,15 +23,14 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 500 + n_epochs: 200 batch_size: 128 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: 200 warmup_steps: 1 min_lr: 1e-4 - save_model_freq: 100 model: diff --git a/cfg/gym/pretrain/hopper-medium-v2/pre_diffusion_mlp.yaml b/cfg/gym/pretrain/hopper-medium-v2/pre_diffusion_mlp.yaml index 01b0df8..6d6fb0a 100644 --- a/cfg/gym/pretrain/hopper-medium-v2/pre_diffusion_mlp.yaml +++ b/cfg/gym/pretrain/hopper-medium-v2/pre_diffusion_mlp.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 3000 + n_epochs: 200 batch_size: 128 learning_rate: 1e-3 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 3000 + first_cycle_steps: 200 warmup_steps: 1 min_lr: 1e-4 save_model_freq: 100 diff --git a/cfg/gym/pretrain/hopper-medium-v2/pre_gaussian_mlp.yaml b/cfg/gym/pretrain/hopper-medium-v2/pre_gaussian_mlp.yaml index c4d0fe3..54099d2 100644 --- a/cfg/gym/pretrain/hopper-medium-v2/pre_gaussian_mlp.yaml +++ 
b/cfg/gym/pretrain/hopper-medium-v2/pre_gaussian_mlp.yaml @@ -23,12 +23,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 500 + n_epochs: 200 batch_size: 128 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: 200 warmup_steps: 1 min_lr: 1e-4 save_model_freq: 100 diff --git a/cfg/gym/pretrain/kitchen-complete-v0/pre_diffusion_mlp.yaml b/cfg/gym/pretrain/kitchen-complete-v0/pre_diffusion_mlp.yaml index 67b726c..49c8454 100644 --- a/cfg/gym/pretrain/kitchen-complete-v0/pre_diffusion_mlp.yaml +++ b/cfg/gym/pretrain/kitchen-complete-v0/pre_diffusion_mlp.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 8000 + n_epochs: 3000 batch_size: 128 learning_rate: 1e-3 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 8000 + first_cycle_steps: 3000 warmup_steps: 1 min_lr: 1e-4 save_model_freq: 500 diff --git a/cfg/gym/pretrain/kitchen-complete-v0/pre_gaussian_mlp.yaml b/cfg/gym/pretrain/kitchen-complete-v0/pre_gaussian_mlp.yaml index ff479d1..a749342 100644 --- a/cfg/gym/pretrain/kitchen-complete-v0/pre_gaussian_mlp.yaml +++ b/cfg/gym/pretrain/kitchen-complete-v0/pre_gaussian_mlp.yaml @@ -23,12 +23,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 0 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-4 save_model_freq: 500 diff --git a/cfg/gym/pretrain/kitchen-mixed-v0/pre_diffusion_mlp.yaml b/cfg/gym/pretrain/kitchen-mixed-v0/pre_diffusion_mlp.yaml index 959b405..b8f2855 100644 --- a/cfg/gym/pretrain/kitchen-mixed-v0/pre_diffusion_mlp.yaml +++ b/cfg/gym/pretrain/kitchen-mixed-v0/pre_diffusion_mlp.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 8000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-3 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 8000 + first_cycle_steps: 3000 warmup_steps: 1 min_lr: 1e-4 save_model_freq: 
500 diff --git a/cfg/gym/pretrain/kitchen-mixed-v0/pre_gaussian_mlp.yaml b/cfg/gym/pretrain/kitchen-mixed-v0/pre_gaussian_mlp.yaml index e498113..67aa18b 100644 --- a/cfg/gym/pretrain/kitchen-mixed-v0/pre_gaussian_mlp.yaml +++ b/cfg/gym/pretrain/kitchen-mixed-v0/pre_gaussian_mlp.yaml @@ -23,12 +23,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 128 learning_rate: 1e-3 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 1 min_lr: 1e-4 save_model_freq: 500 diff --git a/cfg/gym/pretrain/kitchen-partial-v0/pre_diffusion_mlp.yaml b/cfg/gym/pretrain/kitchen-partial-v0/pre_diffusion_mlp.yaml index affc770..acb5fb6 100644 --- a/cfg/gym/pretrain/kitchen-partial-v0/pre_diffusion_mlp.yaml +++ b/cfg/gym/pretrain/kitchen-partial-v0/pre_diffusion_mlp.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 8000 + n_epochs: 3000 batch_size: 128 learning_rate: 1e-3 weight_decay: 1e-5 lr_scheduler: - first_cycle_steps: 8000 + first_cycle_steps: 3000 warmup_steps: 1 min_lr: 1e-4 save_model_freq: 500 diff --git a/cfg/gym/pretrain/kitchen-partial-v0/pre_gaussian_mlp.yaml b/cfg/gym/pretrain/kitchen-partial-v0/pre_gaussian_mlp.yaml index be98366..35e0579 100644 --- a/cfg/gym/pretrain/kitchen-partial-v0/pre_gaussian_mlp.yaml +++ b/cfg/gym/pretrain/kitchen-partial-v0/pre_gaussian_mlp.yaml @@ -23,12 +23,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 128 learning_rate: 1e-3 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 1 min_lr: 1e-4 save_model_freq: 500 diff --git a/cfg/gym/pretrain/walker2d-medium-v2/pre_diffusion_mlp.yaml b/cfg/gym/pretrain/walker2d-medium-v2/pre_diffusion_mlp.yaml index 6f32b0c..ccaf830 100644 --- a/cfg/gym/pretrain/walker2d-medium-v2/pre_diffusion_mlp.yaml +++ b/cfg/gym/pretrain/walker2d-medium-v2/pre_diffusion_mlp.yaml @@ -24,12 +24,12 @@ wandb: run: 
${now:%H-%M-%S}_${name} train: - n_epochs: 3000 + n_epochs: 200 batch_size: 128 learning_rate: 1e-3 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 3000 + first_cycle_steps: 200 warmup_steps: 1 min_lr: 1e-4 save_model_freq: 100 diff --git a/cfg/gym/pretrain/walker2d-medium-v2/pre_gaussian_mlp.yaml b/cfg/gym/pretrain/walker2d-medium-v2/pre_gaussian_mlp.yaml index 99b2f8c..d24932d 100644 --- a/cfg/gym/pretrain/walker2d-medium-v2/pre_gaussian_mlp.yaml +++ b/cfg/gym/pretrain/walker2d-medium-v2/pre_gaussian_mlp.yaml @@ -23,12 +23,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 3000 + n_epochs: 200 batch_size: 128 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 3000 + first_cycle_steps: 200 warmup_steps: 1 min_lr: 1e-4 save_model_freq: 100 diff --git a/cfg/gym/scratch/halfcheetah-v2/ppo_diffusion_mlp.yaml b/cfg/gym/scratch/halfcheetah-v2/ppo_diffusion_mlp.yaml index 49f11ed..052cf90 100644 --- a/cfg/gym/scratch/halfcheetah-v2/ppo_diffusion_mlp.yaml +++ b/cfg/gym/scratch/halfcheetah-v2/ppo_diffusion_mlp.yaml @@ -1,7 +1,7 @@ defaults: - _self_ hydra: - run: + run: dir: ${logdir} _target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent @@ -42,7 +42,7 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 1000 + n_train_itr: 501 n_critic_warmup_itr: 0 n_steps: 1000 gamma: 0.99 @@ -55,7 +55,7 @@ train: critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 1000 warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 @@ -67,7 +67,7 @@ train: reward_scale_running: True reward_scale_const: 1.0 gae_lambda: 0.95 - batch_size: 10000 + batch_size: 5000 update_epochs: 10 vf_coef: 0.5 target_kl: 1 @@ -75,7 +75,7 @@ train: model: _target_: model.diffusion.diffusion_ppo.PPODiffusion # HP to tune - gamma_denoising: 0.99 + gamma_denoising: 1 clip_ploss_coef: 0.1 clip_ploss_coef_base: 0.1 clip_ploss_coef_rate: 3 @@ -94,10 +94,10 @@ model: residual_style: True critic: 
_target_: model.common.critic.CriticObs - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} ft_denoising_steps: ${ft_denoising_steps} horizon_steps: ${horizon_steps} obs_dim: ${obs_dim} diff --git a/cfg/gym/scratch/halfcheetah-v2/ppo_gaussian_mlp.yaml b/cfg/gym/scratch/halfcheetah-v2/ppo_gaussian_mlp.yaml index b0c1241..ed2c881 100644 --- a/cfg/gym/scratch/halfcheetah-v2/ppo_gaussian_mlp.yaml +++ b/cfg/gym/scratch/halfcheetah-v2/ppo_gaussian_mlp.yaml @@ -40,7 +40,7 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 1000 + n_train_itr: 501 n_critic_warmup_itr: 0 n_steps: 1000 gamma: 0.99 @@ -65,7 +65,7 @@ train: reward_scale_running: True reward_scale_const: 1.0 gae_lambda: 0.95 - batch_size: 1000 + batch_size: 500 update_epochs: 10 vf_coef: 0.5 target_kl: 1 diff --git a/cfg/gym/scratch/hopper-v2/ppo_diffusion_mlp.yaml b/cfg/gym/scratch/hopper-v2/ppo_diffusion_mlp.yaml index 729a0c6..39edb31 100644 --- a/cfg/gym/scratch/hopper-v2/ppo_diffusion_mlp.yaml +++ b/cfg/gym/scratch/hopper-v2/ppo_diffusion_mlp.yaml @@ -42,7 +42,7 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 1000 + n_train_itr: 301 n_critic_warmup_itr: 0 n_steps: 1000 gamma: 0.99 @@ -67,7 +67,7 @@ train: reward_scale_running: True reward_scale_const: 1.0 gae_lambda: 0.95 - batch_size: 10000 + batch_size: 5000 update_epochs: 10 vf_coef: 0.5 target_kl: 1 @@ -75,7 +75,7 @@ train: model: _target_: model.diffusion.diffusion_ppo.PPODiffusion # HP to tune - gamma_denoising: 0.99 + gamma_denoising: 1 clip_ploss_coef: 0.1 clip_ploss_coef_base: 0.1 clip_ploss_coef_rate: 3 diff --git a/cfg/gym/scratch/hopper-v2/ppo_gaussian_mlp.yaml b/cfg/gym/scratch/hopper-v2/ppo_gaussian_mlp.yaml index 05f5766..941ead1 100644 --- a/cfg/gym/scratch/hopper-v2/ppo_gaussian_mlp.yaml +++ b/cfg/gym/scratch/hopper-v2/ppo_gaussian_mlp.yaml @@ -40,7 +40,7 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - 
n_train_itr: 1000 + n_train_itr: 301 n_critic_warmup_itr: 0 n_steps: 1000 gamma: 0.99 @@ -65,7 +65,7 @@ train: reward_scale_running: True reward_scale_const: 1.0 gae_lambda: 0.95 - batch_size: 1000 + batch_size: 500 update_epochs: 10 vf_coef: 0.5 target_kl: 1 diff --git a/cfg/gym/scratch/walker2d-v2/ppo_diffusion_mlp.yaml b/cfg/gym/scratch/walker2d-v2/ppo_diffusion_mlp.yaml index 2c1769f..89a0c85 100644 --- a/cfg/gym/scratch/walker2d-v2/ppo_diffusion_mlp.yaml +++ b/cfg/gym/scratch/walker2d-v2/ppo_diffusion_mlp.yaml @@ -42,7 +42,7 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 1000 + n_train_itr: 501 n_critic_warmup_itr: 0 n_steps: 1000 gamma: 0.99 @@ -55,7 +55,7 @@ train: critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 1000 warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 @@ -67,7 +67,7 @@ train: reward_scale_running: True reward_scale_const: 1.0 gae_lambda: 0.95 - batch_size: 10000 + batch_size: 5000 update_epochs: 10 vf_coef: 0.5 target_kl: 1 @@ -75,7 +75,7 @@ train: model: _target_: model.diffusion.diffusion_ppo.PPODiffusion # HP to tune - gamma_denoising: 0.99 + gamma_denoising: 1 clip_ploss_coef: 0.1 clip_ploss_coef_base: 0.1 clip_ploss_coef_rate: 3 @@ -94,10 +94,10 @@ model: residual_style: True critic: _target_: model.common.critic.CriticObs - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} ft_denoising_steps: ${ft_denoising_steps} horizon_steps: ${horizon_steps} obs_dim: ${obs_dim} diff --git a/cfg/gym/scratch/walker2d-v2/ppo_gaussian_mlp.yaml b/cfg/gym/scratch/walker2d-v2/ppo_gaussian_mlp.yaml index 70b6267..5a32530 100644 --- a/cfg/gym/scratch/walker2d-v2/ppo_gaussian_mlp.yaml +++ b/cfg/gym/scratch/walker2d-v2/ppo_gaussian_mlp.yaml @@ -40,7 +40,7 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 1000 + n_train_itr: 301 n_critic_warmup_itr: 0 n_steps: 1000 
gamma: 0.99 @@ -65,7 +65,7 @@ train: reward_scale_running: True reward_scale_const: 1.0 gae_lambda: 0.95 - batch_size: 1000 + batch_size: 500 update_epochs: 10 vf_coef: 0.5 target_kl: 1 diff --git a/cfg/pretraining.md b/cfg/pretraining.md index 84a2b5a..943677e 100644 --- a/cfg/pretraining.md +++ b/cfg/pretraining.md @@ -1,6 +1,6 @@ ## Pre-training experiments -**Update, Nov 6 2024**: we fixed the issue of EMA update being too infrequent causing slow pre-training. Now the number of epochs needed for pre-training can be much slower than those used in the configs. We recommend training with fewer epochs and testing the early checkpoints. +**Update, Nov 20 2024**: We fixed the issue of EMA update being too infrequent causing slow pre-training ([commit](https://github.com/irom-princeton/dppo/commit/e1ef4ca1cfbff85e5ae6c49f5e57debd70174616)). Now the number of epochs needed for pre-training can be much lower than those used in the configs (e.g., 3000 for robomimic state and 1000 for robomimic pixel), and we have updated the pre-training configs in v0.7. If you would like to replicate the original experimental results from the paper, please use v0.6. ### Comparing diffusion-based RL algorithms (Sec. 5.1) Gym configs are under `cfg/gym/pretrain/<env_name>/`, and the config name is `pre_diffusion_mlp`. Robomimic configs are under `cfg/robomimic/pretrain/<env_name>/`, and the name is also `pre_diffusion_mlp`. 
diff --git a/cfg/robomimic/eval/can/eval_diffusion_mlp_img.yaml b/cfg/robomimic/eval/can/eval_diffusion_mlp_img.yaml index b100545..55db305 100644 --- a/cfg/robomimic/eval/can/eval_diffusion_mlp_img.yaml +++ b/cfg/robomimic/eval/can/eval_diffusion_mlp_img.yaml @@ -7,7 +7,7 @@ _target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps} logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} -base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_img_ta4_td100/2024-07-30_22-23-55/checkpoint/state_5000.pt +base_policy_path: robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz @@ -28,7 +28,7 @@ n_steps: 300 # each episode takes max_episode_steps / act_steps steps render_num: 0 env: - n_envs: 50 + n_envs: 20 # reduce gpu usage name: ${env_name} best_reward_threshold_for_success: 1 max_episode_steps: 300 diff --git a/cfg/robomimic/eval/can/eval_diffusion_unet.yaml b/cfg/robomimic/eval/can/eval_diffusion_unet.yaml new file mode 100644 index 0000000..2d1ac3b --- /dev/null +++ b/cfg/robomimic/eval/can/eval_diffusion_unet.yaml @@ -0,0 +1,68 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent + +name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: +robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json +normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz + +seed: 42 +device: cuda:0 +env_name: can +obs_dim: 23 +action_dim: 7 +denoising_steps: 20 +cond_steps: 1 +horizon_steps: 4 +act_steps: 4 + +n_steps: 75 # each episode takes max_episode_steps / act_steps steps +render_num: 0 + 
+env: + n_envs: 40 + name: ${env_name} + best_reward_threshold_for_success: 1 + max_episode_steps: 300 + save_video: False + wrappers: + robomimic_lowdim: + normalization_path: ${normalization_path} + low_dim_keys: ['robot0_eef_pos', + 'robot0_eef_quat', + 'robot0_gripper_qpos', + 'object'] # same order of preprocessed observations + multi_step: + n_obs_steps: ${cond_steps} + n_action_steps: ${act_steps} + max_episode_steps: ${env.max_episode_steps} + reset_within_step: True + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + randn_clip_value: 3 + # + network_path: ${base_policy_path} + network: + _target_: model.diffusion.unet.Unet1D + diffusion_step_embed_dim: 16 + dim: 40 + dim_mults: [1, 2] + kernel_size: 5 + n_groups: 8 + smaller_encoder: False + cond_predict_scale: True + action_dim: ${action_dim} + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/eval/can/eval_diffusion_unet_img.yaml b/cfg/robomimic/eval/can/eval_diffusion_unet_img.yaml new file mode 100644 index 0000000..3b8f643 --- /dev/null +++ b/cfg/robomimic/eval/can/eval_diffusion_unet_img.yaml @@ -0,0 +1,102 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent + +name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: +robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json +normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz + +seed: 42 +device: cuda:0 +env_name: can +obs_dim: 9 +action_dim: 7 +denoising_steps: 100 +cond_steps: 1 +img_cond_steps: 1 +horizon_steps: 4 +act_steps: 4 +use_ddim: 
True +ddim_steps: 5 + +n_steps: 300 # each episode takes max_episode_steps / act_steps steps +render_num: 0 + +env: + n_envs: 20 # reduce gpu usage + name: ${env_name} + best_reward_threshold_for_success: 1 + max_episode_steps: 300 + save_video: False + use_image_obs: True + wrappers: + robomimic_image: + normalization_path: ${normalization_path} + low_dim_keys: ['robot0_eef_pos', + 'robot0_eef_quat', + 'robot0_gripper_qpos'] + image_keys: ['robot0_eye_in_hand_image'] + shape_meta: ${shape_meta} + multi_step: + n_obs_steps: ${cond_steps} + n_action_steps: ${act_steps} + max_episode_steps: ${env.max_episode_steps} + reset_within_step: True + +shape_meta: + obs: + rgb: + shape: [3, 96, 96] + state: + shape: [9] + action: + shape: [7] + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + randn_clip_value: 3 + # + use_ddim: ${use_ddim} + ddim_steps: ${ddim_steps} + network_path: ${base_policy_path} + network: + _target_: model.diffusion.unet.VisionUnet1D + backbone: + _target_: model.common.vit.VitEncoder + obs_shape: ${shape_meta.obs.rgb.shape} + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated + img_h: ${shape_meta.obs.rgb.shape[1]} + img_w: ${shape_meta.obs.rgb.shape[2]} + cfg: + patch_size: 8 + depth: 1 + embed_dim: 128 + num_heads: 4 + embed_style: embed2 + embed_norm: 0 + img_cond_steps: ${img_cond_steps} + augment: False + spatial_emb: 128 + diffusion_step_embed_dim: 32 + dim: 40 + dim_mults: + - 1 + - 2 + kernel_size: 5 + n_groups: 8 + smaller_encoder: false + cond_predict_scale: true + action_dim: ${action_dim} + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/eval/can/eval_gaussian_mlp.yaml b/cfg/robomimic/eval/can/eval_gaussian_mlp.yaml index 25a3719..2efb0dc 100644 
--- a/cfg/robomimic/eval/can/eval_gaussian_mlp.yaml +++ b/cfg/robomimic/eval/can/eval_gaussian_mlp.yaml @@ -7,7 +7,7 @@ _target_: agent.eval.eval_gaussian_agent.EvalGaussianAgent name: ${env_name}_eval_gaussian_mlp_ta${horizon_steps} logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} -base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_gaussian_mlp_ta4/2024-06-28_13-31-00/checkpoint/state_5000.pt +base_policy_path: robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz diff --git a/cfg/robomimic/eval/can/eval_gaussian_mlp_img.yaml b/cfg/robomimic/eval/can/eval_gaussian_mlp_img.yaml index 7aa0269..4b6507a 100644 --- a/cfg/robomimic/eval/can/eval_gaussian_mlp_img.yaml +++ b/cfg/robomimic/eval/can/eval_gaussian_mlp_img.yaml @@ -7,7 +7,7 @@ _target_: agent.eval.eval_gaussian_img_agent.EvalImgGaussianAgent name: ${env_name}_eval_gaussian_mlp_img_ta${horizon_steps} logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} -base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_gaussian_mlp_img_ta4/2024-07-28_21-54-40/checkpoint/state_1000.pt +base_policy_path: robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz diff --git a/cfg/robomimic/eval/lift/eval_diffusion_mlp.yaml b/cfg/robomimic/eval/lift/eval_diffusion_mlp.yaml new file mode 100644 index 0000000..6a0aa81 --- /dev/null +++ b/cfg/robomimic/eval/lift/eval_diffusion_mlp.yaml @@ -0,0 +1,65 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent + +name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: 
+robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json +normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz + +seed: 42 +device: cuda:0 +env_name: lift +obs_dim: 19 +action_dim: 7 +denoising_steps: 20 +cond_steps: 1 +horizon_steps: 4 +act_steps: 4 + +n_steps: 300 # each episode takes max_episode_steps / act_steps steps +render_num: 0 + +env: + n_envs: 50 + name: ${env_name} + best_reward_threshold_for_success: 1 + max_episode_steps: 300 + save_video: False + wrappers: + robomimic_lowdim: + normalization_path: ${normalization_path} + low_dim_keys: ['robot0_eef_pos', + 'robot0_eef_quat', + 'robot0_gripper_qpos', + 'object'] # same order of preprocessed observations + multi_step: + n_obs_steps: ${cond_steps} + n_action_steps: ${act_steps} + max_episode_steps: ${env.max_episode_steps} + reset_within_step: True + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + randn_clip_value: 3 + # + network_path: ${base_policy_path} + network: + _target_: model.diffusion.mlp_diffusion.DiffusionMLP + time_dim: 16 + mlp_dims: [512, 512, 512] + residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + horizon_steps: ${horizon_steps} + action_dim: ${action_dim} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/eval/lift/eval_diffusion_mlp_img.yaml b/cfg/robomimic/eval/lift/eval_diffusion_mlp_img.yaml new file mode 100644 index 0000000..bf8c232 --- /dev/null +++ b/cfg/robomimic/eval/lift/eval_diffusion_mlp_img.yaml @@ -0,0 +1,97 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent + +name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps} +logdir: 
${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: +robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json +normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz + +seed: 42 +device: cuda:0 +env_name: lift +obs_dim: 9 +action_dim: 7 +denoising_steps: 100 +cond_steps: 1 +img_cond_steps: 1 +horizon_steps: 4 +act_steps: 4 +use_ddim: True +ddim_steps: 5 + +n_steps: 300 # each episode takes max_episode_steps / act_steps steps +render_num: 0 + +env: + n_envs: 20 # reduce gpu usage + name: ${env_name} + best_reward_threshold_for_success: 1 + max_episode_steps: 300 + save_video: False + use_image_obs: True + wrappers: + robomimic_image: + normalization_path: ${normalization_path} + low_dim_keys: ['robot0_eef_pos', + 'robot0_eef_quat', + 'robot0_gripper_qpos'] + image_keys: ['robot0_eye_in_hand_image'] + shape_meta: ${shape_meta} + multi_step: + n_obs_steps: ${cond_steps} + n_action_steps: ${act_steps} + max_episode_steps: ${env.max_episode_steps} + reset_within_step: True + +shape_meta: + obs: + rgb: + shape: [3, 96, 96] + state: + shape: [9] + action: + shape: [7] + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + randn_clip_value: 3 + # + use_ddim: ${use_ddim} + ddim_steps: ${ddim_steps} + network_path: ${base_policy_path} + network: + _target_: model.diffusion.mlp_diffusion.VisionDiffusionMLP + backbone: + _target_: model.common.vit.VitEncoder + obs_shape: ${shape_meta.obs.rgb.shape} + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated + img_h: ${shape_meta.obs.rgb.shape[1]} + img_w: ${shape_meta.obs.rgb.shape[2]} + cfg: + patch_size: 8 + depth: 1 + embed_dim: 128 + num_heads: 4 + embed_style: embed2 + embed_norm: 0 + augment: False + spatial_emb: 128 + time_dim: 32 + mlp_dims: [512, 512, 512] + residual_style: True + img_cond_steps: ${img_cond_steps} + cond_dim: 
${eval:'${obs_dim} * ${cond_steps}'} + horizon_steps: ${horizon_steps} + action_dim: ${action_dim} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/eval/lift/eval_diffusion_unet.yaml b/cfg/robomimic/eval/lift/eval_diffusion_unet.yaml new file mode 100644 index 0000000..800354c --- /dev/null +++ b/cfg/robomimic/eval/lift/eval_diffusion_unet.yaml @@ -0,0 +1,68 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent + +name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: +robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json +normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz + +seed: 42 +device: cuda:0 +env_name: lift +obs_dim: 19 +action_dim: 7 +denoising_steps: 20 +cond_steps: 1 +horizon_steps: 4 +act_steps: 4 + +n_steps: 75 # each episode takes max_episode_steps / act_steps steps +render_num: 0 + +env: + n_envs: 40 + name: ${env_name} + best_reward_threshold_for_success: 1 + max_episode_steps: 300 + save_video: False + wrappers: + robomimic_lowdim: + normalization_path: ${normalization_path} + low_dim_keys: ['robot0_eef_pos', + 'robot0_eef_quat', + 'robot0_gripper_qpos', + 'object'] # same order of preprocessed observations + multi_step: + n_obs_steps: ${cond_steps} + n_action_steps: ${act_steps} + max_episode_steps: ${env.max_episode_steps} + reset_within_step: True + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + randn_clip_value: 3 + # + network_path: ${base_policy_path} + network: + _target_: model.diffusion.unet.Unet1D + diffusion_step_embed_dim: 16 + dim: 40 + dim_mults: [1, 2] + kernel_size: 5 + n_groups: 
8 + smaller_encoder: False + cond_predict_scale: True + action_dim: ${action_dim} + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/eval/lift/eval_diffusion_unet_img.yaml b/cfg/robomimic/eval/lift/eval_diffusion_unet_img.yaml new file mode 100644 index 0000000..35c567b --- /dev/null +++ b/cfg/robomimic/eval/lift/eval_diffusion_unet_img.yaml @@ -0,0 +1,100 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent + +name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: +robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json +normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz + +seed: 42 +device: cuda:0 +env_name: lift +obs_dim: 9 +action_dim: 7 +denoising_steps: 100 +cond_steps: 1 +img_cond_steps: 1 +horizon_steps: 4 +act_steps: 4 +use_ddim: True +ddim_steps: 5 + +n_steps: 300 # each episode takes max_episode_steps / act_steps steps +render_num: 0 + +env: + n_envs: 20 # reduce gpu usage + name: ${env_name} + best_reward_threshold_for_success: 1 + max_episode_steps: 300 + save_video: False + use_image_obs: True + wrappers: + robomimic_image: + normalization_path: ${normalization_path} + low_dim_keys: ['robot0_eef_pos', + 'robot0_eef_quat', + 'robot0_gripper_qpos'] + image_keys: ['robot0_eye_in_hand_image'] + shape_meta: ${shape_meta} + multi_step: + n_obs_steps: ${cond_steps} + n_action_steps: ${act_steps} + max_episode_steps: ${env.max_episode_steps} + reset_within_step: True + +shape_meta: + obs: + rgb: + shape: [3, 96, 96] + state: + shape: [9] + action: + shape: [7] + +model: + _target_: 
model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + randn_clip_value: 3 + # + use_ddim: ${use_ddim} + ddim_steps: ${ddim_steps} + network_path: ${base_policy_path} + network: + _target_: model.diffusion.unet.VisionUnet1D + backbone: + _target_: model.common.vit.VitEncoder + obs_shape: ${shape_meta.obs.rgb.shape} + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated + img_h: ${shape_meta.obs.rgb.shape[1]} + img_w: ${shape_meta.obs.rgb.shape[2]} + cfg: + patch_size: 8 + depth: 1 + embed_dim: 128 + num_heads: 4 + embed_style: embed2 + embed_norm: 0 + img_cond_steps: ${img_cond_steps} + augment: False + spatial_emb: 128 + diffusion_step_embed_dim: 32 + dim: 40 + dim_mults: [1, 2] + kernel_size: 5 + n_groups: 8 + smaller_encoder: False + cond_predict_scale: True + action_dim: ${action_dim} + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/eval/square/eval_diffusion_mlp.yaml b/cfg/robomimic/eval/square/eval_diffusion_mlp.yaml index 759c653..1009edc 100644 --- a/cfg/robomimic/eval/square/eval_diffusion_mlp.yaml +++ b/cfg/robomimic/eval/square/eval_diffusion_mlp.yaml @@ -18,8 +18,8 @@ obs_dim: 23 action_dim: 7 denoising_steps: 20 cond_steps: 1 -horizon_steps: 1 -act_steps: 1 +horizon_steps: 4 +act_steps: 4 n_steps: 400 # each episode takes max_episode_steps / act_steps steps render_num: 0 diff --git a/cfg/robomimic/eval/square/eval_diffusion_mlp_img.yaml b/cfg/robomimic/eval/square/eval_diffusion_mlp_img.yaml new file mode 100644 index 0000000..624a1b3 --- /dev/null +++ b/cfg/robomimic/eval/square/eval_diffusion_mlp_img.yaml @@ -0,0 +1,97 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent + +name: 
${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: +robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json +normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz + +seed: 42 +device: cuda:0 +env_name: square +obs_dim: 9 +action_dim: 7 +denoising_steps: 100 +cond_steps: 1 +img_cond_steps: 1 +horizon_steps: 4 +act_steps: 4 +use_ddim: True +ddim_steps: 5 + +n_steps: 400 # each episode takes max_episode_steps / act_steps steps +render_num: 0 + +env: + n_envs: 20 # reduce gpu usage + name: ${env_name} + best_reward_threshold_for_success: 1 + max_episode_steps: 400 + save_video: False + use_image_obs: True + wrappers: + robomimic_image: + normalization_path: ${normalization_path} + low_dim_keys: ['robot0_eef_pos', + 'robot0_eef_quat', + 'robot0_gripper_qpos'] + image_keys: ['agentview_image'] + shape_meta: ${shape_meta} + multi_step: + n_obs_steps: ${cond_steps} + n_action_steps: ${act_steps} + max_episode_steps: ${env.max_episode_steps} + reset_within_step: True + +shape_meta: + obs: + rgb: + shape: [3, 96, 96] + state: + shape: [9] + action: + shape: [7] + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + randn_clip_value: 3 + # + use_ddim: ${use_ddim} + ddim_steps: ${ddim_steps} + network_path: ${base_policy_path} + network: + _target_: model.diffusion.mlp_diffusion.VisionDiffusionMLP + backbone: + _target_: model.common.vit.VitEncoder + obs_shape: ${shape_meta.obs.rgb.shape} + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated + img_h: ${shape_meta.obs.rgb.shape[1]} + img_w: ${shape_meta.obs.rgb.shape[2]} + cfg: + patch_size: 8 + depth: 1 + embed_dim: 128 + num_heads: 4 + embed_style: embed2 + embed_norm: 0 + augment: False + spatial_emb: 128 + time_dim: 32 + mlp_dims: [768, 768, 768] + 
residual_style: True + img_cond_steps: ${img_cond_steps} + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + horizon_steps: ${horizon_steps} + action_dim: ${action_dim} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/eval/square/eval_diffusion_unet.yaml b/cfg/robomimic/eval/square/eval_diffusion_unet.yaml new file mode 100644 index 0000000..7280703 --- /dev/null +++ b/cfg/robomimic/eval/square/eval_diffusion_unet.yaml @@ -0,0 +1,68 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent + +name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: +robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json +normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz + +seed: 42 +device: cuda:0 +env_name: square +obs_dim: 23 +action_dim: 7 +denoising_steps: 20 +cond_steps: 1 +horizon_steps: 4 +act_steps: 4 + +n_steps: 100 # each episode takes max_episode_steps / act_steps steps +render_num: 0 + +env: + n_envs: 50 + name: ${env_name} + best_reward_threshold_for_success: 1 + max_episode_steps: 400 + save_video: False + wrappers: + robomimic_lowdim: + normalization_path: ${normalization_path} + low_dim_keys: ['robot0_eef_pos', + 'robot0_eef_quat', + 'robot0_gripper_qpos', + 'object'] # same order of preprocessed observations + multi_step: + n_obs_steps: ${cond_steps} + n_action_steps: ${act_steps} + max_episode_steps: ${env.max_episode_steps} + reset_within_step: True + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + randn_clip_value: 3 + # + network_path: ${base_policy_path} + network: + _target_: model.diffusion.unet.Unet1D + 
diffusion_step_embed_dim: 16 + dim: 64 + dim_mults: [1, 2] + kernel_size: 5 + n_groups: 8 + smaller_encoder: False + cond_predict_scale: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + action_dim: ${action_dim} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/eval/square/eval_diffusion_unet_img.yaml b/cfg/robomimic/eval/square/eval_diffusion_unet_img.yaml new file mode 100644 index 0000000..d35d975 --- /dev/null +++ b/cfg/robomimic/eval/square/eval_diffusion_unet_img.yaml @@ -0,0 +1,102 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent + +name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: +robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json +normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz + +seed: 42 +device: cuda:0 +env_name: square +obs_dim: 9 +action_dim: 7 +denoising_steps: 100 +cond_steps: 1 +img_cond_steps: 1 +horizon_steps: 4 +act_steps: 4 +use_ddim: True +ddim_steps: 5 + +n_steps: 400 # each episode takes max_episode_steps / act_steps steps +render_num: 0 + +env: + n_envs: 30 # reduce gpu usage + name: ${env_name} + best_reward_threshold_for_success: 1 + max_episode_steps: 400 + save_video: False + use_image_obs: True + wrappers: + robomimic_image: + normalization_path: ${normalization_path} + low_dim_keys: ['robot0_eef_pos', + 'robot0_eef_quat', + 'robot0_gripper_qpos'] + image_keys: ['agentview_image'] + shape_meta: ${shape_meta} + multi_step: + n_obs_steps: ${cond_steps} + n_action_steps: ${act_steps} + max_episode_steps: ${env.max_episode_steps} + reset_within_step: True + +shape_meta: + obs: + rgb: + shape: [3, 96, 96] + state: 
+ shape: [9] + action: + shape: [7] + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + randn_clip_value: 3 + # + use_ddim: ${use_ddim} + ddim_steps: ${ddim_steps} + network_path: ${base_policy_path} + network: + _target_: model.diffusion.unet.VisionUnet1D + backbone: + _target_: model.common.vit.VitEncoder + obs_shape: ${shape_meta.obs.rgb.shape} + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated + img_h: ${shape_meta.obs.rgb.shape[1]} + img_w: ${shape_meta.obs.rgb.shape[2]} + cfg: + patch_size: 8 + depth: 1 + embed_dim: 128 + num_heads: 4 + embed_style: embed2 + embed_norm: 0 + img_cond_steps: ${img_cond_steps} + augment: False + spatial_emb: 128 + diffusion_step_embed_dim: 32 + dim: 64 + dim_mults: + - 1 + - 2 + kernel_size: 5 + n_groups: 8 + smaller_encoder: false + cond_predict_scale: true + action_dim: ${action_dim} + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/eval/square/eval_gaussian_mlp.yaml b/cfg/robomimic/eval/transport/eval_diffusion_mlp.yaml similarity index 62% rename from cfg/robomimic/eval/square/eval_gaussian_mlp.yaml rename to cfg/robomimic/eval/transport/eval_diffusion_mlp.yaml index 3e6a089..23826aa 100644 --- a/cfg/robomimic/eval/square/eval_gaussian_mlp.yaml +++ b/cfg/robomimic/eval/transport/eval_diffusion_mlp.yaml @@ -3,9 +3,9 @@ defaults: hydra: run: dir: ${logdir} -_target_: agent.eval.eval_gaussian_agent.EvalGaussianAgent +_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent -name: ${env_name}_eval_gaussian_mlp_ta${horizon_steps} +name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps} logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} base_policy_path: 
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json @@ -13,12 +13,13 @@ normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization. seed: 42 device: cuda:0 -env_name: square -obs_dim: 23 -action_dim: 7 +env_name: transport +obs_dim: 59 +action_dim: 14 +denoising_steps: 20 cond_steps: 1 -horizon_steps: 1 -act_steps: 1 +horizon_steps: 8 +act_steps: 8 n_steps: 400 # each episode takes max_episode_steps / act_steps steps render_num: 0 @@ -27,7 +28,7 @@ env: n_envs: 50 name: ${env_name} best_reward_threshold_for_success: 1 - max_episode_steps: 400 + max_episode_steps: 800 save_video: False wrappers: robomimic_lowdim: @@ -35,6 +36,9 @@ env: low_dim_keys: ['robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos', + "robot1_eef_pos", + "robot1_eef_quat", + "robot1_gripper_qpos", 'object'] # same order of preprocessed observations multi_step: n_obs_steps: ${cond_steps} @@ -42,19 +46,24 @@ env: max_episode_steps: ${env.max_episode_steps} reset_within_step: True + model: - _target_: model.common.gaussian.GaussianModel + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 randn_clip_value: 3 # network_path: ${base_policy_path} network: - _target_: model.common.mlp_gaussian.Gaussian_MLP + _target_: model.diffusion.mlp_diffusion.DiffusionMLP + time_dim: 32 mlp_dims: [1024, 1024, 1024] - activation_type: ReLU - use_layernorm: true - fixed_std: 0.1 + residual_style: True cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} horizon_steps: ${horizon_steps} - + action_dim: ${action_dim} horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/eval/transport/eval_diffusion_mlp_img.yaml b/cfg/robomimic/eval/transport/eval_diffusion_mlp_img.yaml new file mode 100644 index 0000000..7413246 --- /dev/null +++ b/cfg/robomimic/eval/transport/eval_diffusion_mlp_img.yaml @@ -0,0 
+1,102 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent + +name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: +robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json +normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz + +seed: 42 +device: cuda:0 +env_name: transport +obs_dim: 18 +action_dim: 14 +denoising_steps: 100 +cond_steps: 1 +img_cond_steps: 1 +horizon_steps: 8 +act_steps: 8 +use_ddim: True +ddim_steps: 5 + +n_steps: 200 # each episode takes max_episode_steps / act_steps steps +render_num: 0 + +env: + n_envs: 30 # reduce gpu usage + name: ${env_name} + best_reward_threshold_for_success: 1 + max_episode_steps: 800 + save_video: False + use_image_obs: True + wrappers: + robomimic_image: + normalization_path: ${normalization_path} + low_dim_keys: ['robot0_eef_pos', + 'robot0_eef_quat', + 'robot0_gripper_qpos', + "robot1_eef_pos", + "robot1_eef_quat", + "robot1_gripper_qpos"] + image_keys: ['shouldercamera0_image', + 'shouldercamera1_image'] + shape_meta: ${shape_meta} + multi_step: + n_obs_steps: ${cond_steps} + n_action_steps: ${act_steps} + max_episode_steps: ${env.max_episode_steps} + reset_within_step: True + +shape_meta: + obs: + rgb: + shape: [6, 96, 96] + state: + shape: [18] + action: + shape: [14] + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + randn_clip_value: 3 + # + use_ddim: ${use_ddim} + ddim_steps: ${ddim_steps} + network_path: ${base_policy_path} + network: + _target_: model.diffusion.mlp_diffusion.VisionDiffusionMLP + backbone: + _target_: model.common.vit.VitEncoder + obs_shape: ${shape_meta.obs.rgb.shape} + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated + img_h: 
${shape_meta.obs.rgb.shape[1]} + img_w: ${shape_meta.obs.rgb.shape[2]} + cfg: + patch_size: 8 + depth: 1 + embed_dim: 128 + num_heads: 4 + embed_style: embed2 + embed_norm: 0 + augment: False + num_img: 2 + spatial_emb: 128 + time_dim: 32 + mlp_dims: [768, 768, 768] + residual_style: True + img_cond_steps: ${img_cond_steps} + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + horizon_steps: ${horizon_steps} + action_dim: ${action_dim} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/eval/transport/eval_diffusion_unet.yaml b/cfg/robomimic/eval/transport/eval_diffusion_unet.yaml new file mode 100644 index 0000000..e644bfc --- /dev/null +++ b/cfg/robomimic/eval/transport/eval_diffusion_unet.yaml @@ -0,0 +1,71 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent + +name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: +robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json +normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz + +seed: 42 +device: cuda:0 +env_name: transport +obs_dim: 59 +action_dim: 14 +denoising_steps: 20 +cond_steps: 1 +horizon_steps: 16 +act_steps: 8 + +n_steps: 100 # each episode takes max_episode_steps / act_steps steps +render_num: 0 + +env: + n_envs: 50 + name: ${env_name} + best_reward_threshold_for_success: 1 + max_episode_steps: 800 + save_video: False + wrappers: + robomimic_lowdim: + normalization_path: ${normalization_path} + low_dim_keys: ['robot0_eef_pos', + 'robot0_eef_quat', + 'robot0_gripper_qpos', + "robot1_eef_pos", + "robot1_eef_quat", + "robot1_gripper_qpos", + 'object'] # same order of preprocessed observations + multi_step: + n_obs_steps: 
${cond_steps} + n_action_steps: ${act_steps} + max_episode_steps: ${env.max_episode_steps} + reset_within_step: True + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + randn_clip_value: 3 + # + network_path: ${base_policy_path} + network: + _target_: model.diffusion.unet.Unet1D + diffusion_step_embed_dim: 16 + dim: 64 + dim_mults: [1, 2] + kernel_size: 5 + n_groups: 8 + smaller_encoder: False + cond_predict_scale: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + action_dim: ${action_dim} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/eval/transport/eval_diffusion_unet_img.yaml b/cfg/robomimic/eval/transport/eval_diffusion_unet_img.yaml new file mode 100644 index 0000000..81b0046 --- /dev/null +++ b/cfg/robomimic/eval/transport/eval_diffusion_unet_img.yaml @@ -0,0 +1,107 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent + +name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: +robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json +normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz + +seed: 42 +device: cuda:0 +env_name: transport +obs_dim: 18 +action_dim: 14 +denoising_steps: 100 +cond_steps: 1 +img_cond_steps: 1 +horizon_steps: 16 +act_steps: 8 +use_ddim: True +ddim_steps: 5 + +n_steps: 400 # each episode takes max_episode_steps / act_steps steps +render_num: 0 + +env: + n_envs: 30 # reduce gpu usage + name: ${env_name} + best_reward_threshold_for_success: 1 + max_episode_steps: 800 + save_video: False + use_image_obs: True + wrappers: + robomimic_image: + normalization_path: 
${normalization_path} + low_dim_keys: ['robot0_eef_pos', + 'robot0_eef_quat', + 'robot0_gripper_qpos', + "robot1_eef_pos", + "robot1_eef_quat", + "robot1_gripper_qpos"] + image_keys: ['shouldercamera0_image', + 'shouldercamera1_image'] + shape_meta: ${shape_meta} + multi_step: + n_obs_steps: ${cond_steps} + n_action_steps: ${act_steps} + max_episode_steps: ${env.max_episode_steps} + reset_within_step: True + +shape_meta: + obs: + rgb: + shape: [6, 96, 96] + state: + shape: [18] + action: + shape: [14] + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + randn_clip_value: 3 + # + use_ddim: ${use_ddim} + ddim_steps: ${ddim_steps} + network_path: ${base_policy_path} + network: + _target_: model.diffusion.unet.VisionUnet1D + backbone: + _target_: model.common.vit.VitEncoder + obs_shape: ${shape_meta.obs.rgb.shape} + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated + img_h: ${shape_meta.obs.rgb.shape[1]} + img_w: ${shape_meta.obs.rgb.shape[2]} + cfg: + patch_size: 8 + depth: 1 + embed_dim: 128 + num_heads: 4 + embed_style: embed2 + embed_norm: 0 + img_cond_steps: ${img_cond_steps} + augment: False + num_img: 2 + spatial_emb: 128 + diffusion_step_embed_dim: 32 + dim: 64 + dim_mults: + - 1 + - 2 + kernel_size: 5 + n_groups: 8 + smaller_encoder: false + cond_predict_scale: true + action_dim: ${action_dim} + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/finetune/can/ft_ppo_diffusion_mlp.yaml b/cfg/robomimic/finetune/can/ft_ppo_diffusion_mlp.yaml index 8256876..4c68449 100644 --- a/cfg/robomimic/finetune/can/ft_ppo_diffusion_mlp.yaml +++ b/cfg/robomimic/finetune/can/ft_ppo_diffusion_mlp.yaml @@ -7,7 +7,8 @@ _target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent 
name: ${env_name}_ft_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps} logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} -base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_5000.pt # use 8000 for comparing policy parameterizations +base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_5000.pt # use 5000 for comparing diffusion rl algorithms +# base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_8000.pt # use 8000 for comparing policy parameterizations robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz @@ -54,13 +55,13 @@ train: actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 diff --git a/cfg/robomimic/finetune/can/ft_ppo_diffusion_mlp_img.yaml b/cfg/robomimic/finetune/can/ft_ppo_diffusion_mlp_img.yaml index 54a4ab1..24117e7 100644 --- a/cfg/robomimic/finetune/can/ft_ppo_diffusion_mlp_img.yaml +++ b/cfg/robomimic/finetune/can/ft_ppo_diffusion_mlp_img.yaml @@ -66,16 +66,16 @@ train: gamma: 0.999 augment: True grad_accumulate: 15 - actor_lr: 1e-4 + actor_lr: 5e-5 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 - min_lr: 1e-4 + min_lr: 5e-5 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 
save_model_freq: 100 diff --git a/cfg/robomimic/finetune/can/ft_ppo_diffusion_unet.yaml b/cfg/robomimic/finetune/can/ft_ppo_diffusion_unet.yaml index 6f3c0ce..a21c180 100644 --- a/cfg/robomimic/finetune/can/ft_ppo_diffusion_unet.yaml +++ b/cfg/robomimic/finetune/can/ft_ppo_diffusion_unet.yaml @@ -27,7 +27,7 @@ env: name: ${env_name} best_reward_threshold_for_success: 1 max_episode_steps: 300 - save_video: false + save_video: False wrappers: robomimic_lowdim: normalization_path: ${normalization_path} @@ -47,20 +47,20 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 300 + n_train_itr: 151 n_critic_warmup_itr: 2 n_steps: 300 gamma: 0.999 - actor_lr: 1e-5 + actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 - min_lr: 1e-5 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 diff --git a/cfg/robomimic/finetune/can/ft_ppo_diffusion_unet_img.yaml b/cfg/robomimic/finetune/can/ft_ppo_diffusion_unet_img.yaml new file mode 100644 index 0000000..4e3c56c --- /dev/null +++ b/cfg/robomimic/finetune/can/ft_ppo_diffusion_unet_img.yaml @@ -0,0 +1,173 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent + +name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-34-05_42/checkpoint/state_500.pt +robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json +normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz + +seed: 42 +device: cuda:0 +env_name: can +obs_dim: 
9 +action_dim: 7 +denoising_steps: 100 +ft_denoising_steps: 5 +cond_steps: 1 +img_cond_steps: 1 +horizon_steps: 4 +act_steps: 4 +use_ddim: True + +env: + n_envs: 50 + name: ${env_name} + best_reward_threshold_for_success: 1 + max_episode_steps: 300 + save_video: False + use_image_obs: True + wrappers: + robomimic_image: + normalization_path: ${normalization_path} + low_dim_keys: ['robot0_eef_pos', + 'robot0_eef_quat', + 'robot0_gripper_qpos'] + image_keys: ['robot0_eye_in_hand_image'] + shape_meta: ${shape_meta} + multi_step: + n_obs_steps: ${cond_steps} + n_action_steps: ${act_steps} + max_episode_steps: ${env.max_episode_steps} + reset_within_step: True + +shape_meta: + obs: + rgb: + shape: [3, 96, 96] + state: + shape: [9] + action: + shape: [7] + +wandb: + entity: ${oc.env:DPPO_WANDB_ENTITY} + project: robomimic-${env_name}-finetune + run: ${now:%H-%M-%S}_${name} + +train: + n_train_itr: 151 + n_critic_warmup_itr: 2 + n_steps: 300 + gamma: 0.999 + augment: True + grad_accumulate: 15 + actor_lr: 5e-5 + actor_weight_decay: 0 + actor_lr_scheduler: + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 10 + min_lr: 5e-5 + critic_lr: 1e-3 + critic_weight_decay: 0 + critic_lr_scheduler: + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 10 + min_lr: 1e-3 + save_model_freq: 100 + val_freq: 10 + render: + freq: 1 + num: 0 + # PPO specific + reward_scale_running: True + reward_scale_const: 1.0 + gae_lambda: 0.95 + batch_size: 500 + logprob_batch_size: 500 + update_epochs: 10 + vf_coef: 0.5 + target_kl: 1 + +model: + _target_: model.diffusion.diffusion_ppo.PPODiffusion + # HP to tune + gamma_denoising: 0.99 + clip_ploss_coef: 0.01 + clip_ploss_coef_base: 0.001 + clip_ploss_coef_rate: 3 + randn_clip_value: 3 + min_sampling_denoising_std: 0.1 + min_logprob_denoising_std: 0.1 + # + use_ddim: ${use_ddim} + ddim_steps: ${ft_denoising_steps} + learn_eta: False + eta: + base_eta: 1 + input_dim: ${obs_dim} + mlp_dims: [256, 256] + action_dim: ${action_dim} + min_eta: 
0.1 + max_eta: 1.0 + _target_: model.diffusion.eta.EtaFixed + network_path: ${base_policy_path} + actor: + _target_: model.diffusion.unet.VisionUnet1D + backbone: + _target_: model.common.vit.VitEncoder + obs_shape: ${shape_meta.obs.rgb.shape} + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated + img_h: ${shape_meta.obs.rgb.shape[1]} + img_w: ${shape_meta.obs.rgb.shape[2]} + cfg: + patch_size: 8 + depth: 1 + embed_dim: 128 + num_heads: 4 + embed_style: embed2 + embed_norm: 0 + img_cond_steps: ${img_cond_steps} + augment: False + spatial_emb: 128 + diffusion_step_embed_dim: 32 + dim: 40 + dim_mults: [1, 2] + kernel_size: 5 + n_groups: 8 + smaller_encoder: False + cond_predict_scale: True + action_dim: ${action_dim} + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + critic: + _target_: model.common.critic.ViTCritic + spatial_emb: 128 + augment: False + backbone: + _target_: model.common.vit.VitEncoder + obs_shape: ${shape_meta.obs.rgb.shape} + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated + img_h: ${shape_meta.obs.rgb.shape[1]} + img_w: ${shape_meta.obs.rgb.shape[2]} + cfg: + patch_size: 8 + depth: 1 + embed_dim: 128 + num_heads: 4 + embed_style: embed2 + embed_norm: 0 + img_cond_steps: ${img_cond_steps} + mlp_dims: [256, 256, 256] + activation_type: Mish + residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + ft_denoising_steps: ${ft_denoising_steps} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/finetune/can/ft_ppo_gaussian_mlp.yaml b/cfg/robomimic/finetune/can/ft_ppo_gaussian_mlp.yaml index 1f093e2..d03a676 100644 --- a/cfg/robomimic/finetune/can/ft_ppo_gaussian_mlp.yaml +++ b/cfg/robomimic/finetune/can/ft_ppo_gaussian_mlp.yaml @@ -45,20 +45,20 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 300 + 
n_train_itr: 151 n_critic_warmup_itr: 2 n_steps: 300 gamma: 0.999 - actor_lr: 1e-5 + actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 - min_lr: 1e-5 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 diff --git a/cfg/robomimic/finetune/can/ft_ppo_gaussian_mlp_img.yaml b/cfg/robomimic/finetune/can/ft_ppo_gaussian_mlp_img.yaml index fcba3e6..581b659 100644 --- a/cfg/robomimic/finetune/can/ft_ppo_gaussian_mlp_img.yaml +++ b/cfg/robomimic/finetune/can/ft_ppo_gaussian_mlp_img.yaml @@ -1,7 +1,7 @@ defaults: - _self_ hydra: - run: + run: dir: ${logdir} _target_: agent.finetune.train_ppo_gaussian_img_agent.TrainPPOImgGaussianAgent @@ -57,22 +57,22 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 200 + n_train_itr: 151 n_critic_warmup_itr: 2 n_steps: 300 gamma: 0.999 augment: True grad_accumulate: 5 - actor_lr: 1e-5 + actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 200 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 - min_lr: 1e-5 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 200 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 @@ -140,9 +140,9 @@ model: embed_style: embed2 embed_norm: 0 img_cond_steps: ${img_cond_steps} - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} horizon_steps: ${horizon_steps} device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/finetune/can/ft_ppo_gaussian_transformer.yaml b/cfg/robomimic/finetune/can/ft_ppo_gaussian_transformer.yaml index 3b6254d..006769f 100644 --- a/cfg/robomimic/finetune/can/ft_ppo_gaussian_transformer.yaml +++ 
b/cfg/robomimic/finetune/can/ft_ppo_gaussian_transformer.yaml @@ -45,20 +45,20 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 300 + n_train_itr: 151 n_critic_warmup_itr: 2 n_steps: 300 gamma: 0.999 - actor_lr: 1e-5 + actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 - min_lr: 1e-5 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 diff --git a/cfg/robomimic/finetune/can/ft_ppo_gmm_mlp.yaml b/cfg/robomimic/finetune/can/ft_ppo_gmm_mlp.yaml index 1e7beb2..d141be2 100644 --- a/cfg/robomimic/finetune/can/ft_ppo_gmm_mlp.yaml +++ b/cfg/robomimic/finetune/can/ft_ppo_gmm_mlp.yaml @@ -46,20 +46,20 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 300 + n_train_itr: 151 n_critic_warmup_itr: 2 n_steps: 300 gamma: 0.999 - actor_lr: 1e-5 + actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 - min_lr: 1e-5 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 diff --git a/cfg/robomimic/finetune/lift/ft_ppo_diffusion_mlp.yaml b/cfg/robomimic/finetune/lift/ft_ppo_diffusion_mlp.yaml index 16b9485..3666789 100644 --- a/cfg/robomimic/finetune/lift/ft_ppo_diffusion_mlp.yaml +++ b/cfg/robomimic/finetune/lift/ft_ppo_diffusion_mlp.yaml @@ -1,13 +1,14 @@ defaults: - _self_ hydra: - run: + run: dir: ${logdir} _target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent name: ${env_name}_ft_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps} logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} -base_policy_path: 
${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_5000.pt # use 8000 for comparing policy parameterizations +base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_5000.pt # use 5000 for comparing diffusion rl algorithms +# base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_8000.pt # use 8000 for comparing policy parameterizations robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz @@ -54,13 +55,13 @@ train: actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 diff --git a/cfg/robomimic/finetune/lift/ft_ppo_diffusion_mlp_img.yaml b/cfg/robomimic/finetune/lift/ft_ppo_diffusion_mlp_img.yaml index 72207d6..8c6bcc8 100644 --- a/cfg/robomimic/finetune/lift/ft_ppo_diffusion_mlp_img.yaml +++ b/cfg/robomimic/finetune/lift/ft_ppo_diffusion_mlp_img.yaml @@ -60,22 +60,22 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 151 + n_train_itr: 81 n_critic_warmup_itr: 2 n_steps: 300 gamma: 0.999 augment: True grad_accumulate: 15 - actor_lr: 1e-4 + actor_lr: 5e-5 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 - min_lr: 1e-4 + min_lr: 5e-5 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 diff --git a/cfg/robomimic/finetune/lift/ft_ppo_diffusion_unet.yaml 
b/cfg/robomimic/finetune/lift/ft_ppo_diffusion_unet.yaml index 6550645..e20383a 100644 --- a/cfg/robomimic/finetune/lift/ft_ppo_diffusion_unet.yaml +++ b/cfg/robomimic/finetune/lift/ft_ppo_diffusion_unet.yaml @@ -27,7 +27,7 @@ env: name: ${env_name} best_reward_threshold_for_success: 1 max_episode_steps: 300 - save_video: false + save_video: False wrappers: robomimic_lowdim: normalization_path: ${normalization_path} @@ -47,20 +47,20 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 300 + n_train_itr: 81 n_critic_warmup_itr: 2 n_steps: 300 gamma: 0.999 - actor_lr: 1e-5 + actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 - min_lr: 1e-5 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 @@ -102,10 +102,10 @@ model: action_dim: ${action_dim} critic: _target_: model.common.critic.CriticObs - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} ft_denoising_steps: ${ft_denoising_steps} horizon_steps: ${horizon_steps} obs_dim: ${obs_dim} diff --git a/cfg/robomimic/finetune/lift/ft_ppo_diffusion_unet_img.yaml b/cfg/robomimic/finetune/lift/ft_ppo_diffusion_unet_img.yaml new file mode 100644 index 0000000..f72b70b --- /dev/null +++ b/cfg/robomimic/finetune/lift/ft_ppo_diffusion_unet_img.yaml @@ -0,0 +1,173 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent + +name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: 
${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-35-19_42/checkpoint/state_500.pt +robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json +normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz + +seed: 42 +device: cuda:0 +env_name: lift +obs_dim: 9 +action_dim: 7 +denoising_steps: 100 +ft_denoising_steps: 5 +cond_steps: 1 +img_cond_steps: 1 +horizon_steps: 4 +act_steps: 4 +use_ddim: True + +env: + n_envs: 50 + name: ${env_name} + best_reward_threshold_for_success: 1 + max_episode_steps: 300 + save_video: False + use_image_obs: True + wrappers: + robomimic_image: + normalization_path: ${normalization_path} + low_dim_keys: ['robot0_eef_pos', + 'robot0_eef_quat', + 'robot0_gripper_qpos'] + image_keys: ['robot0_eye_in_hand_image'] + shape_meta: ${shape_meta} + multi_step: + n_obs_steps: ${cond_steps} + n_action_steps: ${act_steps} + max_episode_steps: ${env.max_episode_steps} + reset_within_step: True + +shape_meta: + obs: + rgb: + shape: [3, 96, 96] + state: + shape: [9] + action: + shape: [7] + +wandb: + entity: ${oc.env:DPPO_WANDB_ENTITY} + project: robomimic-${env_name}-finetune + run: ${now:%H-%M-%S}_${name} + +train: + n_train_itr: 81 + n_critic_warmup_itr: 2 + n_steps: 300 + gamma: 0.999 + augment: True + grad_accumulate: 15 + actor_lr: 5e-5 + actor_weight_decay: 0 + actor_lr_scheduler: + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 10 + min_lr: 5e-5 + critic_lr: 1e-3 + critic_weight_decay: 0 + critic_lr_scheduler: + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 10 + min_lr: 1e-3 + save_model_freq: 100 + val_freq: 10 + render: + freq: 1 + num: 0 + # PPO specific + reward_scale_running: True + reward_scale_const: 1.0 + gae_lambda: 0.95 + batch_size: 500 + logprob_batch_size: 500 + update_epochs: 10 + vf_coef: 0.5 + target_kl: 1 + +model: + _target_: model.diffusion.diffusion_ppo.PPODiffusion + # HP to tune + gamma_denoising: 0.99 + 
clip_ploss_coef: 0.01 + clip_ploss_coef_base: 0.001 + clip_ploss_coef_rate: 3 + randn_clip_value: 3 + min_sampling_denoising_std: 0.1 + min_logprob_denoising_std: 0.1 + # + use_ddim: ${use_ddim} + ddim_steps: ${ft_denoising_steps} + learn_eta: False + eta: + base_eta: 1 + input_dim: ${obs_dim} + mlp_dims: [256, 256] + action_dim: ${action_dim} + min_eta: 0.1 + max_eta: 1.0 + _target_: model.diffusion.eta.EtaFixed + network_path: ${base_policy_path} + actor: + _target_: model.diffusion.unet.VisionUnet1D + backbone: + _target_: model.common.vit.VitEncoder + obs_shape: ${shape_meta.obs.rgb.shape} + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated + img_h: ${shape_meta.obs.rgb.shape[1]} + img_w: ${shape_meta.obs.rgb.shape[2]} + cfg: + patch_size: 8 + depth: 1 + embed_dim: 128 + num_heads: 4 + embed_style: embed2 + embed_norm: 0 + img_cond_steps: ${img_cond_steps} + augment: False + spatial_emb: 128 + diffusion_step_embed_dim: 32 + dim: 40 + dim_mults: [1, 2] + kernel_size: 5 + n_groups: 8 + smaller_encoder: False + cond_predict_scale: True + action_dim: ${action_dim} + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + critic: + _target_: model.common.critic.ViTCritic + spatial_emb: 128 + augment: False + backbone: + _target_: model.common.vit.VitEncoder + obs_shape: ${shape_meta.obs.rgb.shape} + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated + img_h: ${shape_meta.obs.rgb.shape[1]} + img_w: ${shape_meta.obs.rgb.shape[2]} + cfg: + patch_size: 8 + depth: 1 + embed_dim: 128 + num_heads: 4 + embed_style: embed2 + embed_norm: 0 + img_cond_steps: ${img_cond_steps} + mlp_dims: [256, 256, 256] + activation_type: Mish + residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + ft_denoising_steps: ${ft_denoising_steps} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file 
diff --git a/cfg/robomimic/finetune/lift/ft_ppo_gaussian_mlp.yaml b/cfg/robomimic/finetune/lift/ft_ppo_gaussian_mlp.yaml index 6bab450..5ea4132 100644 --- a/cfg/robomimic/finetune/lift/ft_ppo_gaussian_mlp.yaml +++ b/cfg/robomimic/finetune/lift/ft_ppo_gaussian_mlp.yaml @@ -25,7 +25,7 @@ env: name: ${env_name} best_reward_threshold_for_success: 1 max_episode_steps: 300 - save_video: false + save_video: False wrappers: robomimic_lowdim: normalization_path: ${normalization_path} @@ -45,20 +45,20 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 300 + n_train_itr: 81 n_critic_warmup_itr: 2 n_steps: 300 gamma: 0.999 - actor_lr: 1e-5 + actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 - min_lr: 1e-5 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 @@ -93,9 +93,9 @@ model: action_dim: ${action_dim} critic: _target_: model.common.critic.CriticObs - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} horizon_steps: ${horizon_steps} device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/finetune/lift/ft_ppo_gaussian_mlp_img.yaml b/cfg/robomimic/finetune/lift/ft_ppo_gaussian_mlp_img.yaml index 6f589c3..dbd9b3c 100644 --- a/cfg/robomimic/finetune/lift/ft_ppo_gaussian_mlp_img.yaml +++ b/cfg/robomimic/finetune/lift/ft_ppo_gaussian_mlp_img.yaml @@ -1,7 +1,7 @@ defaults: - _self_ hydra: - run: + run: dir: ${logdir} _target_: agent.finetune.train_ppo_gaussian_img_agent.TrainPPOImgGaussianAgent @@ -57,22 +57,22 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 200 + n_train_itr: 81 n_critic_warmup_itr: 2 n_steps: 300 gamma: 0.999 augment: True grad_accumulate: 5 - actor_lr: 1e-5 + actor_lr: 1e-4 
actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 200 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 - min_lr: 1e-5 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 200 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 @@ -140,9 +140,9 @@ model: embed_style: embed2 embed_norm: 0 img_cond_steps: ${img_cond_steps} - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} horizon_steps: ${horizon_steps} device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/finetune/lift/ft_ppo_gaussian_transformer.yaml b/cfg/robomimic/finetune/lift/ft_ppo_gaussian_transformer.yaml index fff3c02..d7b9965 100644 --- a/cfg/robomimic/finetune/lift/ft_ppo_gaussian_transformer.yaml +++ b/cfg/robomimic/finetune/lift/ft_ppo_gaussian_transformer.yaml @@ -25,7 +25,7 @@ env: name: ${env_name} best_reward_threshold_for_success: 1 max_episode_steps: 300 - save_video: false + save_video: False wrappers: robomimic_lowdim: normalization_path: ${normalization_path} @@ -45,20 +45,20 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 300 + n_train_itr: 81 n_critic_warmup_itr: 2 n_steps: 300 gamma: 0.999 - actor_lr: 1e-5 + actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 - min_lr: 1e-5 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 @@ -94,9 +94,9 @@ model: action_dim: ${action_dim} critic: _target_: model.common.critic.CriticObs - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} horizon_steps: 
${horizon_steps} device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/finetune/lift/ft_ppo_gmm_mlp.yaml b/cfg/robomimic/finetune/lift/ft_ppo_gmm_mlp.yaml index 1b31a4e..28e3280 100644 --- a/cfg/robomimic/finetune/lift/ft_ppo_gmm_mlp.yaml +++ b/cfg/robomimic/finetune/lift/ft_ppo_gmm_mlp.yaml @@ -26,7 +26,7 @@ env: name: ${env_name} best_reward_threshold_for_success: 1 max_episode_steps: 300 - save_video: false + save_video: False wrappers: robomimic_lowdim: normalization_path: ${normalization_path} @@ -46,20 +46,20 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 300 + n_train_itr: 81 n_critic_warmup_itr: 2 n_steps: 300 gamma: 0.999 - actor_lr: 1e-5 + actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 - min_lr: 1e-5 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 @@ -94,9 +94,9 @@ model: action_dim: ${action_dim} critic: _target_: model.common.critic.CriticObs - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} horizon_steps: ${horizon_steps} device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/finetune/lift/ft_ppo_gmm_transformer.yaml b/cfg/robomimic/finetune/lift/ft_ppo_gmm_transformer.yaml index 4a89144..45c0024 100644 --- a/cfg/robomimic/finetune/lift/ft_ppo_gmm_transformer.yaml +++ b/cfg/robomimic/finetune/lift/ft_ppo_gmm_transformer.yaml @@ -26,7 +26,7 @@ env: name: ${env_name} best_reward_threshold_for_success: 1 max_episode_steps: 300 - save_video: false + save_video: False wrappers: robomimic_lowdim: normalization_path: ${normalization_path} @@ -46,20 +46,20 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 300 + n_train_itr: 81 n_critic_warmup_itr: 
2 n_steps: 300 gamma: 0.999 - actor_lr: 1e-5 + actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 - min_lr: 1e-5 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 @@ -95,9 +95,9 @@ model: action_dim: ${action_dim} critic: _target_: model.common.critic.CriticObs - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} horizon_steps: ${horizon_steps} device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/finetune/square/ft_ppo_diffusion_mlp.yaml b/cfg/robomimic/finetune/square/ft_ppo_diffusion_mlp.yaml index edbe296..eab5648 100644 --- a/cfg/robomimic/finetune/square/ft_ppo_diffusion_mlp.yaml +++ b/cfg/robomimic/finetune/square/ft_ppo_diffusion_mlp.yaml @@ -1,7 +1,7 @@ defaults: - _self_ hydra: - run: + run: dir: ${logdir} _target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent @@ -27,7 +27,7 @@ env: name: ${env_name} best_reward_threshold_for_success: 1 max_episode_steps: 400 - save_video: false + save_video: False wrappers: robomimic_lowdim: normalization_path: ${normalization_path} @@ -54,14 +54,14 @@ train: actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 - warmup_steps: 10 + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 0 min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 - warmup_steps: 10 + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 0 min_lr: 1e-3 save_model_freq: 100 val_freq: 10 diff --git a/cfg/robomimic/finetune/square/ft_ppo_diffusion_mlp_img.yaml b/cfg/robomimic/finetune/square/ft_ppo_diffusion_mlp_img.yaml index 84355d6..e9ad66c 100644 --- 
a/cfg/robomimic/finetune/square/ft_ppo_diffusion_mlp_img.yaml +++ b/cfg/robomimic/finetune/square/ft_ppo_diffusion_mlp_img.yaml @@ -69,13 +69,13 @@ train: actor_lr: 1e-5 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-5 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 diff --git a/cfg/robomimic/finetune/square/ft_ppo_diffusion_unet.yaml b/cfg/robomimic/finetune/square/ft_ppo_diffusion_unet.yaml index 794017a..2031557 100644 --- a/cfg/robomimic/finetune/square/ft_ppo_diffusion_unet.yaml +++ b/cfg/robomimic/finetune/square/ft_ppo_diffusion_unet.yaml @@ -1,7 +1,7 @@ defaults: - _self_ hydra: - run: + run: dir: ${logdir} _target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent @@ -27,7 +27,7 @@ env: name: ${env_name} best_reward_threshold_for_success: 1 max_episode_steps: 400 - save_video: false + save_video: False wrappers: robomimic_lowdim: normalization_path: ${normalization_path} @@ -47,21 +47,21 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 1000 + n_train_itr: 201 n_critic_warmup_itr: 2 n_steps: 400 gamma: 0.999 - actor_lr: 1e-5 + actor_lr: 2e-5 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 - warmup_steps: 10 - min_lr: 1e-5 + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 0 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 - warmup_steps: 10 + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 0 min_lr: 1e-3 save_model_freq: 100 val_freq: 10 @@ -102,10 +102,10 @@ model: action_dim: ${action_dim} critic: _target_: model.common.critic.CriticObs - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} ft_denoising_steps: 
${ft_denoising_steps} horizon_steps: ${horizon_steps} obs_dim: ${obs_dim} diff --git a/cfg/robomimic/finetune/square/ft_ppo_diffusion_unet_img.yaml b/cfg/robomimic/finetune/square/ft_ppo_diffusion_unet_img.yaml new file mode 100644 index 0000000..4dba7ee --- /dev/null +++ b/cfg/robomimic/finetune/square/ft_ppo_diffusion_unet_img.yaml @@ -0,0 +1,173 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent + +name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/square/square_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-36-37_42/checkpoint/state_500.pt +robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json +normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz + +seed: 42 +device: cuda:0 +env_name: square +obs_dim: 9 +action_dim: 7 +denoising_steps: 100 +ft_denoising_steps: 5 +cond_steps: 1 +img_cond_steps: 1 +horizon_steps: 4 +act_steps: 4 +use_ddim: True + +env: + n_envs: 50 + name: ${env_name} + best_reward_threshold_for_success: 1 + max_episode_steps: 400 + save_video: False + use_image_obs: True + wrappers: + robomimic_image: + normalization_path: ${normalization_path} + low_dim_keys: ['robot0_eef_pos', + 'robot0_eef_quat', + 'robot0_gripper_qpos'] + image_keys: ['agentview_image'] + shape_meta: ${shape_meta} + multi_step: + n_obs_steps: ${cond_steps} + n_action_steps: ${act_steps} + max_episode_steps: ${env.max_episode_steps} + reset_within_step: True + +shape_meta: + obs: + rgb: + shape: [3, 96, 96] + state: + shape: [9] + action: + shape: [7] + +wandb: + entity: ${oc.env:DPPO_WANDB_ENTITY} + project: robomimic-${env_name}-finetune + run: ${now:%H-%M-%S}_${name} + +train: + n_train_itr: 301 + n_critic_warmup_itr: 2 + 
n_steps: 400 + gamma: 0.999 + augment: True + grad_accumulate: 20 + actor_lr: 1e-5 + actor_weight_decay: 0 + actor_lr_scheduler: + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 10 + min_lr: 1e-5 + critic_lr: 1e-3 + critic_weight_decay: 0 + critic_lr_scheduler: + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 10 + min_lr: 1e-3 + save_model_freq: 100 + val_freq: 10 + render: + freq: 1 + num: 0 + # PPO specific + reward_scale_running: True + reward_scale_const: 1.0 + gae_lambda: 0.95 + batch_size: 500 + logprob_batch_size: 1000 + update_epochs: 10 + vf_coef: 0.5 + target_kl: 1 + +model: + _target_: model.diffusion.diffusion_ppo.PPODiffusion + # HP to tune + gamma_denoising: 0.99 + clip_ploss_coef: 0.01 + clip_ploss_coef_base: 0.001 + clip_ploss_coef_rate: 3 + randn_clip_value: 3 + min_sampling_denoising_std: 0.1 + min_logprob_denoising_std: 0.1 + # + use_ddim: ${use_ddim} + ddim_steps: ${ft_denoising_steps} + learn_eta: False + eta: + base_eta: 1 + input_dim: ${obs_dim} + mlp_dims: [256, 256] + action_dim: ${action_dim} + min_eta: 0.1 + max_eta: 1.0 + _target_: model.diffusion.eta.EtaFixed + network_path: ${base_policy_path} + actor: + _target_: model.diffusion.unet.VisionUnet1D + backbone: + _target_: model.common.vit.VitEncoder + obs_shape: ${shape_meta.obs.rgb.shape} + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated + img_h: ${shape_meta.obs.rgb.shape[1]} + img_w: ${shape_meta.obs.rgb.shape[2]} + cfg: + patch_size: 8 + depth: 1 + embed_dim: 128 + num_heads: 4 + embed_style: embed2 + embed_norm: 0 + img_cond_steps: ${img_cond_steps} + augment: False + spatial_emb: 128 + diffusion_step_embed_dim: 32 + dim: 64 + dim_mults: [1, 2] + kernel_size: 5 + n_groups: 8 + smaller_encoder: False + cond_predict_scale: True + action_dim: ${action_dim} + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + critic: + _target_: model.common.critic.ViTCritic + spatial_emb: 128 + augment: False + backbone: + _target_: 
model.common.vit.VitEncoder + obs_shape: ${shape_meta.obs.rgb.shape} + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated + img_h: ${shape_meta.obs.rgb.shape[1]} + img_w: ${shape_meta.obs.rgb.shape[2]} + cfg: + patch_size: 8 + depth: 1 + embed_dim: 128 + num_heads: 4 + embed_style: embed2 + embed_norm: 0 + img_cond_steps: ${img_cond_steps} + mlp_dims: [256, 256, 256] + activation_type: Mish + residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + ft_denoising_steps: ${ft_denoising_steps} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/finetune/square/ft_ppo_gaussian_mlp.yaml b/cfg/robomimic/finetune/square/ft_ppo_gaussian_mlp.yaml index e5f382c..c296aff 100644 --- a/cfg/robomimic/finetune/square/ft_ppo_gaussian_mlp.yaml +++ b/cfg/robomimic/finetune/square/ft_ppo_gaussian_mlp.yaml @@ -25,7 +25,7 @@ env: name: ${env_name} best_reward_threshold_for_success: 1 max_episode_steps: 400 - save_video: false + save_video: False wrappers: robomimic_lowdim: normalization_path: ${normalization_path} @@ -45,21 +45,21 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 1000 + n_train_itr: 201 n_critic_warmup_itr: 2 n_steps: 400 gamma: 0.999 - actor_lr: 1e-5 + actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 - warmup_steps: 10 - min_lr: 1e-5 + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 0 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 - warmup_steps: 10 + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 0 min_lr: 1e-3 save_model_freq: 100 val_freq: 10 @@ -93,9 +93,9 @@ model: action_dim: ${action_dim} critic: _target_: model.common.critic.CriticObs - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + 
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} horizon_steps: ${horizon_steps} device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/finetune/square/ft_ppo_gaussian_mlp_img.yaml b/cfg/robomimic/finetune/square/ft_ppo_gaussian_mlp_img.yaml index 7ed1e91..aa63306 100644 --- a/cfg/robomimic/finetune/square/ft_ppo_gaussian_mlp_img.yaml +++ b/cfg/robomimic/finetune/square/ft_ppo_gaussian_mlp_img.yaml @@ -57,7 +57,7 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 500 + n_train_itr: 301 n_critic_warmup_itr: 2 n_steps: 400 gamma: 0.999 @@ -66,13 +66,13 @@ train: actor_lr: 1e-5 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 500 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-5 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 500 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 @@ -140,9 +140,9 @@ model: embed_style: embed2 embed_norm: 0 img_cond_steps: ${img_cond_steps} - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} horizon_steps: ${horizon_steps} device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/finetune/square/ft_ppo_gaussian_transformer.yaml b/cfg/robomimic/finetune/square/ft_ppo_gaussian_transformer.yaml index e5ca94b..df13e51 100644 --- a/cfg/robomimic/finetune/square/ft_ppo_gaussian_transformer.yaml +++ b/cfg/robomimic/finetune/square/ft_ppo_gaussian_transformer.yaml @@ -25,7 +25,7 @@ env: name: ${env_name} best_reward_threshold_for_success: 1 max_episode_steps: 400 - save_video: false + save_video: False wrappers: robomimic_lowdim: normalization_path: ${normalization_path} @@ -45,21 +45,21 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 1000 + n_train_itr: 201 n_critic_warmup_itr: 2 n_steps: 400 gamma: 0.999 - actor_lr: 1e-5 + actor_lr: 1e-4 actor_weight_decay: 0 
actor_lr_scheduler: - first_cycle_steps: 1000 - warmup_steps: 10 - min_lr: 1e-5 + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 0 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 - warmup_steps: 10 + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 0 min_lr: 1e-3 save_model_freq: 100 val_freq: 10 @@ -94,9 +94,9 @@ model: action_dim: ${action_dim} critic: _target_: model.common.critic.CriticObs - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} horizon_steps: ${horizon_steps} device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/finetune/square/ft_ppo_gmm_mlp.yaml b/cfg/robomimic/finetune/square/ft_ppo_gmm_mlp.yaml index e7f14ca..9e36d2a 100644 --- a/cfg/robomimic/finetune/square/ft_ppo_gmm_mlp.yaml +++ b/cfg/robomimic/finetune/square/ft_ppo_gmm_mlp.yaml @@ -26,7 +26,7 @@ env: name: ${env_name} best_reward_threshold_for_success: 1 max_episode_steps: 400 - save_video: false + save_video: False wrappers: robomimic_lowdim: normalization_path: ${normalization_path} @@ -46,21 +46,21 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 1000 + n_train_itr: 201 n_critic_warmup_itr: 2 n_steps: 400 gamma: 0.999 - actor_lr: 1e-5 + actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 - warmup_steps: 10 - min_lr: 1e-5 + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 0 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 - warmup_steps: 10 + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 0 min_lr: 1e-3 save_model_freq: 100 val_freq: 10 @@ -94,9 +94,9 @@ model: action_dim: ${action_dim} critic: _target_: model.common.critic.CriticObs - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: 
${eval:'${obs_dim} * ${cond_steps}'} horizon_steps: ${horizon_steps} device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/finetune/square/ft_ppo_gmm_transformer.yaml b/cfg/robomimic/finetune/square/ft_ppo_gmm_transformer.yaml index b5f3157..fa016ad 100644 --- a/cfg/robomimic/finetune/square/ft_ppo_gmm_transformer.yaml +++ b/cfg/robomimic/finetune/square/ft_ppo_gmm_transformer.yaml @@ -26,7 +26,7 @@ env: name: ${env_name} best_reward_threshold_for_success: 1 max_episode_steps: 400 - save_video: false + save_video: False wrappers: robomimic_lowdim: normalization_path: ${normalization_path} @@ -46,21 +46,21 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 1000 + n_train_itr: 201 n_critic_warmup_itr: 2 n_steps: 400 gamma: 0.999 - actor_lr: 1e-5 + actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 - warmup_steps: 10 - min_lr: 1e-5 + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 0 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 - warmup_steps: 10 + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 0 min_lr: 1e-3 save_model_freq: 100 val_freq: 10 @@ -95,9 +95,9 @@ model: action_dim: ${action_dim} critic: _target_: model.common.critic.CriticObs - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} horizon_steps: ${horizon_steps} device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/finetune/transport/ft_ppo_diffusion_mlp.yaml b/cfg/robomimic/finetune/transport/ft_ppo_diffusion_mlp.yaml index 198855b..c189ae1 100644 --- a/cfg/robomimic/finetune/transport/ft_ppo_diffusion_mlp.yaml +++ b/cfg/robomimic/finetune/transport/ft_ppo_diffusion_mlp.yaml @@ -27,7 +27,7 @@ env: name: ${env_name} best_reward_threshold_for_success: 1 max_episode_steps: 800 - save_video: false + save_video: False wrappers: robomimic_lowdim: 
normalization_path: ${normalization_path} @@ -57,13 +57,13 @@ train: actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 diff --git a/cfg/robomimic/finetune/transport/ft_ppo_diffusion_mlp_img.yaml b/cfg/robomimic/finetune/transport/ft_ppo_diffusion_mlp_img.yaml index b826e06..83033bb 100644 --- a/cfg/robomimic/finetune/transport/ft_ppo_diffusion_mlp_img.yaml +++ b/cfg/robomimic/finetune/transport/ft_ppo_diffusion_mlp_img.yaml @@ -73,13 +73,13 @@ train: actor_lr: 1e-5 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-5 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 diff --git a/cfg/robomimic/finetune/transport/ft_ppo_diffusion_unet.yaml b/cfg/robomimic/finetune/transport/ft_ppo_diffusion_unet.yaml index 5aec825..c0a94e8 100644 --- a/cfg/robomimic/finetune/transport/ft_ppo_diffusion_unet.yaml +++ b/cfg/robomimic/finetune/transport/ft_ppo_diffusion_unet.yaml @@ -27,7 +27,7 @@ env: name: ${env_name} best_reward_threshold_for_success: 1 max_episode_steps: 800 - save_video: false + save_video: False wrappers: robomimic_lowdim: normalization_path: ${normalization_path} @@ -50,20 +50,20 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 1000 + n_train_itr: 201 n_critic_warmup_itr: 2 n_steps: 400 gamma: 0.999 - actor_lr: 1e-5 + actor_lr: 2e-5 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 - min_lr: 1e-6 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - 
first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 @@ -76,7 +76,7 @@ train: reward_scale_const: 1.0 gae_lambda: 0.95 batch_size: 10000 - update_epochs: 8 + update_epochs: 5 vf_coef: 0.5 target_kl: 1 @@ -84,11 +84,11 @@ model: _target_: model.diffusion.diffusion_ppo.PPODiffusion # HP to tune gamma_denoising: 0.99 - clip_ploss_coef: 0.001 - clip_ploss_coef_base: 0.0001 + clip_ploss_coef: 0.01 + clip_ploss_coef_base: 0.001 clip_ploss_coef_rate: 3 randn_clip_value: 3 - min_sampling_denoising_std: 0.08 + min_sampling_denoising_std: 0.1 min_logprob_denoising_std: 0.1 # network_path: ${base_policy_path} @@ -105,10 +105,10 @@ model: action_dim: ${action_dim} critic: _target_: model.common.critic.CriticObs - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} ft_denoising_steps: ${ft_denoising_steps} horizon_steps: ${horizon_steps} obs_dim: ${obs_dim} diff --git a/cfg/robomimic/finetune/transport/ft_ppo_diffusion_unet_img.yaml b/cfg/robomimic/finetune/transport/ft_ppo_diffusion_unet_img.yaml new file mode 100644 index 0000000..8754adf --- /dev/null +++ b/cfg/robomimic/finetune/transport/ft_ppo_diffusion_unet_img.yaml @@ -0,0 +1,179 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent + +name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/transport/transport_pre_diffusion_unet_img_ta16_td100/2024-11-15_17-55-22_42/checkpoint/state_1000.pt +robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json +normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz + +seed: 42 
+device: cuda:0 +env_name: transport +obs_dim: 18 +action_dim: 14 +denoising_steps: 100 +ft_denoising_steps: 5 +cond_steps: 1 +img_cond_steps: 1 +horizon_steps: 16 +act_steps: 8 +use_ddim: True + +env: + n_envs: 50 + name: ${env_name} + best_reward_threshold_for_success: 1 + max_episode_steps: 800 + save_video: False + use_image_obs: True + wrappers: + robomimic_image: + normalization_path: ${normalization_path} + low_dim_keys: ['robot0_eef_pos', + 'robot0_eef_quat', + 'robot0_gripper_qpos', + "robot1_eef_pos", + "robot1_eef_quat", + "robot1_gripper_qpos"] + image_keys: ['shouldercamera0_image', + 'shouldercamera1_image'] + shape_meta: ${shape_meta} + multi_step: + n_obs_steps: ${cond_steps} + n_action_steps: ${act_steps} + max_episode_steps: ${env.max_episode_steps} + reset_within_step: True + +shape_meta: + obs: + rgb: + shape: [6, 96, 96] + state: + shape: [18] + action: + shape: [14] + +wandb: + entity: ${oc.env:DPPO_WANDB_ENTITY} + project: robomimic-${env_name}-finetune + run: ${now:%H-%M-%S}_${name} + +train: + n_train_itr: 201 + n_critic_warmup_itr: 2 + n_steps: 400 + gamma: 0.999 + augment: True + grad_accumulate: 20 + actor_lr: 2e-5 + actor_weight_decay: 0 + actor_lr_scheduler: + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 10 + min_lr: 2e-5 + critic_lr: 1e-3 + critic_weight_decay: 0 + critic_lr_scheduler: + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 10 + min_lr: 1e-3 + save_model_freq: 100 + val_freq: 10 + render: + freq: 1 + num: 0 + # PPO specific + reward_scale_running: True + reward_scale_const: 1.0 + gae_lambda: 0.95 + batch_size: 500 + logprob_batch_size: 1000 + update_epochs: 10 + vf_coef: 0.5 + target_kl: 1 + +model: + _target_: model.diffusion.diffusion_ppo.PPODiffusion + # HP to tune + gamma_denoising: 0.99 + clip_ploss_coef: 0.01 + clip_ploss_coef_base: 0.001 + clip_ploss_coef_rate: 3 + randn_clip_value: 3 + min_sampling_denoising_std: 0.1 + min_logprob_denoising_std: 0.1 + # + use_ddim: ${use_ddim} + ddim_steps: 
${ft_denoising_steps} + learn_eta: False + eta: + base_eta: 1 + input_dim: ${obs_dim} + mlp_dims: [256, 256] + action_dim: ${action_dim} + min_eta: 0.1 + max_eta: 1.0 + _target_: model.diffusion.eta.EtaFixed + network_path: ${base_policy_path} + actor: + _target_: model.diffusion.unet.VisionUnet1D + backbone: + _target_: model.common.vit.VitEncoder + obs_shape: ${shape_meta.obs.rgb.shape} + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated + img_h: ${shape_meta.obs.rgb.shape[1]} + img_w: ${shape_meta.obs.rgb.shape[2]} + cfg: + patch_size: 8 + depth: 1 + embed_dim: 128 + num_heads: 4 + embed_style: embed2 + embed_norm: 0 + img_cond_steps: ${img_cond_steps} + augment: False + num_img: 2 + spatial_emb: 128 + diffusion_step_embed_dim: 32 + dim: 64 + dim_mults: [1, 2] + kernel_size: 5 + n_groups: 8 + smaller_encoder: False + cond_predict_scale: True + action_dim: ${action_dim} + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + critic: + _target_: model.common.critic.ViTCritic + spatial_emb: 128 + num_img: 2 + augment: False + backbone: + _target_: model.common.vit.VitEncoder + obs_shape: ${shape_meta.obs.rgb.shape} + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated + img_h: ${shape_meta.obs.rgb.shape[1]} + img_w: ${shape_meta.obs.rgb.shape[2]} + cfg: + patch_size: 8 + depth: 1 + embed_dim: 128 + num_heads: 4 + embed_style: embed2 + embed_norm: 0 + img_cond_steps: ${img_cond_steps} + mlp_dims: [256, 256, 256] + activation_type: Mish + residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + ft_denoising_steps: ${ft_denoising_steps} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/finetune/transport/ft_ppo_gaussian_mlp.yaml b/cfg/robomimic/finetune/transport/ft_ppo_gaussian_mlp.yaml index aa6338c..b3583fb 100644 --- 
a/cfg/robomimic/finetune/transport/ft_ppo_gaussian_mlp.yaml +++ b/cfg/robomimic/finetune/transport/ft_ppo_gaussian_mlp.yaml @@ -25,7 +25,7 @@ env: name: ${env_name} best_reward_threshold_for_success: 1 max_episode_steps: 800 - save_video: false + save_video: False wrappers: robomimic_lowdim: normalization_path: ${normalization_path} @@ -48,21 +48,21 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 1000 + n_train_itr: 201 n_critic_warmup_itr: 2 n_steps: 400 gamma: 0.999 - actor_lr: 1e-5 + actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 - warmup_steps: 10 - min_lr: 1e-6 + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 0 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 - warmup_steps: 10 + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 0 min_lr: 1e-3 save_model_freq: 100 val_freq: 10 @@ -74,7 +74,7 @@ train: reward_scale_const: 1.0 gae_lambda: 0.95 batch_size: 10000 - update_epochs: 8 + update_epochs: 5 vf_coef: 0.5 target_kl: 1 @@ -87,7 +87,7 @@ model: _target_: model.common.mlp_gaussian.Gaussian_MLP mlp_dims: [1024, 1024, 1024] residual_style: True - fixed_std: 0.08 + fixed_std: 0.1 learn_fixed_std: True std_min: 0.01 std_max: 0.2 @@ -96,9 +96,9 @@ model: action_dim: ${action_dim} critic: _target_: model.common.critic.CriticObs - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} horizon_steps: ${horizon_steps} device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/finetune/transport/ft_ppo_gaussian_mlp_img.yaml b/cfg/robomimic/finetune/transport/ft_ppo_gaussian_mlp_img.yaml index 286c7bb..915691d 100644 --- a/cfg/robomimic/finetune/transport/ft_ppo_gaussian_mlp_img.yaml +++ b/cfg/robomimic/finetune/transport/ft_ppo_gaussian_mlp_img.yaml @@ -61,7 +61,7 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 500 + 
n_train_itr: 201 n_critic_warmup_itr: 2 n_steps: 400 gamma: 0.999 @@ -70,13 +70,13 @@ train: actor_lr: 1e-5 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 500 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 - min_lr: 1e-6 + min_lr: 1e-5 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 500 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 @@ -90,7 +90,7 @@ train: gae_lambda: 0.95 batch_size: 1000 logprob_batch_size: 1000 - update_epochs: 8 + update_epochs: 10 vf_coef: 0.5 target_kl: 1 @@ -119,7 +119,7 @@ model: spatial_emb: 128 mlp_dims: [768, 768, 768] residual_style: True - fixed_std: 0.08 + fixed_std: 0.1 learn_fixed_std: True std_min: 0.01 std_max: 0.2 @@ -146,9 +146,9 @@ model: embed_style: embed2 embed_norm: 0 img_cond_steps: ${img_cond_steps} - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} horizon_steps: ${horizon_steps} device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/finetune/transport/ft_ppo_gaussian_transformer.yaml b/cfg/robomimic/finetune/transport/ft_ppo_gaussian_transformer.yaml index 2681560..f7c990c 100644 --- a/cfg/robomimic/finetune/transport/ft_ppo_gaussian_transformer.yaml +++ b/cfg/robomimic/finetune/transport/ft_ppo_gaussian_transformer.yaml @@ -25,7 +25,7 @@ env: name: ${env_name} best_reward_threshold_for_success: 1 max_episode_steps: 800 - save_video: false + save_video: False wrappers: robomimic_lowdim: normalization_path: ${normalization_path} @@ -48,20 +48,20 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 1000 + n_train_itr: 201 n_critic_warmup_itr: 2 n_steps: 400 gamma: 0.999 - actor_lr: 1e-5 + actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 - min_lr: 1e-6 + min_lr: 1e-4 critic_lr: 1e-3 
critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 + first_cycle_steps: ${train.n_train_itr} warmup_steps: 10 min_lr: 1e-3 save_model_freq: 100 @@ -74,7 +74,7 @@ train: reward_scale_const: 1.0 gae_lambda: 0.95 batch_size: 10000 - update_epochs: 8 + update_epochs: 5 vf_coef: 0.5 target_kl: 1 @@ -97,9 +97,9 @@ model: action_dim: ${action_dim} critic: _target_: model.common.critic.CriticObs - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} horizon_steps: ${horizon_steps} device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/finetune/transport/ft_ppo_gmm_mlp.yaml b/cfg/robomimic/finetune/transport/ft_ppo_gmm_mlp.yaml index b707736..7f43e09 100644 --- a/cfg/robomimic/finetune/transport/ft_ppo_gmm_mlp.yaml +++ b/cfg/robomimic/finetune/transport/ft_ppo_gmm_mlp.yaml @@ -26,7 +26,7 @@ env: name: ${env_name} best_reward_threshold_for_success: 1 max_episode_steps: 800 - save_video: false + save_video: False wrappers: robomimic_lowdim: normalization_path: ${normalization_path} @@ -49,21 +49,21 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 1000 + n_train_itr: 201 n_critic_warmup_itr: 2 n_steps: 400 gamma: 0.999 - actor_lr: 1e-5 + actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 - warmup_steps: 10 - min_lr: 1e-6 + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 0 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 - warmup_steps: 10 + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 0 min_lr: 1e-3 save_model_freq: 100 val_freq: 10 @@ -75,7 +75,7 @@ train: reward_scale_const: 1.0 gae_lambda: 0.95 batch_size: 10000 - update_epochs: 8 + update_epochs: 5 vf_coef: 0.5 target_kl: 1 @@ -87,7 +87,7 @@ model: _target_: model.common.mlp_gmm.GMM_MLP mlp_dims: [1024, 1024, 1024] residual_style: True - fixed_std: 0.08 + 
fixed_std: 0.1 learn_fixed_std: True std_min: 0.01 std_max: 0.2 @@ -97,9 +97,9 @@ model: action_dim: ${action_dim} critic: _target_: model.common.critic.CriticObs - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} horizon_steps: ${horizon_steps} device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/finetune/transport/ft_ppo_gmm_transformer.yaml b/cfg/robomimic/finetune/transport/ft_ppo_gmm_transformer.yaml index f1b981b..62b2e81 100644 --- a/cfg/robomimic/finetune/transport/ft_ppo_gmm_transformer.yaml +++ b/cfg/robomimic/finetune/transport/ft_ppo_gmm_transformer.yaml @@ -26,7 +26,7 @@ env: name: ${env_name} best_reward_threshold_for_success: 1 max_episode_steps: 800 - save_video: false + save_video: False wrappers: robomimic_lowdim: normalization_path: ${normalization_path} @@ -49,21 +49,21 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_train_itr: 1000 + n_train_itr: 201 n_critic_warmup_itr: 2 n_steps: 400 gamma: 0.999 - actor_lr: 1e-5 + actor_lr: 1e-4 actor_weight_decay: 0 actor_lr_scheduler: - first_cycle_steps: 1000 - warmup_steps: 10 - min_lr: 1e-6 + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 0 + min_lr: 1e-4 critic_lr: 1e-3 critic_weight_decay: 0 critic_lr_scheduler: - first_cycle_steps: 1000 - warmup_steps: 10 + first_cycle_steps: ${train.n_train_itr} + warmup_steps: 0 min_lr: 1e-3 save_model_freq: 100 val_freq: 10 @@ -75,7 +75,7 @@ train: reward_scale_const: 1.0 gae_lambda: 0.95 batch_size: 10000 - update_epochs: 8 + update_epochs: 5 vf_coef: 0.5 target_kl: 1 @@ -98,9 +98,9 @@ model: action_dim: ${action_dim} critic: _target_: model.common.critic.CriticObs - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} mlp_dims: [256, 256, 256] activation_type: Mish residual_style: True + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} horizon_steps: ${horizon_steps} device: ${device} \ No newline at end of file diff --git 
a/cfg/robomimic/pretrain/can/pre_diffusion_mlp.yaml b/cfg/robomimic/pretrain/can/pre_diffusion_mlp.yaml index 1fa3ec6..726b097 100644 --- a/cfg/robomimic/pretrain/can/pre_diffusion_mlp.yaml +++ b/cfg/robomimic/pretrain/can/pre_diffusion_mlp.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 8000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/can/pre_diffusion_mlp_img.yaml b/cfg/robomimic/pretrain/can/pre_diffusion_mlp_img.yaml index e95eb28..59ce739 100644 --- a/cfg/robomimic/pretrain/can/pre_diffusion_mlp_img.yaml +++ b/cfg/robomimic/pretrain/can/pre_diffusion_mlp_img.yaml @@ -34,12 +34,12 @@ shape_meta: shape: [7] train: - n_epochs: 5000 + n_epochs: 2000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 2000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 @@ -53,7 +53,7 @@ model: backbone: _target_: model.common.vit.VitEncoder obs_shape: ${shape_meta.obs.rgb.shape} - num_channel: ${eval:'${shape_meta.obs.rgb.shape[0]} * ${img_cond_steps}'} # each image patch is history concatenated + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated cfg: patch_size: 8 depth: 1 diff --git a/cfg/robomimic/pretrain/can/pre_diffusion_mlp_ta1.yaml b/cfg/robomimic/pretrain/can/pre_diffusion_mlp_ta1.yaml index 3d47545..5c5802b 100644 --- a/cfg/robomimic/pretrain/can/pre_diffusion_mlp_ta1.yaml +++ b/cfg/robomimic/pretrain/can/pre_diffusion_mlp_ta1.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 8000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git 
a/cfg/robomimic/pretrain/can/pre_diffusion_mlp_ta1_ph.yaml b/cfg/robomimic/pretrain/can/pre_diffusion_mlp_ta1_ph.yaml index c9afe59..9e0a277 100644 --- a/cfg/robomimic/pretrain/can/pre_diffusion_mlp_ta1_ph.yaml +++ b/cfg/robomimic/pretrain/can/pre_diffusion_mlp_ta1_ph.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 8000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/can/pre_diffusion_unet.yaml b/cfg/robomimic/pretrain/can/pre_diffusion_unet.yaml index c3cc4f3..f3338fa 100644 --- a/cfg/robomimic/pretrain/can/pre_diffusion_unet.yaml +++ b/cfg/robomimic/pretrain/can/pre_diffusion_unet.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 8000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 @@ -47,8 +47,8 @@ model: n_groups: 8 smaller_encoder: False cond_predict_scale: True - cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} action_dim: ${action_dim} + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} horizon_steps: ${horizon_steps} obs_dim: ${obs_dim} action_dim: ${action_dim} diff --git a/cfg/robomimic/pretrain/can/pre_diffusion_unet_img.yaml b/cfg/robomimic/pretrain/can/pre_diffusion_unet_img.yaml new file mode 100644 index 0000000..1592e73 --- /dev/null +++ b/cfg/robomimic/pretrain/can/pre_diffusion_unet_img.yaml @@ -0,0 +1,94 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.pretrain.train_diffusion_agent.TrainDiffusionAgent + +name: ${env}_pre_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +train_dataset_path: 
${oc.env:DPPO_DATA_DIR}/robomimic/${env}-img/train.npz + +seed: 42 +device: cuda:0 +env: can +obs_dim: 9 # proprioception only +action_dim: 7 +denoising_steps: 100 +horizon_steps: 4 +cond_steps: 1 +img_cond_steps: 1 + +wandb: + entity: ${oc.env:DPPO_WANDB_ENTITY} + project: robomimic-${env}-pretrain + run: ${now:%H-%M-%S}_${name} + +shape_meta: + obs: + rgb: + shape: [3, 96, 96] # not counting img_cond_steps + state: + shape: [9] + action: + shape: [7] + +train: + n_epochs: 2000 + batch_size: 256 + learning_rate: 1e-4 + weight_decay: 1e-6 + lr_scheduler: + first_cycle_steps: 2000 + warmup_steps: 100 + min_lr: 1e-5 + save_model_freq: 500 + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + network: + _target_: model.diffusion.unet.VisionUnet1D + backbone: + _target_: model.common.vit.VitEncoder + obs_shape: ${shape_meta.obs.rgb.shape} + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated + cfg: + patch_size: 8 + depth: 1 + embed_dim: 128 + num_heads: 4 + embed_style: embed2 + embed_norm: 0 + img_cond_steps: ${img_cond_steps} + augment: True + spatial_emb: 128 + # + diffusion_step_embed_dim: 32 + dim: 40 + dim_mults: [1, 2] + kernel_size: 5 + n_groups: 8 + smaller_encoder: False + cond_predict_scale: True + action_dim: ${action_dim} + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} + +ema: + decay: 0.995 + +train_dataset: + _target_: agent.dataset.sequence.StitchedSequenceDataset + use_img: True + dataset_path: ${train_dataset_path} + horizon_steps: ${horizon_steps} + max_n_episodes: 100 + cond_steps: ${cond_steps} + img_cond_steps: ${img_cond_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/pretrain/can/pre_gaussian_mlp.yaml b/cfg/robomimic/pretrain/can/pre_gaussian_mlp.yaml index 49fecb3..2f54207 
100644 --- a/cfg/robomimic/pretrain/can/pre_gaussian_mlp.yaml +++ b/cfg/robomimic/pretrain/can/pre_gaussian_mlp.yaml @@ -23,12 +23,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/can/pre_gaussian_mlp_ibrl.yaml b/cfg/robomimic/pretrain/can/pre_gaussian_mlp_ibrl.yaml index 12c949d..bc7fb07 100644 --- a/cfg/robomimic/pretrain/can/pre_gaussian_mlp_ibrl.yaml +++ b/cfg/robomimic/pretrain/can/pre_gaussian_mlp_ibrl.yaml @@ -23,12 +23,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 0 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-4 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/can/pre_gaussian_mlp_img.yaml b/cfg/robomimic/pretrain/can/pre_gaussian_mlp_img.yaml index 1320863..98c0bf4 100644 --- a/cfg/robomimic/pretrain/can/pre_gaussian_mlp_img.yaml +++ b/cfg/robomimic/pretrain/can/pre_gaussian_mlp_img.yaml @@ -33,12 +33,12 @@ shape_meta: shape: [7] train: - n_epochs: 1000 + n_epochs: 2000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 2000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 @@ -50,7 +50,7 @@ model: backbone: _target_: model.common.vit.VitEncoder obs_shape: ${shape_meta.obs.rgb.shape} - num_channel: ${eval:'${shape_meta.obs.rgb.shape[0]} * ${img_cond_steps}'} # each image patch is history concatenated + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated cfg: patch_size: 8 depth: 1 diff --git a/cfg/robomimic/pretrain/can/pre_gaussian_mlp_ta1_ph.yaml b/cfg/robomimic/pretrain/can/pre_gaussian_mlp_ta1_ph.yaml index a8911d3..4eb6e3e 100644 --- 
a/cfg/robomimic/pretrain/can/pre_gaussian_mlp_ta1_ph.yaml +++ b/cfg/robomimic/pretrain/can/pre_gaussian_mlp_ta1_ph.yaml @@ -23,12 +23,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/can/pre_gaussian_transformer.yaml b/cfg/robomimic/pretrain/can/pre_gaussian_transformer.yaml index e8fddc7..05d7952 100644 --- a/cfg/robomimic/pretrain/can/pre_gaussian_transformer.yaml +++ b/cfg/robomimic/pretrain/can/pre_gaussian_transformer.yaml @@ -23,12 +23,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/can/pre_gmm_mlp.yaml b/cfg/robomimic/pretrain/can/pre_gmm_mlp.yaml index 78cf40d..c463319 100644 --- a/cfg/robomimic/pretrain/can/pre_gmm_mlp.yaml +++ b/cfg/robomimic/pretrain/can/pre_gmm_mlp.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/can/pre_gmm_transformer.yaml b/cfg/robomimic/pretrain/can/pre_gmm_transformer.yaml index e8057da..9e731cd 100644 --- a/cfg/robomimic/pretrain/can/pre_gmm_transformer.yaml +++ b/cfg/robomimic/pretrain/can/pre_gmm_transformer.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git 
a/cfg/robomimic/pretrain/lift/pre_diffusion_mlp.yaml b/cfg/robomimic/pretrain/lift/pre_diffusion_mlp.yaml index a5715be..e67fd04 100644 --- a/cfg/robomimic/pretrain/lift/pre_diffusion_mlp.yaml +++ b/cfg/robomimic/pretrain/lift/pre_diffusion_mlp.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 8000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/lift/pre_diffusion_mlp_img.yaml b/cfg/robomimic/pretrain/lift/pre_diffusion_mlp_img.yaml index 5b96d97..58c22d5 100644 --- a/cfg/robomimic/pretrain/lift/pre_diffusion_mlp_img.yaml +++ b/cfg/robomimic/pretrain/lift/pre_diffusion_mlp_img.yaml @@ -34,12 +34,12 @@ shape_meta: shape: [7] train: - n_epochs: 2500 + n_epochs: 2000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 8000 + first_cycle_steps: 2000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 @@ -53,7 +53,7 @@ model: backbone: _target_: model.common.vit.VitEncoder obs_shape: ${shape_meta.obs.rgb.shape} - num_channel: ${eval:'${shape_meta.obs.rgb.shape[0]} * ${img_cond_steps}'} # each image patch is history concatenated + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated cfg: patch_size: 8 depth: 1 diff --git a/cfg/robomimic/pretrain/lift/pre_diffusion_unet.yaml b/cfg/robomimic/pretrain/lift/pre_diffusion_unet.yaml index fa56862..c538574 100644 --- a/cfg/robomimic/pretrain/lift/pre_diffusion_unet.yaml +++ b/cfg/robomimic/pretrain/lift/pre_diffusion_unet.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 8000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git 
a/cfg/robomimic/pretrain/lift/pre_diffusion_unet_img.yaml b/cfg/robomimic/pretrain/lift/pre_diffusion_unet_img.yaml new file mode 100644 index 0000000..a8ebfb6 --- /dev/null +++ b/cfg/robomimic/pretrain/lift/pre_diffusion_unet_img.yaml @@ -0,0 +1,94 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.pretrain.train_diffusion_agent.TrainDiffusionAgent + +name: ${env}_pre_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +train_dataset_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env}-img/train.npz + +seed: 42 +device: cuda:0 +env: lift +obs_dim: 9 # proprioception only +action_dim: 7 +denoising_steps: 100 +horizon_steps: 4 +cond_steps: 1 +img_cond_steps: 1 + +wandb: + entity: ${oc.env:DPPO_WANDB_ENTITY} + project: robomimic-${env}-pretrain + run: ${now:%H-%M-%S}_${name} + +shape_meta: + obs: + rgb: + shape: [3, 96, 96] # not counting img_cond_steps + state: + shape: [9] + action: + shape: [7] + +train: + n_epochs: 2000 + batch_size: 256 + learning_rate: 1e-4 + weight_decay: 1e-6 + lr_scheduler: + first_cycle_steps: 2000 + warmup_steps: 100 + min_lr: 1e-5 + save_model_freq: 500 + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + network: + _target_: model.diffusion.unet.VisionUnet1D + backbone: + _target_: model.common.vit.VitEncoder + obs_shape: ${shape_meta.obs.rgb.shape} + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated + cfg: + patch_size: 8 + depth: 1 + embed_dim: 128 + num_heads: 4 + embed_style: embed2 + embed_norm: 0 + img_cond_steps: ${img_cond_steps} + augment: True + spatial_emb: 128 + # + diffusion_step_embed_dim: 32 + dim: 40 + dim_mults: [1, 2] + kernel_size: 5 + n_groups: 8 + smaller_encoder: False + cond_predict_scale: True + action_dim: ${action_dim} + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + horizon_steps: 
${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} + +ema: + decay: 0.995 + +train_dataset: + _target_: agent.dataset.sequence.StitchedSequenceDataset + use_img: True + dataset_path: ${train_dataset_path} + horizon_steps: ${horizon_steps} + max_n_episodes: 100 + cond_steps: ${cond_steps} + img_cond_steps: ${img_cond_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/pretrain/lift/pre_gaussian_mlp.yaml b/cfg/robomimic/pretrain/lift/pre_gaussian_mlp.yaml index 5243803..98de2ff 100644 --- a/cfg/robomimic/pretrain/lift/pre_gaussian_mlp.yaml +++ b/cfg/robomimic/pretrain/lift/pre_gaussian_mlp.yaml @@ -23,12 +23,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/lift/pre_gaussian_mlp_img.yaml b/cfg/robomimic/pretrain/lift/pre_gaussian_mlp_img.yaml index c77508c..bfcab21 100644 --- a/cfg/robomimic/pretrain/lift/pre_gaussian_mlp_img.yaml +++ b/cfg/robomimic/pretrain/lift/pre_gaussian_mlp_img.yaml @@ -33,12 +33,12 @@ shape_meta: shape: [7] train: - n_epochs: 500 + n_epochs: 2000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 3000 + first_cycle_steps: 2000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 @@ -50,7 +50,7 @@ model: backbone: _target_: model.common.vit.VitEncoder obs_shape: ${shape_meta.obs.rgb.shape} - num_channel: ${eval:'${shape_meta.obs.rgb.shape[0]} * ${img_cond_steps}'} # each image patch is history concatenated + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated cfg: patch_size: 8 depth: 1 diff --git a/cfg/robomimic/pretrain/lift/pre_gaussian_transformer.yaml b/cfg/robomimic/pretrain/lift/pre_gaussian_transformer.yaml index 
6263925..695529e 100644 --- a/cfg/robomimic/pretrain/lift/pre_gaussian_transformer.yaml +++ b/cfg/robomimic/pretrain/lift/pre_gaussian_transformer.yaml @@ -23,12 +23,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/lift/pre_gmm_mlp.yaml b/cfg/robomimic/pretrain/lift/pre_gmm_mlp.yaml index bf36bbb..4dc3065 100644 --- a/cfg/robomimic/pretrain/lift/pre_gmm_mlp.yaml +++ b/cfg/robomimic/pretrain/lift/pre_gmm_mlp.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/lift/pre_gmm_transformer.yaml b/cfg/robomimic/pretrain/lift/pre_gmm_transformer.yaml index fbfa1c1..134e0da 100644 --- a/cfg/robomimic/pretrain/lift/pre_gmm_transformer.yaml +++ b/cfg/robomimic/pretrain/lift/pre_gmm_transformer.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/square/pre_diffusion_mlp.yaml b/cfg/robomimic/pretrain/square/pre_diffusion_mlp.yaml index effd320..fc59c7e 100644 --- a/cfg/robomimic/pretrain/square/pre_diffusion_mlp.yaml +++ b/cfg/robomimic/pretrain/square/pre_diffusion_mlp.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 8000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 
save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/square/pre_diffusion_mlp_img.yaml b/cfg/robomimic/pretrain/square/pre_diffusion_mlp_img.yaml index e1f15ec..c5061a8 100644 --- a/cfg/robomimic/pretrain/square/pre_diffusion_mlp_img.yaml +++ b/cfg/robomimic/pretrain/square/pre_diffusion_mlp_img.yaml @@ -34,12 +34,12 @@ shape_meta: shape: [7] train: - n_epochs: 4000 + n_epochs: 2000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 8000 + first_cycle_steps: 2000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 @@ -53,7 +53,7 @@ model: backbone: _target_: model.common.vit.VitEncoder obs_shape: ${shape_meta.obs.rgb.shape} - num_channel: ${eval:'${shape_meta.obs.rgb.shape[0]} * ${img_cond_steps}'} # each image patch is history concatenated + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated cfg: patch_size: 8 depth: 1 diff --git a/cfg/robomimic/pretrain/square/pre_diffusion_mlp_ta1.yaml b/cfg/robomimic/pretrain/square/pre_diffusion_mlp_ta1.yaml index fdfd118..6531116 100644 --- a/cfg/robomimic/pretrain/square/pre_diffusion_mlp_ta1.yaml +++ b/cfg/robomimic/pretrain/square/pre_diffusion_mlp_ta1.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 8000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/square/pre_diffusion_mlp_ta1_ph.yaml b/cfg/robomimic/pretrain/square/pre_diffusion_mlp_ta1_ph.yaml index ba7664d..3dc0e66 100644 --- a/cfg/robomimic/pretrain/square/pre_diffusion_mlp_ta1_ph.yaml +++ b/cfg/robomimic/pretrain/square/pre_diffusion_mlp_ta1_ph.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 8000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 3000 warmup_steps: 
100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/square/pre_diffusion_unet.yaml b/cfg/robomimic/pretrain/square/pre_diffusion_unet.yaml index 96a2bba..ed43711 100644 --- a/cfg/robomimic/pretrain/square/pre_diffusion_unet.yaml +++ b/cfg/robomimic/pretrain/square/pre_diffusion_unet.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 8000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/square/pre_diffusion_unet_img.yaml b/cfg/robomimic/pretrain/square/pre_diffusion_unet_img.yaml new file mode 100644 index 0000000..b52fde4 --- /dev/null +++ b/cfg/robomimic/pretrain/square/pre_diffusion_unet_img.yaml @@ -0,0 +1,94 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.pretrain.train_diffusion_agent.TrainDiffusionAgent + +name: ${env}_pre_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps} +logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +train_dataset_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env}-img/train.npz + +seed: 42 +device: cuda:0 +env: square +obs_dim: 9 # proprioception only +action_dim: 7 +denoising_steps: 100 +horizon_steps: 4 +cond_steps: 1 +img_cond_steps: 1 + +wandb: + entity: ${oc.env:DPPO_WANDB_ENTITY} + project: robomimic-${env}-pretrain + run: ${now:%H-%M-%S}_${name} + +shape_meta: + obs: + rgb: + shape: [3, 96, 96] # not counting img_cond_steps + state: + shape: [9] + action: + shape: [7] + +train: + n_epochs: 2000 + batch_size: 256 + learning_rate: 1e-4 + weight_decay: 1e-6 + lr_scheduler: + first_cycle_steps: 2000 + warmup_steps: 100 + min_lr: 1e-5 + save_model_freq: 500 + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + network: + _target_: model.diffusion.unet.VisionUnet1D + 
backbone: + _target_: model.common.vit.VitEncoder + obs_shape: ${shape_meta.obs.rgb.shape} + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated + cfg: + patch_size: 8 + depth: 1 + embed_dim: 128 + num_heads: 4 + embed_style: embed2 + embed_norm: 0 + img_cond_steps: ${img_cond_steps} + augment: True + spatial_emb: 128 + # + diffusion_step_embed_dim: 32 + dim: 64 + dim_mults: [1, 2] + kernel_size: 5 + n_groups: 8 + smaller_encoder: False + cond_predict_scale: True + action_dim: ${action_dim} + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} + +ema: + decay: 0.995 + +train_dataset: + _target_: agent.dataset.sequence.StitchedSequenceDataset + use_img: True + dataset_path: ${train_dataset_path} + horizon_steps: ${horizon_steps} + max_n_episodes: 100 + cond_steps: ${cond_steps} + img_cond_steps: ${img_cond_steps} + device: ${device} \ No newline at end of file diff --git a/cfg/robomimic/pretrain/square/pre_gaussian_mlp.yaml b/cfg/robomimic/pretrain/square/pre_gaussian_mlp.yaml index 0b26d26..d8bd7b3 100644 --- a/cfg/robomimic/pretrain/square/pre_gaussian_mlp.yaml +++ b/cfg/robomimic/pretrain/square/pre_gaussian_mlp.yaml @@ -23,12 +23,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/square/pre_gaussian_mlp_ibrl.yaml b/cfg/robomimic/pretrain/square/pre_gaussian_mlp_ibrl.yaml index 7b118cf..a432ba1 100644 --- a/cfg/robomimic/pretrain/square/pre_gaussian_mlp_ibrl.yaml +++ b/cfg/robomimic/pretrain/square/pre_gaussian_mlp_ibrl.yaml @@ -23,12 +23,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 
weight_decay: 0 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-4 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/square/pre_gaussian_mlp_img.yaml b/cfg/robomimic/pretrain/square/pre_gaussian_mlp_img.yaml index c9cc2f3..1ac7ea0 100644 --- a/cfg/robomimic/pretrain/square/pre_gaussian_mlp_img.yaml +++ b/cfg/robomimic/pretrain/square/pre_gaussian_mlp_img.yaml @@ -33,12 +33,12 @@ shape_meta: shape: [7] train: - n_epochs: 4000 + n_epochs: 2000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 2000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 @@ -50,7 +50,7 @@ model: backbone: _target_: model.common.vit.VitEncoder obs_shape: ${shape_meta.obs.rgb.shape} - num_channel: ${eval:'${shape_meta.obs.rgb.shape[0]} * ${img_cond_steps}'} # each image patch is history concatenated + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated cfg: patch_size: 8 depth: 1 diff --git a/cfg/robomimic/pretrain/square/pre_gaussian_mlp_ta1_ph.yaml b/cfg/robomimic/pretrain/square/pre_gaussian_mlp_ta1_ph.yaml index 84fbbb4..56fecf6 100644 --- a/cfg/robomimic/pretrain/square/pre_gaussian_mlp_ta1_ph.yaml +++ b/cfg/robomimic/pretrain/square/pre_gaussian_mlp_ta1_ph.yaml @@ -23,12 +23,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/square/pre_gaussian_transformer.yaml b/cfg/robomimic/pretrain/square/pre_gaussian_transformer.yaml index c6ae3d1..9ea27e1 100644 --- a/cfg/robomimic/pretrain/square/pre_gaussian_transformer.yaml +++ b/cfg/robomimic/pretrain/square/pre_gaussian_transformer.yaml @@ -23,12 +23,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 256 
learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/square/pre_gmm_mlp.yaml b/cfg/robomimic/pretrain/square/pre_gmm_mlp.yaml index 3c70528..63aff6b 100644 --- a/cfg/robomimic/pretrain/square/pre_gmm_mlp.yaml +++ b/cfg/robomimic/pretrain/square/pre_gmm_mlp.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/square/pre_gmm_transformer.yaml b/cfg/robomimic/pretrain/square/pre_gmm_transformer.yaml index 3232db7..7900820 100644 --- a/cfg/robomimic/pretrain/square/pre_gmm_transformer.yaml +++ b/cfg/robomimic/pretrain/square/pre_gmm_transformer.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/transport/pre_diffusion_mlp.yaml b/cfg/robomimic/pretrain/transport/pre_diffusion_mlp.yaml index 9db3685..c827d2d 100644 --- a/cfg/robomimic/pretrain/transport/pre_diffusion_mlp.yaml +++ b/cfg/robomimic/pretrain/transport/pre_diffusion_mlp.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 8000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/transport/pre_diffusion_mlp_img.yaml b/cfg/robomimic/pretrain/transport/pre_diffusion_mlp_img.yaml index befefd2..699cf9b 100644 --- 
a/cfg/robomimic/pretrain/transport/pre_diffusion_mlp_img.yaml +++ b/cfg/robomimic/pretrain/transport/pre_diffusion_mlp_img.yaml @@ -27,19 +27,19 @@ wandb: shape_meta: obs: rgb: - shape: [3, 96, 96] # not counting img_cond_steps + shape: [6, 96, 96] # not counting img_cond_steps state: shape: [9] action: shape: [7] train: - n_epochs: 8000 + n_epochs: 2000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 8000 + first_cycle_steps: 2000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 @@ -53,7 +53,7 @@ model: backbone: _target_: model.common.vit.VitEncoder obs_shape: ${shape_meta.obs.rgb.shape} - num_channel: ${eval:'${shape_meta.obs.rgb.shape[0]} * ${img_cond_steps}'} # each image patch is history concatenated + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated cfg: patch_size: 8 depth: 1 diff --git a/cfg/robomimic/pretrain/transport/pre_diffusion_unet.yaml b/cfg/robomimic/pretrain/transport/pre_diffusion_unet.yaml index 3b7bc4e..e9902a9 100644 --- a/cfg/robomimic/pretrain/transport/pre_diffusion_unet.yaml +++ b/cfg/robomimic/pretrain/transport/pre_diffusion_unet.yaml @@ -24,12 +24,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 8000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 10000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/transport/pre_diffusion_unet_img.yaml b/cfg/robomimic/pretrain/transport/pre_diffusion_unet_img.yaml new file mode 100644 index 0000000..0099c22 --- /dev/null +++ b/cfg/robomimic/pretrain/transport/pre_diffusion_unet_img.yaml @@ -0,0 +1,95 @@ +defaults: + - _self_ +hydra: + run: + dir: ${logdir} +_target_: agent.pretrain.train_diffusion_agent.TrainDiffusionAgent + +name: ${env}_pre_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps} +logdir: 
${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} +train_dataset_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env}-img/train.npz + +seed: 42 +device: cuda:0 +env: transport +obs_dim: 18 # proprioception only +action_dim: 14 +denoising_steps: 100 +horizon_steps: 16 +cond_steps: 1 +img_cond_steps: 1 + +wandb: + entity: ${oc.env:DPPO_WANDB_ENTITY} + project: robomimic-${env}-pretrain + run: ${now:%H-%M-%S}_${name} + +shape_meta: + obs: + rgb: + shape: [6, 96, 96] # not counting img_cond_steps + state: + shape: [9] + action: + shape: [7] + +train: + n_epochs: 2000 + batch_size: 256 + learning_rate: 1e-4 + weight_decay: 1e-6 + lr_scheduler: + first_cycle_steps: 2000 + warmup_steps: 100 + min_lr: 1e-5 + save_model_freq: 500 + +model: + _target_: model.diffusion.diffusion.DiffusionModel + predict_epsilon: True + denoised_clip_value: 1.0 + network: + _target_: model.diffusion.unet.VisionUnet1D + backbone: + _target_: model.common.vit.VitEncoder + obs_shape: ${shape_meta.obs.rgb.shape} + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated + cfg: + patch_size: 8 + depth: 1 + embed_dim: 128 + num_heads: 4 + embed_style: embed2 + embed_norm: 0 + augment: True + num_img: 2 + spatial_emb: 128 + # + diffusion_step_embed_dim: 32 + dim: 64 + dim_mults: [1, 2] + kernel_size: 5 + n_groups: 8 + smaller_encoder: False + cond_predict_scale: True + img_cond_steps: ${img_cond_steps} + cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} + action_dim: ${action_dim} + horizon_steps: ${horizon_steps} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + denoising_steps: ${denoising_steps} + device: ${device} + +ema: + decay: 0.995 + +train_dataset: + _target_: agent.dataset.sequence.StitchedSequenceDataset + use_img: True + dataset_path: ${train_dataset_path} + horizon_steps: ${horizon_steps} + max_n_episodes: 100 + cond_steps: ${cond_steps} + img_cond_steps: ${img_cond_steps} + device: ${device} \ No newline at end of file 
diff --git a/cfg/robomimic/pretrain/transport/pre_gaussian_mlp.yaml b/cfg/robomimic/pretrain/transport/pre_gaussian_mlp.yaml index 900cdc1..7e08182 100644 --- a/cfg/robomimic/pretrain/transport/pre_gaussian_mlp.yaml +++ b/cfg/robomimic/pretrain/transport/pre_gaussian_mlp.yaml @@ -23,12 +23,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 diff --git a/cfg/robomimic/pretrain/transport/pre_gaussian_mlp_img.yaml b/cfg/robomimic/pretrain/transport/pre_gaussian_mlp_img.yaml index 040c383..5f724f7 100644 --- a/cfg/robomimic/pretrain/transport/pre_gaussian_mlp_img.yaml +++ b/cfg/robomimic/pretrain/transport/pre_gaussian_mlp_img.yaml @@ -26,19 +26,19 @@ wandb: shape_meta: obs: rgb: - shape: [3, 96, 96] # not counting img_cond_steps + shape: [6, 96, 96] # not counting img_cond_steps state: shape: [9] action: shape: [7] train: - n_epochs: 5000 + n_epochs: 2000 batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 2000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500 @@ -50,7 +50,7 @@ model: backbone: _target_: model.common.vit.VitEncoder obs_shape: ${shape_meta.obs.rgb.shape} - num_channel: ${eval:'${shape_meta.obs.rgb.shape[0]} * ${img_cond_steps}'} # each image patch is history concatenated + num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated cfg: patch_size: 8 depth: 1 diff --git a/cfg/robomimic/pretrain/transport/pre_gaussian_transformer.yaml b/cfg/robomimic/pretrain/transport/pre_gaussian_transformer.yaml index 372bb2c..033868c 100644 --- a/cfg/robomimic/pretrain/transport/pre_gaussian_transformer.yaml +++ b/cfg/robomimic/pretrain/transport/pre_gaussian_transformer.yaml @@ -23,12 +23,12 @@ wandb: run: ${now:%H-%M-%S}_${name} train: - n_epochs: 5000 + n_epochs: 3000 
batch_size: 256 learning_rate: 1e-4 weight_decay: 1e-6 lr_scheduler: - first_cycle_steps: 5000 + first_cycle_steps: 3000 warmup_steps: 100 min_lr: 1e-5 save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/transport/pre_gmm_mlp.yaml b/cfg/robomimic/pretrain/transport/pre_gmm_mlp.yaml
index ef5daf0..dfd07ab 100644
--- a/cfg/robomimic/pretrain/transport/pre_gmm_mlp.yaml
+++ b/cfg/robomimic/pretrain/transport/pre_gmm_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/transport/pre_gmm_transformer.yaml b/cfg/robomimic/pretrain/transport/pre_gmm_transformer.yaml
index cda80e7..9ce532a 100644
--- a/cfg/robomimic/pretrain/transport/pre_gmm_transformer.yaml
+++ b/cfg/robomimic/pretrain/transport/pre_gmm_transformer.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/model/diffusion/unet.py b/model/diffusion/unet.py
index c45b9c8..0307b50 100644
--- a/model/diffusion/unet.py
+++ b/model/diffusion/unet.py
@@ -10,6 +10,7 @@ import torch.nn as nn
 import einops
 from einops.layers.torch import Rearrange
 import logging
+from copy import deepcopy
 
 log = logging.getLogger(__name__)
 
@@ -20,7 +21,8 @@ from model.diffusion.modules import (
     Conv1dBlock,
 )
 from model.common.mlp import ResidualMLP
-
+from model.diffusion.modules import SinusoidalPosEmb
+from model.common.modules import SpatialEmb, RandomShiftsAug
 
 class ResidualBlock1D(nn.Module):
 
@@ -323,3 +325,317 @@ class Unet1D(nn.Module):
 
         x = einops.rearrange(x, "b t h -> b h t")
         return x
+
+
+class VisionUnet1D(nn.Module):
+    """1D U-Net over action chunks, conditioned on images and flattened state.
+
+    Images are passed through ``backbone``, compressed to a feature vector and
+    concatenated with the flattened state and the diffusion-step embedding; the
+    result conditions every residual block of the U-Net.
+    """
+
+    def __init__(
+        self,
+        backbone,
+        action_dim,
+        img_cond_steps=1,
+        cond_dim=None,
+        diffusion_step_embed_dim=32,
+        dim=32,
+        dim_mults=(1, 2, 4, 8),
+        smaller_encoder=False,
+        cond_mlp_dims=None,
+        kernel_size=5,
+        n_groups=None,
+        activation_type="Mish",
+        cond_predict_scale=False,
+        groupnorm_eps=1e-5,
+        spatial_emb=0,
+        dropout=0,
+        num_img=1,
+        augment=False,
+        visual_feature_dim=128,
+    ):
+        """
+        backbone: image encoder; must expose num_patch, patch_repr_dim and repr_dim
+        cond_dim: width of the flattened state, i.e. cond["state"].view(B, -1)
+        spatial_emb: if > 0, projection dim of the SpatialEmb compressor (per image)
+        visual_feature_dim: output dim of the linear compressor; only used when
+            spatial_emb == 0 (with spatial_emb > 0 it is spatial_emb * num_img)
+        num_img: number of images stacked along the channel axis of cond["rgb"]
+        """
+        super().__init__()
+
+        # vision
+        self.backbone = backbone
+        if augment:
+            self.aug = RandomShiftsAug(pad=4)
+        self.augment = augment
+        self.num_img = num_img
+        self.img_cond_steps = img_cond_steps
+        # NOTE: forward() uses compress1/compress2 whenever num_img > 1, and those
+        # only exist when spatial_emb > 0; num_img > 1 with spatial_emb == 0 is unsupported
+        if spatial_emb > 0:
+            assert spatial_emb > 1, "this is the dimension"
+            if num_img > 1:
+                self.compress1 = SpatialEmb(
+                    num_patch=self.backbone.num_patch,
+                    patch_dim=self.backbone.patch_repr_dim,
+                    prop_dim=cond_dim,
+                    proj_dim=spatial_emb,
+                    dropout=dropout,
+                )
+                self.compress2 = deepcopy(self.compress1)
+            else:  # TODO: clean up
+                self.compress = SpatialEmb(
+                    num_patch=self.backbone.num_patch,
+                    patch_dim=self.backbone.patch_repr_dim,
+                    prop_dim=cond_dim,
+                    proj_dim=spatial_emb,
+                    dropout=dropout,
+                )
+            visual_feature_dim = spatial_emb * num_img
+        else:
+            # linear compressor; its width comes from the visual_feature_dim argument
+            self.compress = nn.Sequential(
+                nn.Linear(self.backbone.repr_dim, visual_feature_dim),
+                nn.LayerNorm(visual_feature_dim),
+                nn.Dropout(dropout),
+                nn.ReLU(),
+            )
+
+        # unet
+        dims = [action_dim, *map(lambda m: dim * m, dim_mults)]
+        in_out = list(zip(dims[:-1], dims[1:]))
+        log.info(f"Channel dimensions: {in_out}")
+
+        dsed = diffusion_step_embed_dim
+        self.time_mlp = nn.Sequential(
+            SinusoidalPosEmb(dsed),
+            nn.Linear(dsed, dsed * 4),
+            nn.Mish(),
+            nn.Linear(dsed * 4, dsed),
+        )
+        if cond_mlp_dims is not None:
+            self.cond_mlp = ResidualMLP(
+                dim_list=[cond_dim] + cond_mlp_dims,
+                activation_type=activation_type,
+                out_activation_type="Identity",
+            )
+            cond_block_dim = dsed + cond_mlp_dims[-1] + visual_feature_dim
+        else:
+            cond_block_dim = dsed + cond_dim + visual_feature_dim
+        use_large_encoder_in_block = cond_mlp_dims is None and not smaller_encoder
+
+        mid_dim = dims[-1]
+        self.mid_modules = nn.ModuleList(
+            [
+                ResidualBlock1D(
+                    mid_dim,
+                    mid_dim,
+                    cond_dim=cond_block_dim,
+                    kernel_size=kernel_size,
+                    n_groups=n_groups,
+                    cond_predict_scale=cond_predict_scale,
+                    larger_encoder=use_large_encoder_in_block,
+                    activation_type=activation_type,
+                    groupnorm_eps=groupnorm_eps,
+                ),
+                ResidualBlock1D(
+                    mid_dim,
+                    mid_dim,
+                    cond_dim=cond_block_dim,
+                    kernel_size=kernel_size,
+                    n_groups=n_groups,
+                    cond_predict_scale=cond_predict_scale,
+                    larger_encoder=use_large_encoder_in_block,
+                    activation_type=activation_type,
+                    groupnorm_eps=groupnorm_eps,
+                ),
+            ]
+        )
+
+        self.down_modules = nn.ModuleList([])
+        for ind, (dim_in, dim_out) in enumerate(in_out):
+            is_last = ind >= (len(in_out) - 1)
+            self.down_modules.append(
+                nn.ModuleList(
+                    [
+                        ResidualBlock1D(
+                            dim_in,
+                            dim_out,
+                            cond_dim=cond_block_dim,
+                            kernel_size=kernel_size,
+                            n_groups=n_groups,
+                            cond_predict_scale=cond_predict_scale,
+                            larger_encoder=use_large_encoder_in_block,
+                            activation_type=activation_type,
+                            groupnorm_eps=groupnorm_eps,
+                        ),
+                        ResidualBlock1D(
+                            dim_out,
+                            dim_out,
+                            cond_dim=cond_block_dim,
+                            kernel_size=kernel_size,
+                            n_groups=n_groups,
+                            cond_predict_scale=cond_predict_scale,
+                            larger_encoder=use_large_encoder_in_block,
+                            activation_type=activation_type,
+                            groupnorm_eps=groupnorm_eps,
+                        ),
+                        Downsample1d(dim_out) if not is_last else nn.Identity(),
+                    ]
+                )
+            )
+
+        self.up_modules = nn.ModuleList([])
+        for ind, (dim_in, dim_out) in enumerate(reversed(in_out[1:])):
+            # NOTE: ind never reaches len(in_out) - 1 in this loop, so every up
+            # stage upsamples, mirroring the len(in_out) - 1 downsampling stages
+            is_last = ind >= (len(in_out) - 1)
+            self.up_modules.append(
+                nn.ModuleList(
+                    [
+                        ResidualBlock1D(
+                            dim_out * 2,
+                            dim_in,
+                            cond_dim=cond_block_dim,
+                            kernel_size=kernel_size,
+                            n_groups=n_groups,
+                            cond_predict_scale=cond_predict_scale,
+                            larger_encoder=use_large_encoder_in_block,
+                            activation_type=activation_type,
+                            groupnorm_eps=groupnorm_eps,
+                        ),
+                        ResidualBlock1D(
+                            dim_in,
+                            dim_in,
+                            cond_dim=cond_block_dim,
+                            kernel_size=kernel_size,
+                            n_groups=n_groups,
+                            cond_predict_scale=cond_predict_scale,
+                            larger_encoder=use_large_encoder_in_block,
+                            activation_type=activation_type,
+                            groupnorm_eps=groupnorm_eps,
+                        ),
+                        Upsample1d(dim_in) if not is_last else nn.Identity(),
+                    ]
+                )
+            )
+
+        self.final_conv = nn.Sequential(
+            Conv1dBlock(
+                dim,
+                dim,
+                kernel_size=kernel_size,
+                n_groups=n_groups,
+                activation_type=activation_type,
+                eps=groupnorm_eps,
+            ),
+            nn.Conv1d(dim, action_dim, 1),
+        )
+
+    def forward(
+        self,
+        x,
+        time,
+        cond,
+        **kwargs,
+    ):
+        """
+        x: (B, Ta, act_dim)
+        time: (B,) or int, diffusion step
+        cond: dict with key state/rgb; more recent obs at the end
+            state: (B, To, obs_dim)
+            rgb: (B, T_rgb, C, H, W), with C = 3 * num_img when num_img > 1
+
+        returns: (B, Ta, act_dim)
+        """
+        B = len(x)
+        _, _, _, H, W = cond["rgb"].shape
+
+        # move chunk dim to the end
+        x = einops.rearrange(x, "b h t -> b t h")
+
+        # flatten history
+        state = cond["state"].view(B, -1)
+
+        # obs encoder
+        # NOTE(review): the encoded state is also what SpatialEmb receives below,
+        # while SpatialEmb was built with prop_dim=cond_dim; confirm the widths
+        # agree when cond_mlp_dims is set
+        if hasattr(self, "cond_mlp"):
+            state = self.cond_mlp(state)
+
+        # Take recent images --- sometimes we want to use fewer img_cond_steps than cond_steps (e.g., 1 image but 3 prio)
+        rgb = cond["rgb"][:, -self.img_cond_steps :]
+
+        # concatenate images in cond by channels
+        if self.num_img > 1:
+            # use the number of frames actually kept, not the full history length
+            rgb = rgb.reshape(B, rgb.shape[1], self.num_img, 3, H, W)
+            rgb = einops.rearrange(rgb, "b t n c h w -> b n (t c) h w")
+        else:
+            rgb = einops.rearrange(rgb, "b t c h w -> b (t c) h w")
+
+        # convert rgb to float32 for augmentation
+        rgb = rgb.float()
+
+        # get vit output - pass in two images separately
+        if self.num_img > 1:  # TODO: properly handle multiple images
+            rgb1 = rgb[:, 0]
+            rgb2 = rgb[:, 1]
+            if self.augment:
+                rgb1 = self.aug(rgb1)
+                rgb2 = self.aug(rgb2)
+            feat1 = self.backbone(rgb1)
+            feat2 = self.backbone(rgb2)
+            feat1 = self.compress1.forward(feat1, state)
+            feat2 = self.compress2.forward(feat2, state)
+            feat = torch.cat([feat1, feat2], dim=-1)
+        else:  # single image
+            if self.augment:
+                rgb = self.aug(rgb)
+            feat = self.backbone(rgb)
+
+            # compress
+            if isinstance(self.compress, SpatialEmb):
+                feat = self.compress.forward(feat, state)
+            else:
+                feat = feat.flatten(1, -1)
+                feat = self.compress(feat)
+        cond_encoded = torch.cat([feat, state], dim=-1)
+
+        # 1. time
+        if not torch.is_tensor(time):
+            time = torch.tensor([time], dtype=torch.long, device=x.device)
+        elif torch.is_tensor(time) and len(time.shape) == 0:
+            time = time[None].to(x.device)
+        # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
+        time = time.expand(x.shape[0])
+        global_feature = self.time_mlp(time)
+        global_feature = torch.cat([global_feature, cond_encoded], axis=-1)
+
+        # encoder path: keep each stage's output for the skip connections
+        h = []
+        for resnet, resnet2, downsample in self.down_modules:
+            x = resnet(x, global_feature)
+            x = resnet2(x, global_feature)
+            h.append(x)
+            x = downsample(x)
+
+        for mid_module in self.mid_modules:
+            x = mid_module(x, global_feature)
+
+        for resnet, resnet2, upsample in self.up_modules:
+            x = torch.cat((x, h.pop()), dim=1)
+            x = resnet(x, global_feature)
+            x = resnet2(x, global_feature)
+            x = upsample(x)
+
+        x = self.final_conv(x)
+
+        x = einops.rearrange(x, "b t h -> b h t")
+        return x
+
diff --git a/pyproject.toml b/pyproject.toml index b1dbffe..a432bef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "dppo" -version = "0.6.0" +version = "0.7.0" description = "Fine-tuning diffusion policies with PPO."
readme = "README.md" requires-python = ">=3.8" diff --git a/script/download_url.py b/script/download_url.py index d50ee4d..ca25fc0 100644 --- a/script/download_url.py +++ b/script/download_url.py @@ -279,6 +279,11 @@ def get_checkpoint_download_url(cfg): in path ): return "https://drive.google.com/file/d/1T-NGgBmT-UmcVWADygXj873IyWLewvsU/view?usp=drive_link" + elif ( + "lift_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-35-19_42/checkpoint/state_500.pt" + in path + ): + return "https://drive.google.com/file/d/1-gB4Tz5ityFMnegX7uRz5PCcb7-JTOZg/view?usp=drive_link" elif ( "lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_5000.pt" in path @@ -323,6 +328,11 @@ def get_checkpoint_download_url(cfg): in path ): return "https://drive.google.com/file/d/1s346KCe2aar_tXX7u8rzjRF3kpwVpH5c/view?usp=drive_link" + elif ( + "can_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-34-05_42/checkpoint/state_500.pt" + in path + ): + return "https://drive.google.com/file/d/1SHKcorbyGDg3I0h6hvOkQXWQT4dD0gGh/view?usp=drive_link" elif ( "can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_5000.pt" in path @@ -393,6 +403,11 @@ def get_checkpoint_download_url(cfg): in path ): return "https://drive.google.com/file/d/11IEgQe0LFI23hn1Cwf6Z_YfJdDilVc0z/view?usp=drive_link" + elif ( + "square_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-36-37_42/checkpoint/state_500.pt" + in path + ): + return "https://drive.google.com/file/d/1fcOi9srPOVPwEzBRV-1pzVDq3uap3YAs/view?usp=drive_link" elif ( "square_pre_diffusion_mlp_ta4_td20/2024-07-10_01-46-16/checkpoint/state_8000.pt" in path @@ -459,6 +474,11 @@ def get_checkpoint_download_url(cfg): in path ): return "https://drive.google.com/file/d/1MNGT8j9x1uudugGUcia-xwP_7f7xVY4K/view?usp=drive_link" + elif ( + "transport_pre_diffusion_unet_img_ta16_td100/2024-11-15_17-55-22_42/checkpoint/state_1000.pt" + in path + ): + return "https://drive.google.com/file/d/1G5LTxgRZvPm7NCbfByL4q_FbNvRgWguW/view?usp=drive_link" elif ( 
"transport_pre_diffusion_mlp_ta8_td20/2024-07-08_11-18-59/checkpoint/state_8000.pt" in path