diff --git a/agent/eval/eval_agent.py b/agent/eval/eval_agent.py
index d982bd5..d6774e3 100644
--- a/agent/eval/eval_agent.py
+++ b/agent/eval/eval_agent.py
@@ -57,6 +57,7 @@ class EvalAgent:
         self.horizon_steps = cfg.horizon_steps
         self.max_episode_steps = cfg.env.max_episode_steps
         self.reset_at_iteration = cfg.env.get("reset_at_iteration", True)
+        self.save_full_observations = cfg.env.get("save_full_observations", False)
         self.furniture_sparse_reward = (
             cfg.env.specific.get("sparse_reward", False)
             if "specific" in cfg.env
@@ -85,6 +86,10 @@ class EvalAgent:
         assert not (
             self.n_render <= 0 and self.render_video
         ), "Need to set n_render > 0 if saving video"
+        self.traj_plotter = (
+            hydra.utils.instantiate(cfg.plotter)
+            if "plotter" in cfg else None
+        )
 
     def run(self):
         pass
diff --git a/agent/eval/eval_diffusion_agent.py b/agent/eval/eval_diffusion_agent.py
index 577c11b..6833372 100644
--- a/agent/eval/eval_diffusion_agent.py
+++ b/agent/eval/eval_diffusion_agent.py
@@ -37,6 +37,11 @@ class EvalDiffusionAgent(EvalAgent):
         prev_obs_venv = self.reset_env_all(options_venv=options_venv)
         firsts_trajs[0] = 1
         reward_trajs = np.zeros((self.n_steps, self.n_envs))
+        if self.save_full_observations:  # state-only
+            obs_full_trajs = np.empty((0, self.n_envs, self.obs_dim))
+            obs_full_trajs = np.vstack(
+                (obs_full_trajs, prev_obs_venv["state"][:, -1][None])
+            )
 
         # Collect a set of trajectories from env
         for step in range(self.n_steps):
@@ -62,6 +67,13 @@ class EvalDiffusionAgent(EvalAgent):
             )
             reward_trajs[step] = reward_venv
             firsts_trajs[step + 1] = terminated_venv | truncated_venv
+            if self.save_full_observations:  # state-only
+                obs_full_venv = np.array(
+                    [info["full_obs"]["state"] for info in info_venv]
+                )  # n_envs x act_steps x obs_dim
+                obs_full_trajs = np.vstack(
+                    (obs_full_trajs, obs_full_venv.transpose(1, 0, 2))
+                )
 
             # update for next step
             prev_obs_venv = obs_venv
@@ -108,6 +120,16 @@ class EvalDiffusionAgent(EvalAgent):
             success_rate = 0
             log.info("[WARNING] No episode completed within the iteration!")
 
+        # Plot state trajectories (only in D3IL)
+        if self.traj_plotter is not None:
+            self.traj_plotter(
+                obs_full_trajs=obs_full_trajs,
+                n_render=self.n_render,
+                max_episode_steps=self.max_episode_steps,
+                render_dir=self.render_dir,
+                itr=0,
+            )
+
         # Log loss and save metrics
         time = timer()
         log.info(
diff --git a/cfg/d3il/eval/avoid_m1/eval_diffusion_mlp.yaml b/cfg/d3il/eval/avoid_m1/eval_diffusion_mlp.yaml
new file mode 100644
index 0000000..e3d0653
--- /dev/null
+++ b/cfg/d3il/eval/avoid_m1/eval_diffusion_mlp.yaml
@@ -0,0 +1,68 @@
+defaults:
+  - _self_
+hydra:
+  run:  
+    dir: ${logdir}
+_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
+
+name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/d3il-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path:
+normalization_path: ${oc.env:DPPO_DATA_DIR}/d3il/avoid_m1/normalization.npz
+
+seed: 42
+device: cuda:0
+env_name: avoiding-m5
+obs_dim: 4
+action_dim: 2
+denoising_steps: 20
+cond_steps: 1
+horizon_steps: 4
+act_steps: 4
+
+n_steps: 25
+render_num: 40
+
+plotter:
+  _target_: env.plot_traj.TrajPlotter
+  env_type: avoid
+  normalization_path: ${normalization_path}
+
+env:
+  n_envs: 40
+  name: ${env_name}
+  max_episode_steps: 100
+  reset_at_iteration: True
+  save_video: False
+  best_reward_threshold_for_success: 2
+  save_full_observations: True
+  wrappers:
+    d3il_lowdim:
+      normalization_path: ${normalization_path}
+    multi_step:
+      n_obs_steps: ${cond_steps}
+      n_action_steps: ${act_steps}
+      max_episode_steps: ${env.max_episode_steps}
+      pass_full_observations: ${env.save_full_observations}
+      reset_within_step: False
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  #
+  network_path: ${base_policy_path}
+  network:
+    _target_: model.diffusion.mlp_diffusion.DiffusionMLP
+    time_dim: 16
+    mlp_dims: [512, 512, 512]
+    activation_type: ReLU
+    residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+    horizon_steps: ${horizon_steps}
+    action_dim: ${action_dim}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/d3il/pretrain/avoid_m1/pre_diffusion_mlp.yaml b/cfg/d3il/pretrain/avoid_m1/pre_diffusion_mlp.yaml
index d938219..479868d 100644
--- a/cfg/d3il/pretrain/avoid_m1/pre_diffusion_mlp.yaml
+++ b/cfg/d3il/pretrain/avoid_m1/pre_diffusion_mlp.yaml
@@ -25,12 +25,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 15000
+  n_epochs: 5000
   batch_size: 16
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 15000
+    first_cycle_steps: 5000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/d3il/pretrain/avoid_m1/pre_gaussian_mlp.yaml b/cfg/d3il/pretrain/avoid_m1/pre_gaussian_mlp.yaml
index cc9898d..13ea725 100644
--- a/cfg/d3il/pretrain/avoid_m1/pre_gaussian_mlp.yaml
+++ b/cfg/d3il/pretrain/avoid_m1/pre_gaussian_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 10000
+  n_epochs: 5000
   batch_size: 16
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 5000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/d3il/pretrain/avoid_m1/pre_gmm_mlp.yaml b/cfg/d3il/pretrain/avoid_m1/pre_gmm_mlp.yaml
index 24c4fef..0f6d6d0 100644
--- a/cfg/d3il/pretrain/avoid_m1/pre_gmm_mlp.yaml
+++ b/cfg/d3il/pretrain/avoid_m1/pre_gmm_mlp.yaml
@@ -25,12 +25,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 10000
-  batch_size: 32
+  n_epochs: 5000
+  batch_size: 16
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 5000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/d3il/pretrain/avoid_m2/pre_diffusion_mlp.yaml b/cfg/d3il/pretrain/avoid_m2/pre_diffusion_mlp.yaml
index 1d7479e..cb4aaaf 100644
--- a/cfg/d3il/pretrain/avoid_m2/pre_diffusion_mlp.yaml
+++ b/cfg/d3il/pretrain/avoid_m2/pre_diffusion_mlp.yaml
@@ -25,12 +25,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 15000
+  n_epochs: 5000
   batch_size: 16
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 15000
+    first_cycle_steps: 5000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/d3il/pretrain/avoid_m2/pre_gaussian_mlp.yaml b/cfg/d3il/pretrain/avoid_m2/pre_gaussian_mlp.yaml
index 0828b4d..ed1aa50 100644
--- a/cfg/d3il/pretrain/avoid_m2/pre_gaussian_mlp.yaml
+++ b/cfg/d3il/pretrain/avoid_m2/pre_gaussian_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 10000
+  n_epochs: 5000
   batch_size: 16
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 5000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/d3il/pretrain/avoid_m2/pre_gmm_mlp.yaml b/cfg/d3il/pretrain/avoid_m2/pre_gmm_mlp.yaml
index ae1af2a..0be031c 100644
--- a/cfg/d3il/pretrain/avoid_m2/pre_gmm_mlp.yaml
+++ b/cfg/d3il/pretrain/avoid_m2/pre_gmm_mlp.yaml
@@ -25,12 +25,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 10000
-  batch_size: 32
+  n_epochs: 5000
+  batch_size: 16
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 5000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/d3il/pretrain/avoid_m3/pre_diffusion_mlp.yaml b/cfg/d3il/pretrain/avoid_m3/pre_diffusion_mlp.yaml
index 4bdd65a..580664b 100644
--- a/cfg/d3il/pretrain/avoid_m3/pre_diffusion_mlp.yaml
+++ b/cfg/d3il/pretrain/avoid_m3/pre_diffusion_mlp.yaml
@@ -25,12 +25,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 15000
+  n_epochs: 5000
   batch_size: 16
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 15000
+    first_cycle_steps: 5000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/d3il/pretrain/avoid_m3/pre_gaussian_mlp.yaml b/cfg/d3il/pretrain/avoid_m3/pre_gaussian_mlp.yaml
index b2aeaf7..7eb09bd 100644
--- a/cfg/d3il/pretrain/avoid_m3/pre_gaussian_mlp.yaml
+++ b/cfg/d3il/pretrain/avoid_m3/pre_gaussian_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 10000
+  n_epochs: 5000
   batch_size: 16
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 5000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/d3il/pretrain/avoid_m3/pre_gmm_mlp.yaml b/cfg/d3il/pretrain/avoid_m3/pre_gmm_mlp.yaml
index e860bc0..9b908b0 100644
--- a/cfg/d3il/pretrain/avoid_m3/pre_gmm_mlp.yaml
+++ b/cfg/d3il/pretrain/avoid_m3/pre_gmm_mlp.yaml
@@ -25,12 +25,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 10000
+  n_epochs: 5000
   batch_size: 32
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 5000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/finetuning.md b/cfg/finetuning.md
index b7532f2..6fde4df 100644
--- a/cfg/finetuning.md
+++ b/cfg/finetuning.md
@@ -1,5 +1,7 @@
 ## Fine-tuning experiments
 
+**Update, Nov 20 2024**: In v0.7 we updated the fine-tuning configs, as we found that sample efficiency can be improved with a higher actor learning rate and other hyperparameter changes. If you would like to replicate the original experimental results from the paper, please use the configs from v0.6. Otherwise, we recommend starting with the configs from v0.7 for your applications.
+
 ### Comparing diffusion-based RL algorithms (Sec. 5.1)
 Gym configs are under `cfg/gym/finetune/<env_name>/`, and the naming follows `ft_<alg_name>_diffusion_mlp`, e.g., `ft_awr_diffusion_mlp`. `alg_name` is one of `rwr`, `awr`, `dipo`, `idql`, `dql`, `qsm`, `ppo` (DPPO), `ppo_exact` (exact likelihood). They share the same pre-trained checkpoint in each env.
 
diff --git a/cfg/furniture/eval/lamp_low/eval_diffusion_mlp.yaml b/cfg/furniture/eval/lamp_low/eval_diffusion_mlp.yaml
new file mode 100644
index 0000000..ea712a7
--- /dev/null
+++ b/cfg/furniture/eval/lamp_low/eval_diffusion_mlp.yaml
@@ -0,0 +1,66 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
+
+name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path:
+normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
+
+seed: 42
+device: cuda:0
+env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
+obs_dim: 44
+action_dim: 10
+denoising_steps: 100
+cond_steps: 1
+horizon_steps: 8
+act_steps: 8
+use_ddim: True
+ddim_steps: 5
+
+n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
+render_num: 0
+
+env:
+  n_envs: 1000
+  name: ${env_name}
+  env_type: furniture
+  max_episode_steps: 1000
+  best_reward_threshold_for_success: 2
+  specific:
+    headless: true
+    furniture: lamp
+    randomness: low
+    normalization_path: ${normalization_path}
+    obs_steps: ${cond_steps}
+    act_steps: ${act_steps}
+    sparse_reward: True
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  randn_clip_value: 3
+  #
+  use_ddim: ${use_ddim}
+  ddim_steps: ${ddim_steps}
+  network_path: ${base_policy_path}
+  network:
+    _target_: model.diffusion.mlp_diffusion.DiffusionMLP
+    time_dim: 32
+    mlp_dims: [1024, 1024, 1024, 1024, 1024, 1024, 1024]
+    cond_mlp_dims: [512, 64]
+    use_layernorm: True # needed for larger MLP
+    residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+    horizon_steps: ${horizon_steps}
+    action_dim: ${action_dim}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/furniture/eval/lamp_low/eval_diffusion_unet.yaml b/cfg/furniture/eval/lamp_low/eval_diffusion_unet.yaml
new file mode 100644
index 0000000..6de459a
--- /dev/null
+++ b/cfg/furniture/eval/lamp_low/eval_diffusion_unet.yaml
@@ -0,0 +1,68 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
+
+name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path:
+normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
+
+seed: 42
+device: cuda:0
+env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
+obs_dim: 44
+action_dim: 10
+denoising_steps: 100
+cond_steps: 1
+horizon_steps: 16
+act_steps: 8
+use_ddim: True
+ddim_steps: 5
+
+n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
+render_num: 0
+
+env:
+  n_envs: 1000
+  name: ${env_name}
+  env_type: furniture
+  max_episode_steps: 1000
+  best_reward_threshold_for_success: 2
+  specific:
+    headless: true
+    furniture: lamp
+    randomness: low
+    normalization_path: ${normalization_path}
+    obs_steps: ${cond_steps}
+    act_steps: ${act_steps}
+    sparse_reward: True
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  randn_clip_value: 3
+  #
+  use_ddim: ${use_ddim}
+  ddim_steps: ${ddim_steps}
+  network_path: ${base_policy_path}
+  network:
+    _target_: model.diffusion.unet.Unet1D
+    diffusion_step_embed_dim: 16
+    dim: 64
+    dim_mults: [1, 2, 4]
+    kernel_size: 5
+    n_groups: 8
+    smaller_encoder: False
+    cond_predict_scale: True
+    groupnorm_eps: 1e-4 # not important
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+    action_dim: ${action_dim}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/furniture/eval/one_leg_low/eval_diffusion_mlp.yaml b/cfg/furniture/eval/one_leg_low/eval_diffusion_mlp.yaml
index 2f8d0ef..3694c70 100644
--- a/cfg/furniture/eval/one_leg_low/eval_diffusion_mlp.yaml
+++ b/cfg/furniture/eval/one_leg_low/eval_diffusion_mlp.yaml
@@ -7,7 +7,7 @@ _target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
 
 name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
 logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
-base_policy_path: ${oc.env:DPPO_LOG_DIR}/furniture-pretrain/one_leg/one_leg_low_dim_pre_diffusion_mlp_ta8_td100/2024-07-22_20-01-16/checkpoint/state_8000.pt
+base_policy_path:
 normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
 
 seed: 42
diff --git a/cfg/furniture/eval/one_leg_low/eval_diffusion_unet.yaml b/cfg/furniture/eval/one_leg_low/eval_diffusion_unet.yaml
new file mode 100644
index 0000000..c1626d9
--- /dev/null
+++ b/cfg/furniture/eval/one_leg_low/eval_diffusion_unet.yaml
@@ -0,0 +1,68 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
+
+name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path:
+normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
+
+seed: 42
+device: cuda:0
+env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
+obs_dim: 58
+action_dim: 10
+denoising_steps: 100
+cond_steps: 1
+horizon_steps: 16
+act_steps: 8
+use_ddim: True
+ddim_steps: 5
+
+n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
+render_num: 0
+
+env:
+  n_envs: 1000
+  name: ${env_name}
+  env_type: furniture
+  max_episode_steps: 700
+  best_reward_threshold_for_success: 1
+  specific:
+    headless: true
+    furniture: one_leg
+    randomness: low
+    normalization_path: ${normalization_path}
+    obs_steps: ${cond_steps}
+    act_steps: ${act_steps}
+    sparse_reward: True
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  randn_clip_value: 3
+  #
+  use_ddim: ${use_ddim}
+  ddim_steps: ${ddim_steps}
+  network_path: ${base_policy_path}
+  network:
+    _target_: model.diffusion.unet.Unet1D
+    diffusion_step_embed_dim: 16
+    dim: 64
+    dim_mults: [1, 2, 4]
+    kernel_size: 5
+    n_groups: 8
+    smaller_encoder: False
+    cond_predict_scale: True
+    groupnorm_eps: 1e-4 # not important
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+    action_dim: ${action_dim}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/furniture/eval/round_table_low/eval_diffusion_mlp.yaml b/cfg/furniture/eval/round_table_low/eval_diffusion_mlp.yaml
new file mode 100644
index 0000000..9109faf
--- /dev/null
+++ b/cfg/furniture/eval/round_table_low/eval_diffusion_mlp.yaml
@@ -0,0 +1,66 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
+
+name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path:
+normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
+
+seed: 42
+device: cuda:0
+env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
+obs_dim: 44
+action_dim: 10
+denoising_steps: 100
+cond_steps: 1
+horizon_steps: 8
+act_steps: 8
+use_ddim: True
+ddim_steps: 5
+
+n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
+render_num: 0
+
+env:
+  n_envs: 1000
+  name: ${env_name}
+  env_type: furniture
+  max_episode_steps: 1000
+  best_reward_threshold_for_success: 2
+  specific:
+    headless: true
+    furniture: round_table
+    randomness: low
+    normalization_path: ${normalization_path}
+    obs_steps: ${cond_steps}
+    act_steps: ${act_steps}
+    sparse_reward: True
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  randn_clip_value: 3
+  #
+  use_ddim: ${use_ddim}
+  ddim_steps: ${ddim_steps}
+  network_path: ${base_policy_path}
+  network:
+    _target_: model.diffusion.mlp_diffusion.DiffusionMLP
+    time_dim: 32
+    mlp_dims: [1024, 1024, 1024, 1024, 1024, 1024, 1024]
+    cond_mlp_dims: [512, 64]
+    use_layernorm: True # needed for larger MLP
+    residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+    horizon_steps: ${horizon_steps}
+    action_dim: ${action_dim}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/furniture/eval/round_table_low/eval_diffusion_unet.yaml b/cfg/furniture/eval/round_table_low/eval_diffusion_unet.yaml
new file mode 100644
index 0000000..daf050e
--- /dev/null
+++ b/cfg/furniture/eval/round_table_low/eval_diffusion_unet.yaml
@@ -0,0 +1,68 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
+
+name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path:
+normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
+
+seed: 42
+device: cuda:0
+env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
+obs_dim: 44
+action_dim: 10
+denoising_steps: 100
+cond_steps: 1
+horizon_steps: 16
+act_steps: 8
+use_ddim: True
+ddim_steps: 5
+
+n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
+render_num: 0
+
+env:
+  n_envs: 1000
+  name: ${env_name}
+  env_type: furniture
+  max_episode_steps: 1000
+  best_reward_threshold_for_success: 2
+  specific:
+    headless: true
+    furniture: round_table
+    randomness: low
+    normalization_path: ${normalization_path}
+    obs_steps: ${cond_steps}
+    act_steps: ${act_steps}
+    sparse_reward: True
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  randn_clip_value: 3
+  #
+  use_ddim: ${use_ddim}
+  ddim_steps: ${ddim_steps}
+  network_path: ${base_policy_path}
+  network:
+    _target_: model.diffusion.unet.Unet1D
+    diffusion_step_embed_dim: 16
+    dim: 64
+    dim_mults: [1, 2, 4]
+    kernel_size: 5
+    n_groups: 8
+    smaller_encoder: False
+    cond_predict_scale: True
+    groupnorm_eps: 1e-4 # not important
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+    action_dim: ${action_dim}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/furniture/pretrain/lamp_low/pre_diffusion_mlp.yaml b/cfg/furniture/pretrain/lamp_low/pre_diffusion_mlp.yaml
index a9b4fa6..438d4cf 100644
--- a/cfg/furniture/pretrain/lamp_low/pre_diffusion_mlp.yaml
+++ b/cfg/furniture/pretrain/lamp_low/pre_diffusion_mlp.yaml
@@ -31,7 +31,7 @@ train:
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 8000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/furniture/pretrain/lamp_low/pre_diffusion_unet.yaml b/cfg/furniture/pretrain/lamp_low/pre_diffusion_unet.yaml
index bec9393..61785de 100644
--- a/cfg/furniture/pretrain/lamp_low/pre_diffusion_unet.yaml
+++ b/cfg/furniture/pretrain/lamp_low/pre_diffusion_unet.yaml
@@ -31,7 +31,7 @@ train:
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 8000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/furniture/pretrain/lamp_low/pre_gaussian_mlp.yaml b/cfg/furniture/pretrain/lamp_low/pre_gaussian_mlp.yaml
index e07591d..bd383a9 100644
--- a/cfg/furniture/pretrain/lamp_low/pre_gaussian_mlp.yaml
+++ b/cfg/furniture/pretrain/lamp_low/pre_gaussian_mlp.yaml
@@ -30,7 +30,7 @@ train:
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/furniture/pretrain/lamp_med/pre_diffusion_mlp.yaml b/cfg/furniture/pretrain/lamp_med/pre_diffusion_mlp.yaml
index d6fb48d..f83fab4 100644
--- a/cfg/furniture/pretrain/lamp_med/pre_diffusion_mlp.yaml
+++ b/cfg/furniture/pretrain/lamp_med/pre_diffusion_mlp.yaml
@@ -31,7 +31,7 @@ train:
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 8000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/furniture/pretrain/lamp_med/pre_diffusion_unet.yaml b/cfg/furniture/pretrain/lamp_med/pre_diffusion_unet.yaml
index 4ed0a25..c2c8568 100644
--- a/cfg/furniture/pretrain/lamp_med/pre_diffusion_unet.yaml
+++ b/cfg/furniture/pretrain/lamp_med/pre_diffusion_unet.yaml
@@ -31,7 +31,7 @@ train:
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 8000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/furniture/pretrain/lamp_med/pre_gaussian_mlp.yaml b/cfg/furniture/pretrain/lamp_med/pre_gaussian_mlp.yaml
index 61b3ac1..c58ef3b 100644
--- a/cfg/furniture/pretrain/lamp_med/pre_gaussian_mlp.yaml
+++ b/cfg/furniture/pretrain/lamp_med/pre_gaussian_mlp.yaml
@@ -30,7 +30,7 @@ train:
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/furniture/pretrain/one_leg_low/pre_diffusion_mlp.yaml b/cfg/furniture/pretrain/one_leg_low/pre_diffusion_mlp.yaml
index 6c733d7..bfbb4c4 100644
--- a/cfg/furniture/pretrain/one_leg_low/pre_diffusion_mlp.yaml
+++ b/cfg/furniture/pretrain/one_leg_low/pre_diffusion_mlp.yaml
@@ -31,7 +31,7 @@ train:
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 8000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/furniture/pretrain/one_leg_low/pre_diffusion_unet.yaml b/cfg/furniture/pretrain/one_leg_low/pre_diffusion_unet.yaml
index c2a44da..57d0a1f 100644
--- a/cfg/furniture/pretrain/one_leg_low/pre_diffusion_unet.yaml
+++ b/cfg/furniture/pretrain/one_leg_low/pre_diffusion_unet.yaml
@@ -31,7 +31,7 @@ train:
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 8000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/furniture/pretrain/one_leg_low/pre_gaussian_mlp.yaml b/cfg/furniture/pretrain/one_leg_low/pre_gaussian_mlp.yaml
index 5175736..2d43baa 100644
--- a/cfg/furniture/pretrain/one_leg_low/pre_gaussian_mlp.yaml
+++ b/cfg/furniture/pretrain/one_leg_low/pre_gaussian_mlp.yaml
@@ -30,7 +30,7 @@ train:
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/furniture/pretrain/one_leg_med/pre_diffusion_mlp.yaml b/cfg/furniture/pretrain/one_leg_med/pre_diffusion_mlp.yaml
index d06a3a7..551b028 100644
--- a/cfg/furniture/pretrain/one_leg_med/pre_diffusion_mlp.yaml
+++ b/cfg/furniture/pretrain/one_leg_med/pre_diffusion_mlp.yaml
@@ -31,7 +31,7 @@ train:
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 8000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/furniture/pretrain/one_leg_med/pre_diffusion_unet.yaml b/cfg/furniture/pretrain/one_leg_med/pre_diffusion_unet.yaml
index 8a31f26..ccf0e67 100644
--- a/cfg/furniture/pretrain/one_leg_med/pre_diffusion_unet.yaml
+++ b/cfg/furniture/pretrain/one_leg_med/pre_diffusion_unet.yaml
@@ -31,7 +31,7 @@ train:
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 8000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/furniture/pretrain/one_leg_med/pre_gaussian_mlp.yaml b/cfg/furniture/pretrain/one_leg_med/pre_gaussian_mlp.yaml
index e7b9e7d..b58dd2b 100644
--- a/cfg/furniture/pretrain/one_leg_med/pre_gaussian_mlp.yaml
+++ b/cfg/furniture/pretrain/one_leg_med/pre_gaussian_mlp.yaml
@@ -25,12 +25,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 10000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/furniture/pretrain/round_table_low/pre_diffusion_mlp.yaml b/cfg/furniture/pretrain/round_table_low/pre_diffusion_mlp.yaml
index a49e1ce..66a23a4 100644
--- a/cfg/furniture/pretrain/round_table_low/pre_diffusion_mlp.yaml
+++ b/cfg/furniture/pretrain/round_table_low/pre_diffusion_mlp.yaml
@@ -31,7 +31,7 @@ train:
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 8000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/furniture/pretrain/round_table_low/pre_diffusion_unet.yaml b/cfg/furniture/pretrain/round_table_low/pre_diffusion_unet.yaml
index 4fd8cf0..63d4df7 100644
--- a/cfg/furniture/pretrain/round_table_low/pre_diffusion_unet.yaml
+++ b/cfg/furniture/pretrain/round_table_low/pre_diffusion_unet.yaml
@@ -31,7 +31,7 @@ train:
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 8000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/furniture/pretrain/round_table_low/pre_gaussian_mlp.yaml b/cfg/furniture/pretrain/round_table_low/pre_gaussian_mlp.yaml
index a2252e6..e44488c 100644
--- a/cfg/furniture/pretrain/round_table_low/pre_gaussian_mlp.yaml
+++ b/cfg/furniture/pretrain/round_table_low/pre_gaussian_mlp.yaml
@@ -30,7 +30,7 @@ train:
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/furniture/pretrain/round_table_med/pre_diffusion_mlp.yaml b/cfg/furniture/pretrain/round_table_med/pre_diffusion_mlp.yaml
index b81c31d..a5f2865 100644
--- a/cfg/furniture/pretrain/round_table_med/pre_diffusion_mlp.yaml
+++ b/cfg/furniture/pretrain/round_table_med/pre_diffusion_mlp.yaml
@@ -31,7 +31,7 @@ train:
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 8000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/furniture/pretrain/round_table_med/pre_diffusion_unet.yaml b/cfg/furniture/pretrain/round_table_med/pre_diffusion_unet.yaml
index cee0254..b8a5fec 100644
--- a/cfg/furniture/pretrain/round_table_med/pre_diffusion_unet.yaml
+++ b/cfg/furniture/pretrain/round_table_med/pre_diffusion_unet.yaml
@@ -31,7 +31,7 @@ train:
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 8000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/furniture/pretrain/round_table_med/pre_gaussian_mlp.yaml b/cfg/furniture/pretrain/round_table_med/pre_gaussian_mlp.yaml
index c7d0e30..041b511 100644
--- a/cfg/furniture/pretrain/round_table_med/pre_gaussian_mlp.yaml
+++ b/cfg/furniture/pretrain/round_table_med/pre_gaussian_mlp.yaml
@@ -30,7 +30,7 @@ train:
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/gym/eval/halfcheetah-v2/eval_diffusion_mlp.yaml b/cfg/gym/eval/halfcheetah-v2/eval_diffusion_mlp.yaml
index bfef1e1..b8e0404 100644
--- a/cfg/gym/eval/halfcheetah-v2/eval_diffusion_mlp.yaml
+++ b/cfg/gym/eval/halfcheetah-v2/eval_diffusion_mlp.yaml
@@ -17,10 +17,10 @@ obs_dim: 17
 action_dim: 6
 denoising_steps: 20
 cond_steps: 1
-horizon_steps: 1
-act_steps: 1
+horizon_steps: 4
+act_steps: 4
 
-n_steps: 1000  # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation.
+n_steps: 250  # each episode takes at most max_episode_steps / act_steps (= 250 right now) steps, but may finish earlier in gym. Only episodes that finish within n_steps are counted for evaluation.
 render_num: 0
 
 env:
diff --git a/cfg/gym/eval/hopper-v2/eval_diffusion_mlp.yaml b/cfg/gym/eval/hopper-v2/eval_diffusion_mlp.yaml
index 754ed1e..4c0fcac 100644
--- a/cfg/gym/eval/hopper-v2/eval_diffusion_mlp.yaml
+++ b/cfg/gym/eval/hopper-v2/eval_diffusion_mlp.yaml
@@ -20,7 +20,7 @@ cond_steps: 1
 horizon_steps: 4
 act_steps: 4
 
-n_steps: 500  # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation.
+n_steps: 250  # each episode takes at most max_episode_steps / act_steps (= 250 here) steps, but may finish earlier in gym. Only episodes that finish within n_steps are counted for evaluation.
 render_num: 0
 
 env:
diff --git a/cfg/gym/eval/walker2d-v2/eval_diffusion_mlp.yaml b/cfg/gym/eval/walker2d-v2/eval_diffusion_mlp.yaml
new file mode 100644
index 0000000..1b1d2b0
--- /dev/null
+++ b/cfg/gym/eval/walker2d-v2/eval_diffusion_mlp.yaml
@@ -0,0 +1,61 @@
+defaults:
+  - _self_
+hydra:
+  run:  
+    dir: ${logdir}
+_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
+
+name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/gym-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path:
+normalization_path: ${oc.env:DPPO_DATA_DIR}/gym/${env_name}/normalization.npz
+
+seed: 42
+device: cuda:0
+env_name: walker2d-medium-v2
+obs_dim: 17
+action_dim: 6
+denoising_steps: 20
+cond_steps: 1
+horizon_steps: 4
+act_steps: 4
+
+n_steps: 250  # each episode takes at most max_episode_steps / act_steps (= 250 here) steps, but may finish earlier in gym. Only episodes that finish within n_steps are counted for evaluation.
+render_num: 0
+
+env:
+  n_envs: 40
+  name: ${env_name}
+  max_episode_steps: 1000
+  reset_at_iteration: False
+  save_video: False
+  best_reward_threshold_for_success: 3  # success rate not relevant for gym tasks
+  wrappers:
+    mujoco_locomotion_lowdim:
+      normalization_path: ${normalization_path}
+    multi_step:
+      n_obs_steps: ${cond_steps}
+      n_action_steps: ${act_steps}
+      max_episode_steps: ${env.max_episode_steps}
+      reset_within_step: True
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  #
+  network_path: ${base_policy_path}
+  network:
+    _target_: model.diffusion.mlp_diffusion.DiffusionMLP
+    time_dim: 16
+    mlp_dims: [512, 512, 512]
+    activation_type: ReLU
+    residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+    horizon_steps: ${horizon_steps}
+    action_dim: ${action_dim}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/gym/pretrain/halfcheetah-medium-v2/pre_diffusion_mlp.yaml b/cfg/gym/pretrain/halfcheetah-medium-v2/pre_diffusion_mlp.yaml
index 4c368b0..88ff719 100644
--- a/cfg/gym/pretrain/halfcheetah-medium-v2/pre_diffusion_mlp.yaml
+++ b/cfg/gym/pretrain/halfcheetah-medium-v2/pre_diffusion_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 3000
+  n_epochs: 200
   batch_size: 128
   learning_rate: 1e-3
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 3000
+    first_cycle_steps: 200
     warmup_steps: 1
     min_lr: 1e-4
   save_model_freq: 100
diff --git a/cfg/gym/pretrain/halfcheetah-medium-v2/pre_gaussian_mlp.yaml b/cfg/gym/pretrain/halfcheetah-medium-v2/pre_gaussian_mlp.yaml
index 53c74b5..050b922 100644
--- a/cfg/gym/pretrain/halfcheetah-medium-v2/pre_gaussian_mlp.yaml
+++ b/cfg/gym/pretrain/halfcheetah-medium-v2/pre_gaussian_mlp.yaml
@@ -23,15 +23,14 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 500
+  n_epochs: 200
   batch_size: 128
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: 200
     warmup_steps: 1
     min_lr: 1e-4
- 
   save_model_freq: 100
 
 model:
diff --git a/cfg/gym/pretrain/hopper-medium-v2/pre_diffusion_mlp.yaml b/cfg/gym/pretrain/hopper-medium-v2/pre_diffusion_mlp.yaml
index 01b0df8..6d6fb0a 100644
--- a/cfg/gym/pretrain/hopper-medium-v2/pre_diffusion_mlp.yaml
+++ b/cfg/gym/pretrain/hopper-medium-v2/pre_diffusion_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 3000
+  n_epochs: 200
   batch_size: 128
   learning_rate: 1e-3
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 3000
+    first_cycle_steps: 200
     warmup_steps: 1
     min_lr: 1e-4
   save_model_freq: 100
diff --git a/cfg/gym/pretrain/hopper-medium-v2/pre_gaussian_mlp.yaml b/cfg/gym/pretrain/hopper-medium-v2/pre_gaussian_mlp.yaml
index c4d0fe3..54099d2 100644
--- a/cfg/gym/pretrain/hopper-medium-v2/pre_gaussian_mlp.yaml
+++ b/cfg/gym/pretrain/hopper-medium-v2/pre_gaussian_mlp.yaml
@@ -23,12 +23,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 500
+  n_epochs: 200
   batch_size: 128
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: 200
     warmup_steps: 1
     min_lr: 1e-4
   save_model_freq: 100
diff --git a/cfg/gym/pretrain/kitchen-complete-v0/pre_diffusion_mlp.yaml b/cfg/gym/pretrain/kitchen-complete-v0/pre_diffusion_mlp.yaml
index 67b726c..49c8454 100644
--- a/cfg/gym/pretrain/kitchen-complete-v0/pre_diffusion_mlp.yaml
+++ b/cfg/gym/pretrain/kitchen-complete-v0/pre_diffusion_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 8000
+  n_epochs: 3000
   batch_size: 128
   learning_rate: 1e-3
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 8000
+    first_cycle_steps: 3000
     warmup_steps: 1
     min_lr: 1e-4
   save_model_freq: 500
diff --git a/cfg/gym/pretrain/kitchen-complete-v0/pre_gaussian_mlp.yaml b/cfg/gym/pretrain/kitchen-complete-v0/pre_gaussian_mlp.yaml
index ff479d1..a749342 100644
--- a/cfg/gym/pretrain/kitchen-complete-v0/pre_gaussian_mlp.yaml
+++ b/cfg/gym/pretrain/kitchen-complete-v0/pre_gaussian_mlp.yaml
@@ -23,12 +23,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 0
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-4
   save_model_freq: 500
diff --git a/cfg/gym/pretrain/kitchen-mixed-v0/pre_diffusion_mlp.yaml b/cfg/gym/pretrain/kitchen-mixed-v0/pre_diffusion_mlp.yaml
index 959b405..b8f2855 100644
--- a/cfg/gym/pretrain/kitchen-mixed-v0/pre_diffusion_mlp.yaml
+++ b/cfg/gym/pretrain/kitchen-mixed-v0/pre_diffusion_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 8000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-3
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 8000
+    first_cycle_steps: 3000
     warmup_steps: 1
     min_lr: 1e-4
   save_model_freq: 500
diff --git a/cfg/gym/pretrain/kitchen-mixed-v0/pre_gaussian_mlp.yaml b/cfg/gym/pretrain/kitchen-mixed-v0/pre_gaussian_mlp.yaml
index e498113..67aa18b 100644
--- a/cfg/gym/pretrain/kitchen-mixed-v0/pre_gaussian_mlp.yaml
+++ b/cfg/gym/pretrain/kitchen-mixed-v0/pre_gaussian_mlp.yaml
@@ -23,12 +23,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 128
   learning_rate: 1e-3
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 1
     min_lr: 1e-4
   save_model_freq: 500
diff --git a/cfg/gym/pretrain/kitchen-partial-v0/pre_diffusion_mlp.yaml b/cfg/gym/pretrain/kitchen-partial-v0/pre_diffusion_mlp.yaml
index affc770..acb5fb6 100644
--- a/cfg/gym/pretrain/kitchen-partial-v0/pre_diffusion_mlp.yaml
+++ b/cfg/gym/pretrain/kitchen-partial-v0/pre_diffusion_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 8000
+  n_epochs: 3000
   batch_size: 128
   learning_rate: 1e-3
   weight_decay: 1e-5
   lr_scheduler:
-    first_cycle_steps: 8000
+    first_cycle_steps: 3000
     warmup_steps: 1
     min_lr: 1e-4
   save_model_freq: 500
diff --git a/cfg/gym/pretrain/kitchen-partial-v0/pre_gaussian_mlp.yaml b/cfg/gym/pretrain/kitchen-partial-v0/pre_gaussian_mlp.yaml
index be98366..35e0579 100644
--- a/cfg/gym/pretrain/kitchen-partial-v0/pre_gaussian_mlp.yaml
+++ b/cfg/gym/pretrain/kitchen-partial-v0/pre_gaussian_mlp.yaml
@@ -23,12 +23,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 128
   learning_rate: 1e-3
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 1
     min_lr: 1e-4
   save_model_freq: 500
diff --git a/cfg/gym/pretrain/walker2d-medium-v2/pre_diffusion_mlp.yaml b/cfg/gym/pretrain/walker2d-medium-v2/pre_diffusion_mlp.yaml
index 6f32b0c..ccaf830 100644
--- a/cfg/gym/pretrain/walker2d-medium-v2/pre_diffusion_mlp.yaml
+++ b/cfg/gym/pretrain/walker2d-medium-v2/pre_diffusion_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 3000
+  n_epochs: 200
   batch_size: 128
   learning_rate: 1e-3
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 3000
+    first_cycle_steps: 200
     warmup_steps: 1
     min_lr: 1e-4
   save_model_freq: 100
diff --git a/cfg/gym/pretrain/walker2d-medium-v2/pre_gaussian_mlp.yaml b/cfg/gym/pretrain/walker2d-medium-v2/pre_gaussian_mlp.yaml
index 99b2f8c..d24932d 100644
--- a/cfg/gym/pretrain/walker2d-medium-v2/pre_gaussian_mlp.yaml
+++ b/cfg/gym/pretrain/walker2d-medium-v2/pre_gaussian_mlp.yaml
@@ -23,12 +23,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 3000
+  n_epochs: 200
   batch_size: 128
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 3000
+    first_cycle_steps: 200
     warmup_steps: 1
     min_lr: 1e-4
   save_model_freq: 100
diff --git a/cfg/gym/scratch/halfcheetah-v2/ppo_diffusion_mlp.yaml b/cfg/gym/scratch/halfcheetah-v2/ppo_diffusion_mlp.yaml
index 49f11ed..052cf90 100644
--- a/cfg/gym/scratch/halfcheetah-v2/ppo_diffusion_mlp.yaml
+++ b/cfg/gym/scratch/halfcheetah-v2/ppo_diffusion_mlp.yaml
@@ -1,7 +1,7 @@
 defaults:
   - _self_
 hydra:
-  run:  
+  run:
     dir: ${logdir}
 _target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
 
@@ -42,7 +42,7 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 1000
+  n_train_itr: 501
   n_critic_warmup_itr: 0
   n_steps: 1000
   gamma: 0.99
@@ -55,7 +55,7 @@ train:
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 1000
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
@@ -67,7 +67,7 @@ train:
   reward_scale_running: True
   reward_scale_const: 1.0
   gae_lambda: 0.95
-  batch_size: 10000
+  batch_size: 5000
   update_epochs: 10
   vf_coef: 0.5
   target_kl: 1
@@ -75,7 +75,7 @@ train:
 model:
   _target_: model.diffusion.diffusion_ppo.PPODiffusion
   # HP to tune
-  gamma_denoising: 0.99
+  gamma_denoising: 1
   clip_ploss_coef: 0.1
   clip_ploss_coef_base: 0.1
   clip_ploss_coef_rate: 3
@@ -94,10 +94,10 @@ model:
     residual_style: True
   critic:
     _target_: model.common.critic.CriticObs
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   ft_denoising_steps: ${ft_denoising_steps}
   horizon_steps: ${horizon_steps}
   obs_dim: ${obs_dim}
diff --git a/cfg/gym/scratch/halfcheetah-v2/ppo_gaussian_mlp.yaml b/cfg/gym/scratch/halfcheetah-v2/ppo_gaussian_mlp.yaml
index b0c1241..ed2c881 100644
--- a/cfg/gym/scratch/halfcheetah-v2/ppo_gaussian_mlp.yaml
+++ b/cfg/gym/scratch/halfcheetah-v2/ppo_gaussian_mlp.yaml
@@ -40,7 +40,7 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 1000
+  n_train_itr: 501
   n_critic_warmup_itr: 0
   n_steps: 1000
   gamma: 0.99
@@ -65,7 +65,7 @@ train:
   reward_scale_running: True
   reward_scale_const: 1.0
   gae_lambda: 0.95
-  batch_size: 1000
+  batch_size: 500
   update_epochs: 10
   vf_coef: 0.5
   target_kl: 1
diff --git a/cfg/gym/scratch/hopper-v2/ppo_diffusion_mlp.yaml b/cfg/gym/scratch/hopper-v2/ppo_diffusion_mlp.yaml
index 729a0c6..39edb31 100644
--- a/cfg/gym/scratch/hopper-v2/ppo_diffusion_mlp.yaml
+++ b/cfg/gym/scratch/hopper-v2/ppo_diffusion_mlp.yaml
@@ -42,7 +42,7 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 1000
+  n_train_itr: 301
   n_critic_warmup_itr: 0
   n_steps: 1000
   gamma: 0.99
@@ -67,7 +67,7 @@ train:
   reward_scale_running: True
   reward_scale_const: 1.0
   gae_lambda: 0.95
-  batch_size: 10000
+  batch_size: 5000
   update_epochs: 10
   vf_coef: 0.5
   target_kl: 1
@@ -75,7 +75,7 @@ train:
 model:
   _target_: model.diffusion.diffusion_ppo.PPODiffusion
   # HP to tune
-  gamma_denoising: 0.99
+  gamma_denoising: 1
   clip_ploss_coef: 0.1
   clip_ploss_coef_base: 0.1
   clip_ploss_coef_rate: 3
diff --git a/cfg/gym/scratch/hopper-v2/ppo_gaussian_mlp.yaml b/cfg/gym/scratch/hopper-v2/ppo_gaussian_mlp.yaml
index 05f5766..941ead1 100644
--- a/cfg/gym/scratch/hopper-v2/ppo_gaussian_mlp.yaml
+++ b/cfg/gym/scratch/hopper-v2/ppo_gaussian_mlp.yaml
@@ -40,7 +40,7 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 1000
+  n_train_itr: 301
   n_critic_warmup_itr: 0
   n_steps: 1000
   gamma: 0.99
@@ -65,7 +65,7 @@ train:
   reward_scale_running: True
   reward_scale_const: 1.0
   gae_lambda: 0.95
-  batch_size: 1000
+  batch_size: 500
   update_epochs: 10
   vf_coef: 0.5
   target_kl: 1
diff --git a/cfg/gym/scratch/walker2d-v2/ppo_diffusion_mlp.yaml b/cfg/gym/scratch/walker2d-v2/ppo_diffusion_mlp.yaml
index 2c1769f..89a0c85 100644
--- a/cfg/gym/scratch/walker2d-v2/ppo_diffusion_mlp.yaml
+++ b/cfg/gym/scratch/walker2d-v2/ppo_diffusion_mlp.yaml
@@ -42,7 +42,7 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 1000
+  n_train_itr: 501
   n_critic_warmup_itr: 0
   n_steps: 1000
   gamma: 0.99
@@ -55,7 +55,7 @@ train:
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 1000
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
@@ -67,7 +67,7 @@ train:
   reward_scale_running: True
   reward_scale_const: 1.0
   gae_lambda: 0.95
-  batch_size: 10000
+  batch_size: 5000
   update_epochs: 10
   vf_coef: 0.5
   target_kl: 1
@@ -75,7 +75,7 @@ train:
 model:
   _target_: model.diffusion.diffusion_ppo.PPODiffusion
   # HP to tune
-  gamma_denoising: 0.99
+  gamma_denoising: 1
   clip_ploss_coef: 0.1
   clip_ploss_coef_base: 0.1
   clip_ploss_coef_rate: 3
@@ -94,10 +94,10 @@ model:
     residual_style: True
   critic:
     _target_: model.common.critic.CriticObs
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   ft_denoising_steps: ${ft_denoising_steps}
   horizon_steps: ${horizon_steps}
   obs_dim: ${obs_dim}
diff --git a/cfg/gym/scratch/walker2d-v2/ppo_gaussian_mlp.yaml b/cfg/gym/scratch/walker2d-v2/ppo_gaussian_mlp.yaml
index 70b6267..5a32530 100644
--- a/cfg/gym/scratch/walker2d-v2/ppo_gaussian_mlp.yaml
+++ b/cfg/gym/scratch/walker2d-v2/ppo_gaussian_mlp.yaml
@@ -40,7 +40,7 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 1000
+  n_train_itr: 301
   n_critic_warmup_itr: 0
   n_steps: 1000
   gamma: 0.99
@@ -65,7 +65,7 @@ train:
   reward_scale_running: True
   reward_scale_const: 1.0
   gae_lambda: 0.95
-  batch_size: 1000
+  batch_size: 500
   update_epochs: 10
   vf_coef: 0.5
   target_kl: 1
diff --git a/cfg/pretraining.md b/cfg/pretraining.md
index 84a2b5a..943677e 100644
--- a/cfg/pretraining.md
+++ b/cfg/pretraining.md
@@ -1,6 +1,6 @@
 ## Pre-training experiments
 
-**Update, Nov 6 2024**: we fixed the issue of EMA update being too infrequent causing slow pre-training. Now the number of epochs needed for pre-training can be much slower than those used in the configs. We recommend training with fewer epochs and testing the early checkpoints.
+**Update, Nov 20 2024**: We fixed an issue where the EMA update was too infrequent, which slowed down pre-training ([commit](https://github.com/irom-princeton/dppo/commit/e1ef4ca1cfbff85e5ae6c49f5e57debd70174616)). The number of epochs needed for pre-training can now be much lower than those used in the configs (e.g., 3000 for robomimic state and 1000 for robomimic pixel), and we have updated the pre-training configs in v0.7. If you would like to replicate the original experimental results from the paper, please use v0.6.
 
 ### Comparing diffusion-based RL algorithms (Sec. 5.1)
 Gym configs are under `cfg/gym/pretrain/<env_name>/`, and the config name is `pre_diffusion_mlp`. Robomimic configs are under `cfg/robomimic/pretrain/<env_name>/`, and the name is also `pre_diffusion_mlp`.
diff --git a/cfg/robomimic/eval/can/eval_diffusion_mlp_img.yaml b/cfg/robomimic/eval/can/eval_diffusion_mlp_img.yaml
index b100545..55db305 100644
--- a/cfg/robomimic/eval/can/eval_diffusion_mlp_img.yaml
+++ b/cfg/robomimic/eval/can/eval_diffusion_mlp_img.yaml
@@ -7,7 +7,7 @@ _target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
 
 name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
 logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
-base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_img_ta4_td100/2024-07-30_22-23-55/checkpoint/state_5000.pt
+base_policy_path:
 robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
 normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
 
@@ -28,7 +28,7 @@ n_steps: 300  # each episode takes max_episode_steps / act_steps steps
 render_num: 0
 
 env:
-  n_envs: 50
+  n_envs: 20  # reduce gpu usage
   name: ${env_name}
   best_reward_threshold_for_success: 1
   max_episode_steps: 300
diff --git a/cfg/robomimic/eval/can/eval_diffusion_unet.yaml b/cfg/robomimic/eval/can/eval_diffusion_unet.yaml
new file mode 100644
index 0000000..2d1ac3b
--- /dev/null
+++ b/cfg/robomimic/eval/can/eval_diffusion_unet.yaml
@@ -0,0 +1,68 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
+
+name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path:
+robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
+normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
+
+seed: 42
+device: cuda:0
+env_name: can
+obs_dim: 23
+action_dim: 7
+denoising_steps: 20
+cond_steps: 1
+horizon_steps: 4
+act_steps: 4
+
+n_steps: 75  # each episode takes max_episode_steps / act_steps steps
+render_num: 0
+
+env:
+  n_envs: 40
+  name: ${env_name}
+  best_reward_threshold_for_success: 1
+  max_episode_steps: 300
+  save_video: False
+  wrappers:
+    robomimic_lowdim:
+      normalization_path: ${normalization_path}
+      low_dim_keys: ['robot0_eef_pos',
+                    'robot0_eef_quat',
+                    'robot0_gripper_qpos',
+                    'object'] # same order of preprocessed observations
+    multi_step:
+      n_obs_steps: ${cond_steps}
+      n_action_steps: ${act_steps}
+      max_episode_steps: ${env.max_episode_steps}
+      reset_within_step: True
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  randn_clip_value: 3
+  #
+  network_path: ${base_policy_path}
+  network:
+    _target_: model.diffusion.unet.Unet1D
+    diffusion_step_embed_dim: 16
+    dim: 40
+    dim_mults: [1, 2]
+    kernel_size: 5
+    n_groups: 8
+    smaller_encoder: False
+    cond_predict_scale: True
+    action_dim: ${action_dim}
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/eval/can/eval_diffusion_unet_img.yaml b/cfg/robomimic/eval/can/eval_diffusion_unet_img.yaml
new file mode 100644
index 0000000..3b8f643
--- /dev/null
+++ b/cfg/robomimic/eval/can/eval_diffusion_unet_img.yaml
@@ -0,0 +1,102 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
+
+name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path:
+robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
+normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
+
+seed: 42
+device: cuda:0
+env_name: can
+obs_dim: 9
+action_dim: 7
+denoising_steps: 100
+cond_steps: 1
+img_cond_steps: 1
+horizon_steps: 4
+act_steps: 4
+use_ddim: True
+ddim_steps: 5
+
+n_steps: 300  # each episode takes max_episode_steps / act_steps steps
+render_num: 0
+
+env:
+  n_envs: 20  # reduce gpu usage
+  name: ${env_name}
+  best_reward_threshold_for_success: 1
+  max_episode_steps: 300
+  save_video: False
+  use_image_obs: True
+  wrappers:
+    robomimic_image:
+      normalization_path: ${normalization_path}
+      low_dim_keys: ['robot0_eef_pos',
+                     'robot0_eef_quat',
+                     'robot0_gripper_qpos']
+      image_keys: ['robot0_eye_in_hand_image']
+      shape_meta: ${shape_meta}
+    multi_step:
+      n_obs_steps: ${cond_steps}
+      n_action_steps: ${act_steps}
+      max_episode_steps: ${env.max_episode_steps}
+      reset_within_step: True
+
+shape_meta:
+  obs:
+    rgb:
+      shape: [3, 96, 96]
+    state:
+      shape: [9]
+  action: 
+    shape: [7]
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  randn_clip_value: 3
+  #
+  use_ddim: ${use_ddim}
+  ddim_steps: ${ddim_steps}
+  network_path: ${base_policy_path}
+  network:
+    _target_: model.diffusion.unet.VisionUnet1D
+    backbone:
+      _target_: model.common.vit.VitEncoder
+      obs_shape: ${shape_meta.obs.rgb.shape}
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
+      img_h: ${shape_meta.obs.rgb.shape[1]}
+      img_w: ${shape_meta.obs.rgb.shape[2]}
+      cfg:
+        patch_size: 8
+        depth: 1
+        embed_dim: 128
+        num_heads: 4
+        embed_style: embed2
+        embed_norm: 0
+    img_cond_steps: ${img_cond_steps}
+    augment: False
+    spatial_emb: 128
+    diffusion_step_embed_dim: 32
+    dim: 40
+    dim_mults:
+    - 1
+    - 2
+    kernel_size: 5
+    n_groups: 8
+    smaller_encoder: false
+    cond_predict_scale: true
+    action_dim: ${action_dim}
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/eval/can/eval_gaussian_mlp.yaml b/cfg/robomimic/eval/can/eval_gaussian_mlp.yaml
index 25a3719..2efb0dc 100644
--- a/cfg/robomimic/eval/can/eval_gaussian_mlp.yaml
+++ b/cfg/robomimic/eval/can/eval_gaussian_mlp.yaml
@@ -7,7 +7,7 @@ _target_: agent.eval.eval_gaussian_agent.EvalGaussianAgent
 
 name: ${env_name}_eval_gaussian_mlp_ta${horizon_steps}
 logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
-base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_gaussian_mlp_ta4/2024-06-28_13-31-00/checkpoint/state_5000.pt
+base_policy_path:
 robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
 normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
 
diff --git a/cfg/robomimic/eval/can/eval_gaussian_mlp_img.yaml b/cfg/robomimic/eval/can/eval_gaussian_mlp_img.yaml
index 7aa0269..4b6507a 100644
--- a/cfg/robomimic/eval/can/eval_gaussian_mlp_img.yaml
+++ b/cfg/robomimic/eval/can/eval_gaussian_mlp_img.yaml
@@ -7,7 +7,7 @@ _target_: agent.eval.eval_gaussian_img_agent.EvalImgGaussianAgent
 
 name: ${env_name}_eval_gaussian_mlp_img_ta${horizon_steps}
 logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
-base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_gaussian_mlp_img_ta4/2024-07-28_21-54-40/checkpoint/state_1000.pt
+base_policy_path:
 robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
 normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
 
diff --git a/cfg/robomimic/eval/lift/eval_diffusion_mlp.yaml b/cfg/robomimic/eval/lift/eval_diffusion_mlp.yaml
new file mode 100644
index 0000000..6a0aa81
--- /dev/null
+++ b/cfg/robomimic/eval/lift/eval_diffusion_mlp.yaml
@@ -0,0 +1,65 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
+
+name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path:
+robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
+normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
+
+seed: 42
+device: cuda:0
+env_name: lift
+obs_dim: 19
+action_dim: 7
+denoising_steps: 20
+cond_steps: 1
+horizon_steps: 4
+act_steps: 4
+
+n_steps: 300  # each episode takes max_episode_steps / act_steps steps
+render_num: 0
+
+env:
+  n_envs: 50
+  name: ${env_name}
+  best_reward_threshold_for_success: 1
+  max_episode_steps: 300
+  save_video: False
+  wrappers:
+    robomimic_lowdim:
+      normalization_path: ${normalization_path}
+      low_dim_keys: ['robot0_eef_pos',
+                    'robot0_eef_quat',
+                    'robot0_gripper_qpos',
+                    'object'] # same order of preprocessed observations
+    multi_step:
+      n_obs_steps: ${cond_steps}
+      n_action_steps: ${act_steps}
+      max_episode_steps: ${env.max_episode_steps}
+      reset_within_step: True
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  randn_clip_value: 3
+  #
+  network_path: ${base_policy_path}
+  network:
+    _target_: model.diffusion.mlp_diffusion.DiffusionMLP
+    time_dim: 16
+    mlp_dims: [512, 512, 512]
+    residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+    horizon_steps: ${horizon_steps}
+    action_dim: ${action_dim}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/eval/lift/eval_diffusion_mlp_img.yaml b/cfg/robomimic/eval/lift/eval_diffusion_mlp_img.yaml
new file mode 100644
index 0000000..bf8c232
--- /dev/null
+++ b/cfg/robomimic/eval/lift/eval_diffusion_mlp_img.yaml
@@ -0,0 +1,97 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
+
+name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path:
+robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
+normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
+
+seed: 42
+device: cuda:0
+env_name: lift
+obs_dim: 9
+action_dim: 7
+denoising_steps: 100
+cond_steps: 1
+img_cond_steps: 1
+horizon_steps: 4
+act_steps: 4
+use_ddim: True
+ddim_steps: 5
+
+n_steps: 300  # each episode takes max_episode_steps / act_steps steps
+render_num: 0
+
+env:
+  n_envs: 20  # reduce gpu usage
+  name: ${env_name}
+  best_reward_threshold_for_success: 1
+  max_episode_steps: 300
+  save_video: False
+  use_image_obs: True
+  wrappers:
+    robomimic_image:
+      normalization_path: ${normalization_path}
+      low_dim_keys: ['robot0_eef_pos',
+                     'robot0_eef_quat',
+                     'robot0_gripper_qpos']
+      image_keys: ['robot0_eye_in_hand_image']
+      shape_meta: ${shape_meta}
+    multi_step:
+      n_obs_steps: ${cond_steps}
+      n_action_steps: ${act_steps}
+      max_episode_steps: ${env.max_episode_steps}
+      reset_within_step: True
+
+shape_meta:
+  obs:
+    rgb:
+      shape: [3, 96, 96]
+    state:
+      shape: [9]
+  action: 
+    shape: [7]
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  randn_clip_value: 3
+  #
+  use_ddim: ${use_ddim}
+  ddim_steps: ${ddim_steps}
+  network_path: ${base_policy_path}
+  network:
+    _target_: model.diffusion.mlp_diffusion.VisionDiffusionMLP
+    backbone:
+      _target_: model.common.vit.VitEncoder
+      obs_shape: ${shape_meta.obs.rgb.shape}
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
+      img_h: ${shape_meta.obs.rgb.shape[1]}
+      img_w: ${shape_meta.obs.rgb.shape[2]}
+      cfg:
+        patch_size: 8
+        depth: 1
+        embed_dim: 128
+        num_heads: 4
+        embed_style: embed2
+        embed_norm: 0
+    augment: False
+    spatial_emb: 128
+    time_dim: 32
+    mlp_dims: [512, 512, 512]
+    residual_style: True
+    img_cond_steps: ${img_cond_steps}
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+    horizon_steps: ${horizon_steps}
+    action_dim: ${action_dim}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/eval/lift/eval_diffusion_unet.yaml b/cfg/robomimic/eval/lift/eval_diffusion_unet.yaml
new file mode 100644
index 0000000..800354c
--- /dev/null
+++ b/cfg/robomimic/eval/lift/eval_diffusion_unet.yaml
@@ -0,0 +1,68 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
+
+name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path:
+robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
+normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
+
+seed: 42
+device: cuda:0
+env_name: lift
+obs_dim: 19
+action_dim: 7
+denoising_steps: 20
+cond_steps: 1
+horizon_steps: 4
+act_steps: 4
+
+n_steps: 75  # each episode takes max_episode_steps / act_steps steps
+render_num: 0
+
+env:
+  n_envs: 40
+  name: ${env_name}
+  best_reward_threshold_for_success: 1
+  max_episode_steps: 300
+  save_video: False
+  wrappers:
+    robomimic_lowdim:
+      normalization_path: ${normalization_path}
+      low_dim_keys: ['robot0_eef_pos',
+                    'robot0_eef_quat',
+                    'robot0_gripper_qpos',
+                    'object'] # same order of preprocessed observations
+    multi_step:
+      n_obs_steps: ${cond_steps}
+      n_action_steps: ${act_steps}
+      max_episode_steps: ${env.max_episode_steps}
+      reset_within_step: True
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  randn_clip_value: 3
+  #
+  network_path: ${base_policy_path}
+  network:
+    _target_: model.diffusion.unet.Unet1D
+    diffusion_step_embed_dim: 16
+    dim: 40
+    dim_mults: [1, 2]
+    kernel_size: 5
+    n_groups: 8
+    smaller_encoder: False
+    cond_predict_scale: True
+    action_dim: ${action_dim}
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/eval/lift/eval_diffusion_unet_img.yaml b/cfg/robomimic/eval/lift/eval_diffusion_unet_img.yaml
new file mode 100644
index 0000000..35c567b
--- /dev/null
+++ b/cfg/robomimic/eval/lift/eval_diffusion_unet_img.yaml
@@ -0,0 +1,100 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
+
+name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path:
+robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
+normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
+
+seed: 42
+device: cuda:0
+env_name: lift
+obs_dim: 9
+action_dim: 7
+denoising_steps: 100
+cond_steps: 1
+img_cond_steps: 1
+horizon_steps: 4
+act_steps: 4
+use_ddim: True
+ddim_steps: 5
+
+n_steps: 300  # each episode takes max_episode_steps / act_steps steps
+render_num: 0
+
+env:
+  n_envs: 20  # reduce gpu usage
+  name: ${env_name}
+  best_reward_threshold_for_success: 1
+  max_episode_steps: 300
+  save_video: False
+  use_image_obs: True
+  wrappers:
+    robomimic_image:
+      normalization_path: ${normalization_path}
+      low_dim_keys: ['robot0_eef_pos',
+                     'robot0_eef_quat',
+                     'robot0_gripper_qpos']
+      image_keys: ['robot0_eye_in_hand_image']
+      shape_meta: ${shape_meta}
+    multi_step:
+      n_obs_steps: ${cond_steps}
+      n_action_steps: ${act_steps}
+      max_episode_steps: ${env.max_episode_steps}
+      reset_within_step: True
+
+shape_meta:
+  obs:
+    rgb:
+      shape: [3, 96, 96]
+    state:
+      shape: [9]
+  action: 
+    shape: [7]
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  randn_clip_value: 3
+  #
+  use_ddim: ${use_ddim}
+  ddim_steps: ${ddim_steps}
+  network_path: ${base_policy_path}
+  network:
+    _target_: model.diffusion.unet.VisionUnet1D
+    backbone:
+      _target_: model.common.vit.VitEncoder
+      obs_shape: ${shape_meta.obs.rgb.shape}
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 RGB channels per image; image history is concatenated along the channel axis
+      img_h: ${shape_meta.obs.rgb.shape[1]}
+      img_w: ${shape_meta.obs.rgb.shape[2]}
+      cfg:
+        patch_size: 8
+        depth: 1
+        embed_dim: 128
+        num_heads: 4
+        embed_style: embed2
+        embed_norm: 0
+    img_cond_steps: ${img_cond_steps}
+    augment: False
+    spatial_emb: 128
+    diffusion_step_embed_dim: 32
+    dim: 40
+    dim_mults: [1, 2]
+    kernel_size: 5
+    n_groups: 8
+    smaller_encoder: False
+    cond_predict_scale: True
+    action_dim: ${action_dim}
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/eval/square/eval_diffusion_mlp.yaml b/cfg/robomimic/eval/square/eval_diffusion_mlp.yaml
index 759c653..1009edc 100644
--- a/cfg/robomimic/eval/square/eval_diffusion_mlp.yaml
+++ b/cfg/robomimic/eval/square/eval_diffusion_mlp.yaml
@@ -18,8 +18,8 @@ obs_dim: 23
 action_dim: 7
 denoising_steps: 20
 cond_steps: 1
-horizon_steps: 1
-act_steps: 1
+horizon_steps: 4
+act_steps: 4
 
 n_steps: 400  # each episode takes max_episode_steps / act_steps steps
 render_num: 0
diff --git a/cfg/robomimic/eval/square/eval_diffusion_mlp_img.yaml b/cfg/robomimic/eval/square/eval_diffusion_mlp_img.yaml
new file mode 100644
index 0000000..624a1b3
--- /dev/null
+++ b/cfg/robomimic/eval/square/eval_diffusion_mlp_img.yaml
@@ -0,0 +1,97 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
+
+name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path:
+robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
+normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
+
+seed: 42
+device: cuda:0
+env_name: square
+obs_dim: 9
+action_dim: 7
+denoising_steps: 100
+cond_steps: 1
+img_cond_steps: 1
+horizon_steps: 4
+act_steps: 4
+use_ddim: True
+ddim_steps: 5
+
+n_steps: 400  # each episode takes max_episode_steps / act_steps steps
+render_num: 0
+
+env:
+  n_envs: 20  # reduce gpu usage
+  name: ${env_name}
+  best_reward_threshold_for_success: 1
+  max_episode_steps: 400
+  save_video: False
+  use_image_obs: True
+  wrappers:
+    robomimic_image:
+      normalization_path: ${normalization_path}
+      low_dim_keys: ['robot0_eef_pos',
+                     'robot0_eef_quat',
+                     'robot0_gripper_qpos']
+      image_keys: ['agentview_image']
+      shape_meta: ${shape_meta}
+    multi_step:
+      n_obs_steps: ${cond_steps}
+      n_action_steps: ${act_steps}
+      max_episode_steps: ${env.max_episode_steps}
+      reset_within_step: True
+
+shape_meta:
+  obs:
+    rgb:
+      shape: [3, 96, 96]
+    state:
+      shape: [9]
+  action: 
+    shape: [7]
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  randn_clip_value: 3
+  #
+  use_ddim: ${use_ddim}
+  ddim_steps: ${ddim_steps}
+  network_path: ${base_policy_path}
+  network:
+    _target_: model.diffusion.mlp_diffusion.VisionDiffusionMLP
+    backbone:
+      _target_: model.common.vit.VitEncoder
+      obs_shape: ${shape_meta.obs.rgb.shape}
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 RGB channels per image; image history is concatenated along the channel axis
+      img_h: ${shape_meta.obs.rgb.shape[1]}
+      img_w: ${shape_meta.obs.rgb.shape[2]}
+      cfg:
+        patch_size: 8
+        depth: 1
+        embed_dim: 128
+        num_heads: 4
+        embed_style: embed2
+        embed_norm: 0
+    augment: False
+    spatial_emb: 128
+    time_dim: 32
+    mlp_dims: [768, 768, 768]
+    residual_style: True
+    img_cond_steps: ${img_cond_steps}
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+    horizon_steps: ${horizon_steps}
+    action_dim: ${action_dim}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/eval/square/eval_diffusion_unet.yaml b/cfg/robomimic/eval/square/eval_diffusion_unet.yaml
new file mode 100644
index 0000000..7280703
--- /dev/null
+++ b/cfg/robomimic/eval/square/eval_diffusion_unet.yaml
@@ -0,0 +1,68 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
+
+name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path:
+robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
+normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
+
+seed: 42
+device: cuda:0
+env_name: square
+obs_dim: 23
+action_dim: 7
+denoising_steps: 20
+cond_steps: 1
+horizon_steps: 4
+act_steps: 4
+
+n_steps: 100  # each episode takes max_episode_steps / act_steps steps
+render_num: 0
+
+env:
+  n_envs: 50
+  name: ${env_name}
+  best_reward_threshold_for_success: 1
+  max_episode_steps: 400
+  save_video: False
+  wrappers:
+    robomimic_lowdim:
+      normalization_path: ${normalization_path}
+      low_dim_keys: ['robot0_eef_pos',
+                    'robot0_eef_quat',
+                    'robot0_gripper_qpos',
+                    'object'] # same order of preprocessed observations
+    multi_step:
+      n_obs_steps: ${cond_steps}
+      n_action_steps: ${act_steps}
+      max_episode_steps: ${env.max_episode_steps}
+      reset_within_step: True
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  randn_clip_value: 3
+  #
+  network_path: ${base_policy_path}
+  network:
+    _target_: model.diffusion.unet.Unet1D
+    diffusion_step_embed_dim: 16
+    dim: 64
+    dim_mults: [1, 2]
+    kernel_size: 5
+    n_groups: 8
+    smaller_encoder: False
+    cond_predict_scale: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+    action_dim: ${action_dim}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/eval/square/eval_diffusion_unet_img.yaml b/cfg/robomimic/eval/square/eval_diffusion_unet_img.yaml
new file mode 100644
index 0000000..d35d975
--- /dev/null
+++ b/cfg/robomimic/eval/square/eval_diffusion_unet_img.yaml
@@ -0,0 +1,102 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
+
+name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path:
+robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
+normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
+
+seed: 42
+device: cuda:0
+env_name: square
+obs_dim: 9
+action_dim: 7
+denoising_steps: 100
+cond_steps: 1
+img_cond_steps: 1
+horizon_steps: 4
+act_steps: 4
+use_ddim: True
+ddim_steps: 5
+
+n_steps: 400  # each episode takes max_episode_steps / act_steps steps
+render_num: 0
+
+env:
+  n_envs: 30  # reduce gpu usage
+  name: ${env_name}
+  best_reward_threshold_for_success: 1
+  max_episode_steps: 400
+  save_video: False
+  use_image_obs: True
+  wrappers:
+    robomimic_image:
+      normalization_path: ${normalization_path}
+      low_dim_keys: ['robot0_eef_pos',
+                     'robot0_eef_quat',
+                     'robot0_gripper_qpos']
+      image_keys: ['agentview_image']
+      shape_meta: ${shape_meta}
+    multi_step:
+      n_obs_steps: ${cond_steps}
+      n_action_steps: ${act_steps}
+      max_episode_steps: ${env.max_episode_steps}
+      reset_within_step: True
+
+shape_meta:
+  obs:
+    rgb:
+      shape: [3, 96, 96]
+    state:
+      shape: [9]
+  action: 
+    shape: [7]
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  randn_clip_value: 3
+  #
+  use_ddim: ${use_ddim}
+  ddim_steps: ${ddim_steps}
+  network_path: ${base_policy_path}
+  network:
+    _target_: model.diffusion.unet.VisionUnet1D
+    backbone:
+      _target_: model.common.vit.VitEncoder
+      obs_shape: ${shape_meta.obs.rgb.shape}
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 RGB channels per image; image history is concatenated along the channel axis
+      img_h: ${shape_meta.obs.rgb.shape[1]}
+      img_w: ${shape_meta.obs.rgb.shape[2]}
+      cfg:
+        patch_size: 8
+        depth: 1
+        embed_dim: 128
+        num_heads: 4
+        embed_style: embed2
+        embed_norm: 0
+    img_cond_steps: ${img_cond_steps}
+    augment: False
+    spatial_emb: 128
+    diffusion_step_embed_dim: 32
+    dim: 64
+    dim_mults:
+    - 1
+    - 2
+    kernel_size: 5
+    n_groups: 8
+    smaller_encoder: false
+    cond_predict_scale: true
+    action_dim: ${action_dim}
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/eval/square/eval_gaussian_mlp.yaml b/cfg/robomimic/eval/transport/eval_diffusion_mlp.yaml
similarity index 62%
rename from cfg/robomimic/eval/square/eval_gaussian_mlp.yaml
rename to cfg/robomimic/eval/transport/eval_diffusion_mlp.yaml
index 3e6a089..23826aa 100644
--- a/cfg/robomimic/eval/square/eval_gaussian_mlp.yaml
+++ b/cfg/robomimic/eval/transport/eval_diffusion_mlp.yaml
@@ -3,9 +3,9 @@ defaults:
 hydra:
   run:
     dir: ${logdir}
-_target_: agent.eval.eval_gaussian_agent.EvalGaussianAgent
+_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
 
-name: ${env_name}_eval_gaussian_mlp_ta${horizon_steps}
+name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
 logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
 base_policy_path:
 robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
@@ -13,12 +13,13 @@ normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.
 
 seed: 42
 device: cuda:0
-env_name: square
-obs_dim: 23
-action_dim: 7
+env_name: transport
+obs_dim: 59
+action_dim: 14
+denoising_steps: 20
 cond_steps: 1
-horizon_steps: 1
-act_steps: 1
+horizon_steps: 8
+act_steps: 8
 
 n_steps: 400  # each episode takes max_episode_steps / act_steps steps
 render_num: 0
@@ -27,7 +28,7 @@ env:
   n_envs: 50
   name: ${env_name}
   best_reward_threshold_for_success: 1
-  max_episode_steps: 400
+  max_episode_steps: 800
   save_video: False
   wrappers:
     robomimic_lowdim:
@@ -35,6 +36,9 @@ env:
       low_dim_keys: ['robot0_eef_pos',
                     'robot0_eef_quat',
                     'robot0_gripper_qpos',
+                    "robot1_eef_pos",
+                    "robot1_eef_quat",
+                    "robot1_gripper_qpos",
                     'object'] # same order of preprocessed observations
     multi_step:
       n_obs_steps: ${cond_steps}
@@ -42,19 +46,24 @@ env:
       max_episode_steps: ${env.max_episode_steps}
       reset_within_step: True
 
+
 model:
-  _target_: model.common.gaussian.GaussianModel
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
   randn_clip_value: 3
   #
   network_path: ${base_policy_path}
   network:
-    _target_: model.common.mlp_gaussian.Gaussian_MLP
+    _target_: model.diffusion.mlp_diffusion.DiffusionMLP
+    time_dim: 32
     mlp_dims: [1024, 1024, 1024]
-    activation_type: ReLU
-    use_layernorm: true
-    fixed_std: 0.1
+    residual_style: True
     cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     horizon_steps: ${horizon_steps}
-    
+    action_dim: ${action_dim}
   horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
   device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/eval/transport/eval_diffusion_mlp_img.yaml b/cfg/robomimic/eval/transport/eval_diffusion_mlp_img.yaml
new file mode 100644
index 0000000..7413246
--- /dev/null
+++ b/cfg/robomimic/eval/transport/eval_diffusion_mlp_img.yaml
@@ -0,0 +1,102 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
+
+name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path:
+robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
+normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
+
+seed: 42
+device: cuda:0
+env_name: transport
+obs_dim: 18
+action_dim: 14
+denoising_steps: 100
+cond_steps: 1
+img_cond_steps: 1
+horizon_steps: 8
+act_steps: 8
+use_ddim: True
+ddim_steps: 5
+
+n_steps: 200  # each episode takes max_episode_steps / act_steps steps
+render_num: 0
+
+env:
+  n_envs: 30  # reduce gpu usage
+  name: ${env_name}
+  best_reward_threshold_for_success: 1
+  max_episode_steps: 800
+  save_video: False
+  use_image_obs: True
+  wrappers:
+    robomimic_image:
+      normalization_path: ${normalization_path}
+      low_dim_keys: ['robot0_eef_pos',
+                     'robot0_eef_quat',
+                     'robot0_gripper_qpos',
+                     "robot1_eef_pos",
+                     "robot1_eef_quat",
+                     "robot1_gripper_qpos"]
+      image_keys: ['shouldercamera0_image', 
+                   'shouldercamera1_image']
+      shape_meta: ${shape_meta}
+    multi_step:
+      n_obs_steps: ${cond_steps}
+      n_action_steps: ${act_steps}
+      max_episode_steps: ${env.max_episode_steps}
+      reset_within_step: True
+
+shape_meta:
+  obs:
+    rgb:
+      shape: [6, 96, 96]
+    state:
+      shape: [18]
+  action: 
+    shape: [14]
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  randn_clip_value: 3
+  #
+  use_ddim: ${use_ddim}
+  ddim_steps: ${ddim_steps}
+  network_path: ${base_policy_path}
+  network:
+    _target_: model.diffusion.mlp_diffusion.VisionDiffusionMLP
+    backbone:
+      _target_: model.common.vit.VitEncoder
+      obs_shape: ${shape_meta.obs.rgb.shape}
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 RGB channels per image; image history is concatenated along the channel axis
+      img_h: ${shape_meta.obs.rgb.shape[1]}
+      img_w: ${shape_meta.obs.rgb.shape[2]}
+      cfg:
+        patch_size: 8
+        depth: 1
+        embed_dim: 128
+        num_heads: 4
+        embed_style: embed2
+        embed_norm: 0
+    augment: False
+    num_img: 2
+    spatial_emb: 128
+    time_dim: 32
+    mlp_dims: [768, 768, 768]
+    residual_style: True
+    img_cond_steps: ${img_cond_steps}
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+    horizon_steps: ${horizon_steps}
+    action_dim: ${action_dim}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/eval/transport/eval_diffusion_unet.yaml b/cfg/robomimic/eval/transport/eval_diffusion_unet.yaml
new file mode 100644
index 0000000..e644bfc
--- /dev/null
+++ b/cfg/robomimic/eval/transport/eval_diffusion_unet.yaml
@@ -0,0 +1,71 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
+
+name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path:
+robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
+normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
+
+seed: 42
+device: cuda:0
+env_name: transport
+obs_dim: 59
+action_dim: 14
+denoising_steps: 20
+cond_steps: 1
+horizon_steps: 16
+act_steps: 8
+
+n_steps: 100  # each episode takes max_episode_steps / act_steps steps
+render_num: 0
+
+env:
+  n_envs: 50
+  name: ${env_name}
+  best_reward_threshold_for_success: 1
+  max_episode_steps: 800
+  save_video: False
+  wrappers:
+    robomimic_lowdim:
+      normalization_path: ${normalization_path}
+      low_dim_keys: ['robot0_eef_pos',
+                    'robot0_eef_quat',
+                    'robot0_gripper_qpos',
+                    "robot1_eef_pos",
+                    "robot1_eef_quat",
+                    "robot1_gripper_qpos",
+                    'object'] # same order of preprocessed observations
+    multi_step:
+      n_obs_steps: ${cond_steps}
+      n_action_steps: ${act_steps}
+      max_episode_steps: ${env.max_episode_steps}
+      reset_within_step: True
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  randn_clip_value: 3
+  #
+  network_path: ${base_policy_path}
+  network:
+    _target_: model.diffusion.unet.Unet1D
+    diffusion_step_embed_dim: 16
+    dim: 64
+    dim_mults: [1, 2]
+    kernel_size: 5
+    n_groups: 8
+    smaller_encoder: False
+    cond_predict_scale: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+    action_dim: ${action_dim}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/eval/transport/eval_diffusion_unet_img.yaml b/cfg/robomimic/eval/transport/eval_diffusion_unet_img.yaml
new file mode 100644
index 0000000..81b0046
--- /dev/null
+++ b/cfg/robomimic/eval/transport/eval_diffusion_unet_img.yaml
@@ -0,0 +1,107 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
+
+name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path:
+robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
+normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
+
+seed: 42
+device: cuda:0
+env_name: transport
+obs_dim: 18
+action_dim: 14
+denoising_steps: 100
+cond_steps: 1
+img_cond_steps: 1
+horizon_steps: 16
+act_steps: 8
+use_ddim: True
+ddim_steps: 5
+
+n_steps: 400  # each episode takes max_episode_steps / act_steps steps
+render_num: 0
+
+env:
+  n_envs: 30  # reduce gpu usage
+  name: ${env_name}
+  best_reward_threshold_for_success: 1
+  max_episode_steps: 800
+  save_video: False
+  use_image_obs: True
+  wrappers:
+    robomimic_image:
+      normalization_path: ${normalization_path}
+      low_dim_keys: ['robot0_eef_pos',
+                     'robot0_eef_quat',
+                     'robot0_gripper_qpos',
+                     "robot1_eef_pos",
+                     "robot1_eef_quat",
+                     "robot1_gripper_qpos"]
+      image_keys: ['shouldercamera0_image', 
+                   'shouldercamera1_image']
+      shape_meta: ${shape_meta}
+    multi_step:
+      n_obs_steps: ${cond_steps}
+      n_action_steps: ${act_steps}
+      max_episode_steps: ${env.max_episode_steps}
+      reset_within_step: True
+
+shape_meta:
+  obs:
+    rgb:
+      shape: [6, 96, 96]
+    state:
+      shape: [18]
+  action: 
+    shape: [14]
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  randn_clip_value: 3
+  #
+  use_ddim: ${use_ddim}
+  ddim_steps: ${ddim_steps}
+  network_path: ${base_policy_path}
+  network:
+    _target_: model.diffusion.unet.VisionUnet1D
+    backbone:
+      _target_: model.common.vit.VitEncoder
+      obs_shape: ${shape_meta.obs.rgb.shape}
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 RGB channels per image; image history is concatenated along the channel axis
+      img_h: ${shape_meta.obs.rgb.shape[1]}
+      img_w: ${shape_meta.obs.rgb.shape[2]}
+      cfg:
+        patch_size: 8
+        depth: 1
+        embed_dim: 128
+        num_heads: 4
+        embed_style: embed2
+        embed_norm: 0
+    img_cond_steps: ${img_cond_steps}
+    augment: False
+    num_img: 2
+    spatial_emb: 128
+    diffusion_step_embed_dim: 32
+    dim: 64
+    dim_mults:
+    - 1
+    - 2
+    kernel_size: 5
+    n_groups: 8
+    smaller_encoder: false
+    cond_predict_scale: true
+    action_dim: ${action_dim}
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/can/ft_ppo_diffusion_mlp.yaml b/cfg/robomimic/finetune/can/ft_ppo_diffusion_mlp.yaml
index 8256876..4c68449 100644
--- a/cfg/robomimic/finetune/can/ft_ppo_diffusion_mlp.yaml
+++ b/cfg/robomimic/finetune/can/ft_ppo_diffusion_mlp.yaml
@@ -7,7 +7,8 @@ _target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
 
 name: ${env_name}_ft_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
 logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
-base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_5000.pt  # use 8000 for comparing policy parameterizations
+base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_5000.pt  # use the state_5000 checkpoint when comparing diffusion RL algorithms
+# base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_8000.pt  # use 8000 for comparing policy parameterizations
 robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
 normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
 
@@ -54,13 +55,13 @@ train:
   actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
diff --git a/cfg/robomimic/finetune/can/ft_ppo_diffusion_mlp_img.yaml b/cfg/robomimic/finetune/can/ft_ppo_diffusion_mlp_img.yaml
index 54a4ab1..24117e7 100644
--- a/cfg/robomimic/finetune/can/ft_ppo_diffusion_mlp_img.yaml
+++ b/cfg/robomimic/finetune/can/ft_ppo_diffusion_mlp_img.yaml
@@ -66,16 +66,16 @@ train:
   gamma: 0.999
   augment: True
   grad_accumulate: 15
-  actor_lr: 1e-4
+  actor_lr: 5e-5
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
-    min_lr: 1e-4
+    min_lr: 5e-5
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
diff --git a/cfg/robomimic/finetune/can/ft_ppo_diffusion_unet.yaml b/cfg/robomimic/finetune/can/ft_ppo_diffusion_unet.yaml
index 6f3c0ce..a21c180 100644
--- a/cfg/robomimic/finetune/can/ft_ppo_diffusion_unet.yaml
+++ b/cfg/robomimic/finetune/can/ft_ppo_diffusion_unet.yaml
@@ -27,7 +27,7 @@ env:
   name: ${env_name}
   best_reward_threshold_for_success: 1
   max_episode_steps: 300
-  save_video: false
+  save_video: False
   wrappers:
     robomimic_lowdim:
       normalization_path: ${normalization_path}
@@ -47,20 +47,20 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 300
+  n_train_itr: 151
   n_critic_warmup_itr: 2
   n_steps: 300
   gamma: 0.999
-  actor_lr: 1e-5
+  actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
-    min_lr: 1e-5
+    min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
diff --git a/cfg/robomimic/finetune/can/ft_ppo_diffusion_unet_img.yaml b/cfg/robomimic/finetune/can/ft_ppo_diffusion_unet_img.yaml
new file mode 100644
index 0000000..4e3c56c
--- /dev/null
+++ b/cfg/robomimic/finetune/can/ft_ppo_diffusion_unet_img.yaml
@@ -0,0 +1,173 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent
+
+name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-34-05_42/checkpoint/state_500.pt
+robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
+normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
+
+seed: 42
+device: cuda:0
+env_name: can
+obs_dim: 9
+action_dim: 7
+denoising_steps: 100
+ft_denoising_steps: 5
+cond_steps: 1
+img_cond_steps: 1
+horizon_steps: 4
+act_steps: 4
+use_ddim: True
+
+env:
+  n_envs: 50
+  name: ${env_name}
+  best_reward_threshold_for_success: 1
+  max_episode_steps: 300
+  save_video: False
+  use_image_obs: True
+  wrappers:
+    robomimic_image:
+      normalization_path: ${normalization_path}
+      low_dim_keys: ['robot0_eef_pos',
+                     'robot0_eef_quat',
+                     'robot0_gripper_qpos']
+      image_keys: ['robot0_eye_in_hand_image']
+      shape_meta: ${shape_meta}
+    multi_step:
+      n_obs_steps: ${cond_steps}
+      n_action_steps: ${act_steps}
+      max_episode_steps: ${env.max_episode_steps}
+      reset_within_step: True
+
+shape_meta:
+  obs:
+    rgb:
+      shape: [3, 96, 96]
+    state:
+      shape: [9]
+  action: 
+    shape: [7]
+
+wandb:
+  entity: ${oc.env:DPPO_WANDB_ENTITY}
+  project: robomimic-${env_name}-finetune
+  run: ${now:%H-%M-%S}_${name}
+
+train:
+  n_train_itr: 151
+  n_critic_warmup_itr: 2
+  n_steps: 300
+  gamma: 0.999
+  augment: True
+  grad_accumulate: 15
+  actor_lr: 5e-5
+  actor_weight_decay: 0
+  actor_lr_scheduler:
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 10
+    min_lr: 5e-5
+  critic_lr: 1e-3
+  critic_weight_decay: 0
+  critic_lr_scheduler:
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 10
+    min_lr: 1e-3
+  save_model_freq: 100
+  val_freq: 10
+  render:
+    freq: 1
+    num: 0
+  # PPO specific
+  reward_scale_running: True
+  reward_scale_const: 1.0
+  gae_lambda: 0.95
+  batch_size: 500
+  logprob_batch_size: 500
+  update_epochs: 10
+  vf_coef: 0.5
+  target_kl: 1
+
+model:
+  _target_: model.diffusion.diffusion_ppo.PPODiffusion
+  # HP to tune
+  gamma_denoising: 0.99
+  clip_ploss_coef: 0.01
+  clip_ploss_coef_base: 0.001
+  clip_ploss_coef_rate: 3
+  randn_clip_value: 3
+  min_sampling_denoising_std: 0.1
+  min_logprob_denoising_std: 0.1
+  #
+  use_ddim: ${use_ddim}
+  ddim_steps: ${ft_denoising_steps}
+  learn_eta: False
+  eta:
+    base_eta: 1
+    input_dim: ${obs_dim}
+    mlp_dims: [256, 256]
+    action_dim: ${action_dim}
+    min_eta: 0.1
+    max_eta: 1.0
+    _target_: model.diffusion.eta.EtaFixed
+  network_path: ${base_policy_path}
+  actor:
+    _target_: model.diffusion.unet.VisionUnet1D
+    backbone:
+      _target_: model.common.vit.VitEncoder
+      obs_shape: ${shape_meta.obs.rgb.shape}
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 RGB channels per image; image history is concatenated along the channel axis
+      img_h: ${shape_meta.obs.rgb.shape[1]}
+      img_w: ${shape_meta.obs.rgb.shape[2]}
+      cfg:
+        patch_size: 8
+        depth: 1
+        embed_dim: 128
+        num_heads: 4
+        embed_style: embed2
+        embed_norm: 0
+    img_cond_steps: ${img_cond_steps}
+    augment: False
+    spatial_emb: 128
+    diffusion_step_embed_dim: 32
+    dim: 40
+    dim_mults: [1, 2]
+    kernel_size: 5
+    n_groups: 8
+    smaller_encoder: False
+    cond_predict_scale: True
+    action_dim: ${action_dim}
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+  critic:
+    _target_: model.common.critic.ViTCritic
+    spatial_emb: 128
+    augment: False
+    backbone:
+      _target_: model.common.vit.VitEncoder
+      obs_shape: ${shape_meta.obs.rgb.shape}
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 RGB channels per image; image history is concatenated along the channel axis
+      img_h: ${shape_meta.obs.rgb.shape[1]}
+      img_w: ${shape_meta.obs.rgb.shape[2]}
+      cfg:
+        patch_size: 8
+        depth: 1
+        embed_dim: 128
+        num_heads: 4
+        embed_style: embed2
+        embed_norm: 0
+    img_cond_steps: ${img_cond_steps}
+    mlp_dims: [256, 256, 256]
+    activation_type: Mish
+    residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+  ft_denoising_steps: ${ft_denoising_steps}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/can/ft_ppo_gaussian_mlp.yaml b/cfg/robomimic/finetune/can/ft_ppo_gaussian_mlp.yaml
index 1f093e2..d03a676 100644
--- a/cfg/robomimic/finetune/can/ft_ppo_gaussian_mlp.yaml
+++ b/cfg/robomimic/finetune/can/ft_ppo_gaussian_mlp.yaml
@@ -45,20 +45,20 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 300
+  n_train_itr: 151
   n_critic_warmup_itr: 2
   n_steps: 300
   gamma: 0.999
-  actor_lr: 1e-5
+  actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
-    min_lr: 1e-5
+    min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
diff --git a/cfg/robomimic/finetune/can/ft_ppo_gaussian_mlp_img.yaml b/cfg/robomimic/finetune/can/ft_ppo_gaussian_mlp_img.yaml
index fcba3e6..581b659 100644
--- a/cfg/robomimic/finetune/can/ft_ppo_gaussian_mlp_img.yaml
+++ b/cfg/robomimic/finetune/can/ft_ppo_gaussian_mlp_img.yaml
@@ -1,7 +1,7 @@
 defaults:
   - _self_
 hydra:
-  run:  
+  run:
     dir: ${logdir}
 _target_: agent.finetune.train_ppo_gaussian_img_agent.TrainPPOImgGaussianAgent
 
@@ -57,22 +57,22 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 200
+  n_train_itr: 151
   n_critic_warmup_itr: 2
   n_steps: 300
   gamma: 0.999
   augment: True
   grad_accumulate: 5
-  actor_lr: 1e-5
+  actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 200
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
-    min_lr: 1e-5
+    min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 200
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
@@ -140,9 +140,9 @@ model:
         embed_style: embed2
         embed_norm: 0
     img_cond_steps: ${img_cond_steps}
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   horizon_steps: ${horizon_steps}
   device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/can/ft_ppo_gaussian_transformer.yaml b/cfg/robomimic/finetune/can/ft_ppo_gaussian_transformer.yaml
index 3b6254d..006769f 100644
--- a/cfg/robomimic/finetune/can/ft_ppo_gaussian_transformer.yaml
+++ b/cfg/robomimic/finetune/can/ft_ppo_gaussian_transformer.yaml
@@ -45,20 +45,20 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 300
+  n_train_itr: 151
   n_critic_warmup_itr: 2
   n_steps: 300
   gamma: 0.999
-  actor_lr: 1e-5
+  actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
-    min_lr: 1e-5
+    min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
diff --git a/cfg/robomimic/finetune/can/ft_ppo_gmm_mlp.yaml b/cfg/robomimic/finetune/can/ft_ppo_gmm_mlp.yaml
index 1e7beb2..d141be2 100644
--- a/cfg/robomimic/finetune/can/ft_ppo_gmm_mlp.yaml
+++ b/cfg/robomimic/finetune/can/ft_ppo_gmm_mlp.yaml
@@ -46,20 +46,20 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 300
+  n_train_itr: 151
   n_critic_warmup_itr: 2
   n_steps: 300
   gamma: 0.999
-  actor_lr: 1e-5
+  actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
-    min_lr: 1e-5
+    min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
diff --git a/cfg/robomimic/finetune/lift/ft_ppo_diffusion_mlp.yaml b/cfg/robomimic/finetune/lift/ft_ppo_diffusion_mlp.yaml
index 16b9485..3666789 100644
--- a/cfg/robomimic/finetune/lift/ft_ppo_diffusion_mlp.yaml
+++ b/cfg/robomimic/finetune/lift/ft_ppo_diffusion_mlp.yaml
@@ -1,13 +1,14 @@
 defaults:
   - _self_
 hydra:
-  run:  
+  run:
     dir: ${logdir}
 _target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
 
 name: ${env_name}_ft_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
 logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
-base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_5000.pt # use 8000 for comparing policy parameterizations
+base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_5000.pt # use 5000 for comparing diffusion rl algorithms
+# base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_8000.pt # use 8000 for comparing policy parameterizations
 robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
 normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
 
@@ -54,13 +55,13 @@ train:
   actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
diff --git a/cfg/robomimic/finetune/lift/ft_ppo_diffusion_mlp_img.yaml b/cfg/robomimic/finetune/lift/ft_ppo_diffusion_mlp_img.yaml
index 72207d6..8c6bcc8 100644
--- a/cfg/robomimic/finetune/lift/ft_ppo_diffusion_mlp_img.yaml
+++ b/cfg/robomimic/finetune/lift/ft_ppo_diffusion_mlp_img.yaml
@@ -60,22 +60,22 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 151
+  n_train_itr: 81
   n_critic_warmup_itr: 2
   n_steps: 300
   gamma: 0.999
   augment: True
   grad_accumulate: 15
-  actor_lr: 1e-4
+  actor_lr: 5e-5
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
-    min_lr: 1e-4
+    min_lr: 5e-5
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
diff --git a/cfg/robomimic/finetune/lift/ft_ppo_diffusion_unet.yaml b/cfg/robomimic/finetune/lift/ft_ppo_diffusion_unet.yaml
index 6550645..e20383a 100644
--- a/cfg/robomimic/finetune/lift/ft_ppo_diffusion_unet.yaml
+++ b/cfg/robomimic/finetune/lift/ft_ppo_diffusion_unet.yaml
@@ -27,7 +27,7 @@ env:
   name: ${env_name}
   best_reward_threshold_for_success: 1
   max_episode_steps: 300
-  save_video: false
+  save_video: False
   wrappers:
     robomimic_lowdim:
       normalization_path: ${normalization_path}
@@ -47,20 +47,20 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 300
+  n_train_itr: 81
   n_critic_warmup_itr: 2
   n_steps: 300
   gamma: 0.999
-  actor_lr: 1e-5
+  actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
-    min_lr: 1e-5
+    min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
@@ -102,10 +102,10 @@ model:
     action_dim: ${action_dim}
   critic:
     _target_: model.common.critic.CriticObs
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   ft_denoising_steps: ${ft_denoising_steps}
   horizon_steps: ${horizon_steps}
   obs_dim: ${obs_dim}
diff --git a/cfg/robomimic/finetune/lift/ft_ppo_diffusion_unet_img.yaml b/cfg/robomimic/finetune/lift/ft_ppo_diffusion_unet_img.yaml
new file mode 100644
index 0000000..f72b70b
--- /dev/null
+++ b/cfg/robomimic/finetune/lift/ft_ppo_diffusion_unet_img.yaml
@@ -0,0 +1,173 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent
+
+name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-35-19_42/checkpoint/state_500.pt
+robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
+normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
+
+seed: 42
+device: cuda:0
+env_name: lift
+obs_dim: 9
+action_dim: 7
+denoising_steps: 100
+ft_denoising_steps: 5
+cond_steps: 1
+img_cond_steps: 1
+horizon_steps: 4
+act_steps: 4
+use_ddim: True
+
+env:
+  n_envs: 50
+  name: ${env_name}
+  best_reward_threshold_for_success: 1
+  max_episode_steps: 300
+  save_video: False
+  use_image_obs: True
+  wrappers:
+    robomimic_image:
+      normalization_path: ${normalization_path}
+      low_dim_keys: ['robot0_eef_pos',
+                     'robot0_eef_quat',
+                     'robot0_gripper_qpos']
+      image_keys: ['robot0_eye_in_hand_image']
+      shape_meta: ${shape_meta}
+    multi_step:
+      n_obs_steps: ${cond_steps}
+      n_action_steps: ${act_steps}
+      max_episode_steps: ${env.max_episode_steps}
+      reset_within_step: True
+
+shape_meta:
+  obs:
+    rgb:
+      shape: [3, 96, 96]
+    state:
+      shape: [9]
+  action:
+    shape: [7]
+
+wandb:
+  entity: ${oc.env:DPPO_WANDB_ENTITY}
+  project: robomimic-${env_name}-finetune
+  run: ${now:%H-%M-%S}_${name}
+
+train:
+  n_train_itr: 81
+  n_critic_warmup_itr: 2
+  n_steps: 300
+  gamma: 0.999
+  augment: True
+  grad_accumulate: 15
+  actor_lr: 5e-5
+  actor_weight_decay: 0
+  actor_lr_scheduler:
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 10
+    min_lr: 5e-5
+  critic_lr: 1e-3
+  critic_weight_decay: 0
+  critic_lr_scheduler:
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 10
+    min_lr: 1e-3
+  save_model_freq: 100
+  val_freq: 10
+  render:
+    freq: 1
+    num: 0
+  # PPO specific
+  reward_scale_running: True
+  reward_scale_const: 1.0
+  gae_lambda: 0.95
+  batch_size: 500
+  logprob_batch_size: 500
+  update_epochs: 10
+  vf_coef: 0.5
+  target_kl: 1
+
+model:
+  _target_: model.diffusion.diffusion_ppo.PPODiffusion
+  # HP to tune
+  gamma_denoising: 0.99
+  clip_ploss_coef: 0.01
+  clip_ploss_coef_base: 0.001
+  clip_ploss_coef_rate: 3
+  randn_clip_value: 3
+  min_sampling_denoising_std: 0.1
+  min_logprob_denoising_std: 0.1
+  #
+  use_ddim: ${use_ddim}
+  ddim_steps: ${ft_denoising_steps}
+  learn_eta: False
+  eta:
+    base_eta: 1
+    input_dim: ${obs_dim}
+    mlp_dims: [256, 256]
+    action_dim: ${action_dim}
+    min_eta: 0.1
+    max_eta: 1.0
+    _target_: model.diffusion.eta.EtaFixed
+  network_path: ${base_policy_path}
+  actor:
+    _target_: model.diffusion.unet.VisionUnet1D
+    backbone:
+      _target_: model.common.vit.VitEncoder
+      obs_shape: ${shape_meta.obs.rgb.shape}
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
+      img_h: ${shape_meta.obs.rgb.shape[1]}
+      img_w: ${shape_meta.obs.rgb.shape[2]}
+      cfg:
+        patch_size: 8
+        depth: 1
+        embed_dim: 128
+        num_heads: 4
+        embed_style: embed2
+        embed_norm: 0
+    img_cond_steps: ${img_cond_steps}
+    augment: False
+    spatial_emb: 128
+    diffusion_step_embed_dim: 32
+    dim: 40
+    dim_mults: [1, 2]
+    kernel_size: 5
+    n_groups: 8
+    smaller_encoder: False
+    cond_predict_scale: True
+    action_dim: ${action_dim}
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+  critic:
+    _target_: model.common.critic.ViTCritic
+    spatial_emb: 128
+    augment: False
+    backbone:
+      _target_: model.common.vit.VitEncoder
+      obs_shape: ${shape_meta.obs.rgb.shape}
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
+      img_h: ${shape_meta.obs.rgb.shape[1]}
+      img_w: ${shape_meta.obs.rgb.shape[2]}
+      cfg:
+        patch_size: 8
+        depth: 1
+        embed_dim: 128
+        num_heads: 4
+        embed_style: embed2
+        embed_norm: 0
+    img_cond_steps: ${img_cond_steps}
+    mlp_dims: [256, 256, 256]
+    activation_type: Mish
+    residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+  ft_denoising_steps: ${ft_denoising_steps}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/lift/ft_ppo_gaussian_mlp.yaml b/cfg/robomimic/finetune/lift/ft_ppo_gaussian_mlp.yaml
index 6bab450..5ea4132 100644
--- a/cfg/robomimic/finetune/lift/ft_ppo_gaussian_mlp.yaml
+++ b/cfg/robomimic/finetune/lift/ft_ppo_gaussian_mlp.yaml
@@ -25,7 +25,7 @@ env:
   name: ${env_name}
   best_reward_threshold_for_success: 1
   max_episode_steps: 300
-  save_video: false
+  save_video: False
   wrappers:
     robomimic_lowdim:
       normalization_path: ${normalization_path}
@@ -45,20 +45,20 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 300
+  n_train_itr: 81
   n_critic_warmup_itr: 2
   n_steps: 300
   gamma: 0.999
-  actor_lr: 1e-5
+  actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
-    min_lr: 1e-5
+    min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
@@ -93,9 +93,9 @@ model:
     action_dim: ${action_dim}
   critic:
     _target_: model.common.critic.CriticObs
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   horizon_steps: ${horizon_steps}
   device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/lift/ft_ppo_gaussian_mlp_img.yaml b/cfg/robomimic/finetune/lift/ft_ppo_gaussian_mlp_img.yaml
index 6f589c3..dbd9b3c 100644
--- a/cfg/robomimic/finetune/lift/ft_ppo_gaussian_mlp_img.yaml
+++ b/cfg/robomimic/finetune/lift/ft_ppo_gaussian_mlp_img.yaml
@@ -1,7 +1,7 @@
 defaults:
   - _self_
 hydra:
-  run:  
+  run:
     dir: ${logdir}
 _target_: agent.finetune.train_ppo_gaussian_img_agent.TrainPPOImgGaussianAgent
 
@@ -57,22 +57,22 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 200
+  n_train_itr: 81
   n_critic_warmup_itr: 2
   n_steps: 300
   gamma: 0.999
   augment: True
   grad_accumulate: 5
-  actor_lr: 1e-5
+  actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 200
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
-    min_lr: 1e-5
+    min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 200
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
@@ -140,9 +140,9 @@ model:
         embed_style: embed2
         embed_norm: 0
     img_cond_steps: ${img_cond_steps}
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   horizon_steps: ${horizon_steps}
   device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/lift/ft_ppo_gaussian_transformer.yaml b/cfg/robomimic/finetune/lift/ft_ppo_gaussian_transformer.yaml
index fff3c02..d7b9965 100644
--- a/cfg/robomimic/finetune/lift/ft_ppo_gaussian_transformer.yaml
+++ b/cfg/robomimic/finetune/lift/ft_ppo_gaussian_transformer.yaml
@@ -25,7 +25,7 @@ env:
   name: ${env_name}
   best_reward_threshold_for_success: 1
   max_episode_steps: 300
-  save_video: false
+  save_video: False
   wrappers:
     robomimic_lowdim:
       normalization_path: ${normalization_path}
@@ -45,20 +45,20 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 300
+  n_train_itr: 81
   n_critic_warmup_itr: 2
   n_steps: 300
   gamma: 0.999
-  actor_lr: 1e-5
+  actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
-    min_lr: 1e-5
+    min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
@@ -94,9 +94,9 @@ model:
     action_dim: ${action_dim}
   critic:
     _target_: model.common.critic.CriticObs
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   horizon_steps: ${horizon_steps}
   device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/lift/ft_ppo_gmm_mlp.yaml b/cfg/robomimic/finetune/lift/ft_ppo_gmm_mlp.yaml
index 1b31a4e..28e3280 100644
--- a/cfg/robomimic/finetune/lift/ft_ppo_gmm_mlp.yaml
+++ b/cfg/robomimic/finetune/lift/ft_ppo_gmm_mlp.yaml
@@ -26,7 +26,7 @@ env:
   name: ${env_name}
   best_reward_threshold_for_success: 1
   max_episode_steps: 300
-  save_video: false
+  save_video: False
   wrappers:
     robomimic_lowdim:
       normalization_path: ${normalization_path}
@@ -46,20 +46,20 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 300
+  n_train_itr: 81
   n_critic_warmup_itr: 2
   n_steps: 300
   gamma: 0.999
-  actor_lr: 1e-5
+  actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
-    min_lr: 1e-5
+    min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
@@ -94,9 +94,9 @@ model:
     action_dim: ${action_dim}
   critic:
     _target_: model.common.critic.CriticObs
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   horizon_steps: ${horizon_steps}
   device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/lift/ft_ppo_gmm_transformer.yaml b/cfg/robomimic/finetune/lift/ft_ppo_gmm_transformer.yaml
index 4a89144..45c0024 100644
--- a/cfg/robomimic/finetune/lift/ft_ppo_gmm_transformer.yaml
+++ b/cfg/robomimic/finetune/lift/ft_ppo_gmm_transformer.yaml
@@ -26,7 +26,7 @@ env:
   name: ${env_name}
   best_reward_threshold_for_success: 1
   max_episode_steps: 300
-  save_video: false
+  save_video: False
   wrappers:
     robomimic_lowdim:
       normalization_path: ${normalization_path}
@@ -46,20 +46,20 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 300
+  n_train_itr: 81
   n_critic_warmup_itr: 2
   n_steps: 300
   gamma: 0.999
-  actor_lr: 1e-5
+  actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
-    min_lr: 1e-5
+    min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
@@ -95,9 +95,9 @@ model:
     action_dim: ${action_dim}
   critic:
     _target_: model.common.critic.CriticObs
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   horizon_steps: ${horizon_steps}
   device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/square/ft_ppo_diffusion_mlp.yaml b/cfg/robomimic/finetune/square/ft_ppo_diffusion_mlp.yaml
index edbe296..eab5648 100644
--- a/cfg/robomimic/finetune/square/ft_ppo_diffusion_mlp.yaml
+++ b/cfg/robomimic/finetune/square/ft_ppo_diffusion_mlp.yaml
@@ -1,7 +1,7 @@
 defaults:
   - _self_
 hydra:
-  run:  
+  run:
     dir: ${logdir}
 _target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
 
@@ -27,7 +27,7 @@ env:
   name: ${env_name}
   best_reward_threshold_for_success: 1
   max_episode_steps: 400
-  save_video: false
+  save_video: False
   wrappers:
     robomimic_lowdim:
       normalization_path: ${normalization_path}
@@ -54,14 +54,14 @@ train:
   actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
-    warmup_steps: 10
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 0
     min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
-    warmup_steps: 10
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 0
     min_lr: 1e-3
   save_model_freq: 100
   val_freq: 10
diff --git a/cfg/robomimic/finetune/square/ft_ppo_diffusion_mlp_img.yaml b/cfg/robomimic/finetune/square/ft_ppo_diffusion_mlp_img.yaml
index 84355d6..e9ad66c 100644
--- a/cfg/robomimic/finetune/square/ft_ppo_diffusion_mlp_img.yaml
+++ b/cfg/robomimic/finetune/square/ft_ppo_diffusion_mlp_img.yaml
@@ -69,13 +69,13 @@ train:
   actor_lr: 1e-5
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-5
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
diff --git a/cfg/robomimic/finetune/square/ft_ppo_diffusion_unet.yaml b/cfg/robomimic/finetune/square/ft_ppo_diffusion_unet.yaml
index 794017a..2031557 100644
--- a/cfg/robomimic/finetune/square/ft_ppo_diffusion_unet.yaml
+++ b/cfg/robomimic/finetune/square/ft_ppo_diffusion_unet.yaml
@@ -1,7 +1,7 @@
 defaults:
   - _self_
 hydra:
-  run:  
+  run:
     dir: ${logdir}
 _target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
 
@@ -27,7 +27,7 @@ env:
   name: ${env_name}
   best_reward_threshold_for_success: 1
   max_episode_steps: 400
-  save_video: false
+  save_video: False
   wrappers:
     robomimic_lowdim:
       normalization_path: ${normalization_path}
@@ -47,21 +47,21 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 1000
+  n_train_itr: 201
   n_critic_warmup_itr: 2
   n_steps: 400
   gamma: 0.999
-  actor_lr: 1e-5
+  actor_lr: 2e-5
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
-    warmup_steps: 10
-    min_lr: 1e-5
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 0
+    min_lr: 2e-5
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
-    warmup_steps: 10
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 0
     min_lr: 1e-3
   save_model_freq: 100
   val_freq: 10
@@ -102,10 +102,10 @@ model:
     action_dim: ${action_dim}
   critic:
     _target_: model.common.critic.CriticObs
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   ft_denoising_steps: ${ft_denoising_steps}
   horizon_steps: ${horizon_steps}
   obs_dim: ${obs_dim}
diff --git a/cfg/robomimic/finetune/square/ft_ppo_diffusion_unet_img.yaml b/cfg/robomimic/finetune/square/ft_ppo_diffusion_unet_img.yaml
new file mode 100644
index 0000000..4dba7ee
--- /dev/null
+++ b/cfg/robomimic/finetune/square/ft_ppo_diffusion_unet_img.yaml
@@ -0,0 +1,173 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent
+
+name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/square/square_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-36-37_42/checkpoint/state_500.pt
+robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
+normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
+
+seed: 42
+device: cuda:0
+env_name: square
+obs_dim: 9
+action_dim: 7
+denoising_steps: 100
+ft_denoising_steps: 5
+cond_steps: 1
+img_cond_steps: 1
+horizon_steps: 4
+act_steps: 4
+use_ddim: True
+
+env:
+  n_envs: 50
+  name: ${env_name}
+  best_reward_threshold_for_success: 1
+  max_episode_steps: 400
+  save_video: False
+  use_image_obs: True
+  wrappers:
+    robomimic_image:
+      normalization_path: ${normalization_path}
+      low_dim_keys: ['robot0_eef_pos',
+                     'robot0_eef_quat',
+                     'robot0_gripper_qpos']
+      image_keys: ['agentview_image']
+      shape_meta: ${shape_meta}
+    multi_step:
+      n_obs_steps: ${cond_steps}
+      n_action_steps: ${act_steps}
+      max_episode_steps: ${env.max_episode_steps}
+      reset_within_step: True
+
+shape_meta:
+  obs:
+    rgb:
+      shape: [3, 96, 96]
+    state:
+      shape: [9]
+  action:
+    shape: [7]
+
+wandb:
+  entity: ${oc.env:DPPO_WANDB_ENTITY}
+  project: robomimic-${env_name}-finetune
+  run: ${now:%H-%M-%S}_${name}
+
+train:
+  n_train_itr: 301
+  n_critic_warmup_itr: 2
+  n_steps: 400
+  gamma: 0.999
+  augment: True
+  grad_accumulate: 20
+  actor_lr: 1e-5
+  actor_weight_decay: 0
+  actor_lr_scheduler:
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 10
+    min_lr: 1e-5
+  critic_lr: 1e-3
+  critic_weight_decay: 0
+  critic_lr_scheduler:
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 10
+    min_lr: 1e-3
+  save_model_freq: 100
+  val_freq: 10
+  render:
+    freq: 1
+    num: 0
+  # PPO specific
+  reward_scale_running: True
+  reward_scale_const: 1.0
+  gae_lambda: 0.95
+  batch_size: 500
+  logprob_batch_size: 1000
+  update_epochs: 10
+  vf_coef: 0.5
+  target_kl: 1
+
+model:
+  _target_: model.diffusion.diffusion_ppo.PPODiffusion
+  # HP to tune
+  gamma_denoising: 0.99
+  clip_ploss_coef: 0.01
+  clip_ploss_coef_base: 0.001
+  clip_ploss_coef_rate: 3
+  randn_clip_value: 3
+  min_sampling_denoising_std: 0.1
+  min_logprob_denoising_std: 0.1
+  #
+  use_ddim: ${use_ddim}
+  ddim_steps: ${ft_denoising_steps}
+  learn_eta: False
+  eta:
+    base_eta: 1
+    input_dim: ${obs_dim}
+    mlp_dims: [256, 256]
+    action_dim: ${action_dim}
+    min_eta: 0.1
+    max_eta: 1.0
+    _target_: model.diffusion.eta.EtaFixed
+  network_path: ${base_policy_path}
+  actor:
+    _target_: model.diffusion.unet.VisionUnet1D
+    backbone:
+      _target_: model.common.vit.VitEncoder
+      obs_shape: ${shape_meta.obs.rgb.shape}
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
+      img_h: ${shape_meta.obs.rgb.shape[1]}
+      img_w: ${shape_meta.obs.rgb.shape[2]}
+      cfg:
+        patch_size: 8
+        depth: 1
+        embed_dim: 128
+        num_heads: 4
+        embed_style: embed2
+        embed_norm: 0
+    img_cond_steps: ${img_cond_steps}
+    augment: False
+    spatial_emb: 128
+    diffusion_step_embed_dim: 32
+    dim: 64
+    dim_mults: [1, 2]
+    kernel_size: 5
+    n_groups: 8
+    smaller_encoder: False
+    cond_predict_scale: True
+    action_dim: ${action_dim}
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+  critic:
+    _target_: model.common.critic.ViTCritic
+    spatial_emb: 128
+    augment: False
+    backbone:
+      _target_: model.common.vit.VitEncoder
+      obs_shape: ${shape_meta.obs.rgb.shape}
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
+      img_h: ${shape_meta.obs.rgb.shape[1]}
+      img_w: ${shape_meta.obs.rgb.shape[2]}
+      cfg:
+        patch_size: 8
+        depth: 1
+        embed_dim: 128
+        num_heads: 4
+        embed_style: embed2
+        embed_norm: 0
+    img_cond_steps: ${img_cond_steps}
+    mlp_dims: [256, 256, 256]
+    activation_type: Mish
+    residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+  ft_denoising_steps: ${ft_denoising_steps}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/square/ft_ppo_gaussian_mlp.yaml b/cfg/robomimic/finetune/square/ft_ppo_gaussian_mlp.yaml
index e5f382c..c296aff 100644
--- a/cfg/robomimic/finetune/square/ft_ppo_gaussian_mlp.yaml
+++ b/cfg/robomimic/finetune/square/ft_ppo_gaussian_mlp.yaml
@@ -25,7 +25,7 @@ env:
   name: ${env_name}
   best_reward_threshold_for_success: 1
   max_episode_steps: 400
-  save_video: false
+  save_video: False
   wrappers:
     robomimic_lowdim:
       normalization_path: ${normalization_path}
@@ -45,21 +45,21 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 1000
+  n_train_itr: 201
   n_critic_warmup_itr: 2
   n_steps: 400
   gamma: 0.999
-  actor_lr: 1e-5
+  actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
-    warmup_steps: 10
-    min_lr: 1e-5
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 0
+    min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
-    warmup_steps: 10
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 0
     min_lr: 1e-3
   save_model_freq: 100
   val_freq: 10
@@ -93,9 +93,9 @@ model:
     action_dim: ${action_dim}
   critic:
     _target_: model.common.critic.CriticObs
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   horizon_steps: ${horizon_steps}
   device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/square/ft_ppo_gaussian_mlp_img.yaml b/cfg/robomimic/finetune/square/ft_ppo_gaussian_mlp_img.yaml
index 7ed1e91..aa63306 100644
--- a/cfg/robomimic/finetune/square/ft_ppo_gaussian_mlp_img.yaml
+++ b/cfg/robomimic/finetune/square/ft_ppo_gaussian_mlp_img.yaml
@@ -57,7 +57,7 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 500
+  n_train_itr: 301
   n_critic_warmup_itr: 2
   n_steps: 400
   gamma: 0.999
@@ -66,13 +66,13 @@ train:
   actor_lr: 1e-5
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 500
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-5
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 500
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
@@ -140,9 +140,9 @@ model:
         embed_style: embed2
         embed_norm: 0
     img_cond_steps: ${img_cond_steps}
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   horizon_steps: ${horizon_steps}
   device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/square/ft_ppo_gaussian_transformer.yaml b/cfg/robomimic/finetune/square/ft_ppo_gaussian_transformer.yaml
index e5ca94b..df13e51 100644
--- a/cfg/robomimic/finetune/square/ft_ppo_gaussian_transformer.yaml
+++ b/cfg/robomimic/finetune/square/ft_ppo_gaussian_transformer.yaml
@@ -25,7 +25,7 @@ env:
   name: ${env_name}
   best_reward_threshold_for_success: 1
   max_episode_steps: 400
-  save_video: false
+  save_video: False
   wrappers:
     robomimic_lowdim:
       normalization_path: ${normalization_path}
@@ -45,21 +45,21 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 1000
+  n_train_itr: 201
   n_critic_warmup_itr: 2
   n_steps: 400
   gamma: 0.999
-  actor_lr: 1e-5
+  actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
-    warmup_steps: 10
-    min_lr: 1e-5
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 0
+    min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
-    warmup_steps: 10
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 0
     min_lr: 1e-3
   save_model_freq: 100
   val_freq: 10
@@ -94,9 +94,9 @@ model:
     action_dim: ${action_dim}
   critic:
     _target_: model.common.critic.CriticObs
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   horizon_steps: ${horizon_steps}
   device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/square/ft_ppo_gmm_mlp.yaml b/cfg/robomimic/finetune/square/ft_ppo_gmm_mlp.yaml
index e7f14ca..9e36d2a 100644
--- a/cfg/robomimic/finetune/square/ft_ppo_gmm_mlp.yaml
+++ b/cfg/robomimic/finetune/square/ft_ppo_gmm_mlp.yaml
@@ -26,7 +26,7 @@ env:
   name: ${env_name}
   best_reward_threshold_for_success: 1
   max_episode_steps: 400
-  save_video: false
+  save_video: False
   wrappers:
     robomimic_lowdim:
       normalization_path: ${normalization_path}
@@ -46,21 +46,21 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 1000
+  n_train_itr: 201
   n_critic_warmup_itr: 2
   n_steps: 400
   gamma: 0.999
-  actor_lr: 1e-5
+  actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
-    warmup_steps: 10
-    min_lr: 1e-5
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 0
+    min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
-    warmup_steps: 10
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 0
     min_lr: 1e-3
   save_model_freq: 100
   val_freq: 10
@@ -94,9 +94,9 @@ model:
     action_dim: ${action_dim}
   critic:
     _target_: model.common.critic.CriticObs
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   horizon_steps: ${horizon_steps}
   device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/square/ft_ppo_gmm_transformer.yaml b/cfg/robomimic/finetune/square/ft_ppo_gmm_transformer.yaml
index b5f3157..fa016ad 100644
--- a/cfg/robomimic/finetune/square/ft_ppo_gmm_transformer.yaml
+++ b/cfg/robomimic/finetune/square/ft_ppo_gmm_transformer.yaml
@@ -26,7 +26,7 @@ env:
   name: ${env_name}
   best_reward_threshold_for_success: 1
   max_episode_steps: 400
-  save_video: false
+  save_video: False
   wrappers:
     robomimic_lowdim:
       normalization_path: ${normalization_path}
@@ -46,21 +46,21 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 1000
+  n_train_itr: 201
   n_critic_warmup_itr: 2
   n_steps: 400
   gamma: 0.999
-  actor_lr: 1e-5
+  actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
-    warmup_steps: 10
-    min_lr: 1e-5
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 0
+    min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
-    warmup_steps: 10
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 0
     min_lr: 1e-3
   save_model_freq: 100
   val_freq: 10
@@ -95,9 +95,9 @@ model:
     action_dim: ${action_dim}
   critic:
     _target_: model.common.critic.CriticObs
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   horizon_steps: ${horizon_steps}
   device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/transport/ft_ppo_diffusion_mlp.yaml b/cfg/robomimic/finetune/transport/ft_ppo_diffusion_mlp.yaml
index 198855b..c189ae1 100644
--- a/cfg/robomimic/finetune/transport/ft_ppo_diffusion_mlp.yaml
+++ b/cfg/robomimic/finetune/transport/ft_ppo_diffusion_mlp.yaml
@@ -27,7 +27,7 @@ env:
   name: ${env_name}
   best_reward_threshold_for_success: 1
   max_episode_steps: 800
-  save_video: false
+  save_video: False
   wrappers:
     robomimic_lowdim:
       normalization_path: ${normalization_path}
@@ -57,13 +57,13 @@ train:
   actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
diff --git a/cfg/robomimic/finetune/transport/ft_ppo_diffusion_mlp_img.yaml b/cfg/robomimic/finetune/transport/ft_ppo_diffusion_mlp_img.yaml
index b826e06..83033bb 100644
--- a/cfg/robomimic/finetune/transport/ft_ppo_diffusion_mlp_img.yaml
+++ b/cfg/robomimic/finetune/transport/ft_ppo_diffusion_mlp_img.yaml
@@ -73,13 +73,13 @@ train:
   actor_lr: 1e-5
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-5
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
diff --git a/cfg/robomimic/finetune/transport/ft_ppo_diffusion_unet.yaml b/cfg/robomimic/finetune/transport/ft_ppo_diffusion_unet.yaml
index 5aec825..c0a94e8 100644
--- a/cfg/robomimic/finetune/transport/ft_ppo_diffusion_unet.yaml
+++ b/cfg/robomimic/finetune/transport/ft_ppo_diffusion_unet.yaml
@@ -27,7 +27,7 @@ env:
   name: ${env_name}
   best_reward_threshold_for_success: 1
   max_episode_steps: 800
-  save_video: false
+  save_video: False
   wrappers:
     robomimic_lowdim:
       normalization_path: ${normalization_path}
@@ -50,20 +50,20 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 1000
+  n_train_itr: 201
   n_critic_warmup_itr: 2
   n_steps: 400
   gamma: 0.999
-  actor_lr: 1e-5
+  actor_lr: 2e-5
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
-    min_lr: 1e-6
+    min_lr: 1e-4  # NOTE(review): larger than actor_lr (2e-5) set above; confirm this floor is intended
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
@@ -76,7 +76,7 @@ train:
   reward_scale_const: 1.0
   gae_lambda: 0.95
   batch_size: 10000
-  update_epochs: 8
+  update_epochs: 5
   vf_coef: 0.5
   target_kl: 1
 
@@ -84,11 +84,11 @@ model:
   _target_: model.diffusion.diffusion_ppo.PPODiffusion
   # HP to tune
   gamma_denoising: 0.99
-  clip_ploss_coef: 0.001
-  clip_ploss_coef_base: 0.0001
+  clip_ploss_coef: 0.01
+  clip_ploss_coef_base: 0.001
   clip_ploss_coef_rate: 3
   randn_clip_value: 3
-  min_sampling_denoising_std: 0.08
+  min_sampling_denoising_std: 0.1
   min_logprob_denoising_std: 0.1
   #
   network_path: ${base_policy_path}
@@ -105,10 +105,10 @@ model:
     action_dim: ${action_dim}
   critic:
     _target_: model.common.critic.CriticObs
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   ft_denoising_steps: ${ft_denoising_steps}
   horizon_steps: ${horizon_steps}
   obs_dim: ${obs_dim}
diff --git a/cfg/robomimic/finetune/transport/ft_ppo_diffusion_unet_img.yaml b/cfg/robomimic/finetune/transport/ft_ppo_diffusion_unet_img.yaml
new file mode 100644
index 0000000..8754adf
--- /dev/null
+++ b/cfg/robomimic/finetune/transport/ft_ppo_diffusion_unet_img.yaml
@@ -0,0 +1,179 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent
+
+name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/transport/transport_pre_diffusion_unet_img_ta16_td100/2024-11-15_17-55-22_42/checkpoint/state_1000.pt
+robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
+normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
+
+seed: 42
+device: cuda:0
+env_name: transport
+obs_dim: 18
+action_dim: 14
+denoising_steps: 100
+ft_denoising_steps: 5
+cond_steps: 1
+img_cond_steps: 1
+horizon_steps: 16
+act_steps: 8
+use_ddim: True
+
+env:
+  n_envs: 50
+  name: ${env_name}
+  best_reward_threshold_for_success: 1
+  max_episode_steps: 800
+  save_video: False
+  use_image_obs: True
+  wrappers:
+    robomimic_image:
+      normalization_path: ${normalization_path}
+      low_dim_keys: ['robot0_eef_pos',
+                     'robot0_eef_quat',
+                     'robot0_gripper_qpos',
+                     "robot1_eef_pos",
+                     "robot1_eef_quat",
+                     "robot1_gripper_qpos"]
+      image_keys: ['shouldercamera0_image', 
+                   'shouldercamera1_image']
+      shape_meta: ${shape_meta}
+    multi_step:
+      n_obs_steps: ${cond_steps}
+      n_action_steps: ${act_steps}
+      max_episode_steps: ${env.max_episode_steps}
+      reset_within_step: True
+
+shape_meta:
+  obs:
+    rgb:
+      shape: [6, 96, 96]
+    state:
+      shape: [18]
+  action: 
+    shape: [14]
+
+wandb:
+  entity: ${oc.env:DPPO_WANDB_ENTITY}
+  project: robomimic-${env_name}-finetune
+  run: ${now:%H-%M-%S}_${name}
+
+train:
+  n_train_itr: 201
+  n_critic_warmup_itr: 2
+  n_steps: 400
+  gamma: 0.999
+  augment: True
+  grad_accumulate: 20
+  actor_lr: 2e-5
+  actor_weight_decay: 0
+  actor_lr_scheduler:
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 10
+    min_lr: 2e-5
+  critic_lr: 1e-3
+  critic_weight_decay: 0
+  critic_lr_scheduler:
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 10
+    min_lr: 1e-3
+  save_model_freq: 100
+  val_freq: 10
+  render:
+    freq: 1
+    num: 0
+  # PPO specific
+  reward_scale_running: True
+  reward_scale_const: 1.0
+  gae_lambda: 0.95
+  batch_size: 500
+  logprob_batch_size: 1000
+  update_epochs: 10
+  vf_coef: 0.5
+  target_kl: 1
+
+model:
+  _target_: model.diffusion.diffusion_ppo.PPODiffusion
+  # HP to tune
+  gamma_denoising: 0.99
+  clip_ploss_coef: 0.01
+  clip_ploss_coef_base: 0.001
+  clip_ploss_coef_rate: 3
+  randn_clip_value: 3
+  min_sampling_denoising_std: 0.1
+  min_logprob_denoising_std: 0.1
+  #
+  use_ddim: ${use_ddim}
+  ddim_steps: ${ft_denoising_steps}
+  learn_eta: False
+  eta:
+    base_eta: 1
+    input_dim: ${obs_dim}
+    mlp_dims: [256, 256]
+    action_dim: ${action_dim}
+    min_eta: 0.1
+    max_eta: 1.0
+    _target_: model.diffusion.eta.EtaFixed
+  network_path: ${base_policy_path}
+  actor:
+    _target_: model.diffusion.unet.VisionUnet1D
+    backbone:
+      _target_: model.common.vit.VitEncoder
+      obs_shape: ${shape_meta.obs.rgb.shape}
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per frame; the img_cond_steps history frames are concatenated within each image patch
+      img_h: ${shape_meta.obs.rgb.shape[1]}
+      img_w: ${shape_meta.obs.rgb.shape[2]}
+      cfg:
+        patch_size: 8
+        depth: 1
+        embed_dim: 128
+        num_heads: 4
+        embed_style: embed2
+        embed_norm: 0
+    img_cond_steps: ${img_cond_steps}
+    augment: False
+    num_img: 2
+    spatial_emb: 128
+    diffusion_step_embed_dim: 32
+    dim: 64
+    dim_mults: [1, 2]
+    kernel_size: 5
+    n_groups: 8
+    smaller_encoder: False
+    cond_predict_scale: True
+    action_dim: ${action_dim}
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+  critic:
+    _target_: model.common.critic.ViTCritic
+    spatial_emb: 128
+    num_img: 2
+    augment: False
+    backbone:
+      _target_: model.common.vit.VitEncoder
+      obs_shape: ${shape_meta.obs.rgb.shape}
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per frame; the img_cond_steps history frames are concatenated within each image patch
+      img_h: ${shape_meta.obs.rgb.shape[1]}
+      img_w: ${shape_meta.obs.rgb.shape[2]}
+      cfg:
+        patch_size: 8
+        depth: 1
+        embed_dim: 128
+        num_heads: 4
+        embed_style: embed2
+        embed_norm: 0
+    img_cond_steps: ${img_cond_steps}
+    mlp_dims: [256, 256, 256]
+    activation_type: Mish
+    residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+  ft_denoising_steps: ${ft_denoising_steps}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/transport/ft_ppo_gaussian_mlp.yaml b/cfg/robomimic/finetune/transport/ft_ppo_gaussian_mlp.yaml
index aa6338c..b3583fb 100644
--- a/cfg/robomimic/finetune/transport/ft_ppo_gaussian_mlp.yaml
+++ b/cfg/robomimic/finetune/transport/ft_ppo_gaussian_mlp.yaml
@@ -25,7 +25,7 @@ env:
   name: ${env_name}
   best_reward_threshold_for_success: 1
   max_episode_steps: 800
-  save_video: false
+  save_video: False
   wrappers:
     robomimic_lowdim:
       normalization_path: ${normalization_path}
@@ -48,21 +48,21 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 1000
+  n_train_itr: 201
   n_critic_warmup_itr: 2
   n_steps: 400
   gamma: 0.999
-  actor_lr: 1e-5
+  actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
-    warmup_steps: 10
-    min_lr: 1e-6
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 0
+    min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
-    warmup_steps: 10
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 0
     min_lr: 1e-3
   save_model_freq: 100
   val_freq: 10
@@ -74,7 +74,7 @@ train:
   reward_scale_const: 1.0
   gae_lambda: 0.95
   batch_size: 10000
-  update_epochs: 8
+  update_epochs: 5
   vf_coef: 0.5
   target_kl: 1
 
@@ -87,7 +87,7 @@ model:
     _target_: model.common.mlp_gaussian.Gaussian_MLP
     mlp_dims: [1024, 1024, 1024]
     residual_style: True
-    fixed_std: 0.08
+    fixed_std: 0.1
     learn_fixed_std: True
     std_min: 0.01
     std_max: 0.2
@@ -96,9 +96,9 @@ model:
     action_dim: ${action_dim}
   critic:
     _target_: model.common.critic.CriticObs
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   horizon_steps: ${horizon_steps}
   device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/transport/ft_ppo_gaussian_mlp_img.yaml b/cfg/robomimic/finetune/transport/ft_ppo_gaussian_mlp_img.yaml
index 286c7bb..915691d 100644
--- a/cfg/robomimic/finetune/transport/ft_ppo_gaussian_mlp_img.yaml
+++ b/cfg/robomimic/finetune/transport/ft_ppo_gaussian_mlp_img.yaml
@@ -61,7 +61,7 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 500
+  n_train_itr: 201
   n_critic_warmup_itr: 2
   n_steps: 400
   gamma: 0.999
@@ -70,13 +70,13 @@ train:
   actor_lr: 1e-5
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 500
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
-    min_lr: 1e-6
+    min_lr: 1e-5
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 500
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
@@ -90,7 +90,7 @@ train:
   gae_lambda: 0.95
   batch_size: 1000
   logprob_batch_size: 1000
-  update_epochs: 8
+  update_epochs: 10
   vf_coef: 0.5
   target_kl: 1
 
@@ -119,7 +119,7 @@ model:
     spatial_emb: 128
     mlp_dims: [768, 768, 768]
     residual_style: True
-    fixed_std: 0.08
+    fixed_std: 0.1
     learn_fixed_std: True
     std_min: 0.01
     std_max: 0.2
@@ -146,9 +146,9 @@ model:
         embed_style: embed2
         embed_norm: 0
     img_cond_steps: ${img_cond_steps}
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   horizon_steps: ${horizon_steps}
   device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/transport/ft_ppo_gaussian_transformer.yaml b/cfg/robomimic/finetune/transport/ft_ppo_gaussian_transformer.yaml
index 2681560..f7c990c 100644
--- a/cfg/robomimic/finetune/transport/ft_ppo_gaussian_transformer.yaml
+++ b/cfg/robomimic/finetune/transport/ft_ppo_gaussian_transformer.yaml
@@ -25,7 +25,7 @@ env:
   name: ${env_name}
   best_reward_threshold_for_success: 1
   max_episode_steps: 800
-  save_video: false
+  save_video: False
   wrappers:
     robomimic_lowdim:
       normalization_path: ${normalization_path}
@@ -48,20 +48,20 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 1000
+  n_train_itr: 201
   n_critic_warmup_itr: 2
   n_steps: 400
   gamma: 0.999
-  actor_lr: 1e-5
+  actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
-    min_lr: 1e-6
+    min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
+    first_cycle_steps: ${train.n_train_itr}
     warmup_steps: 10
     min_lr: 1e-3
   save_model_freq: 100
@@ -74,7 +74,7 @@ train:
   reward_scale_const: 1.0
   gae_lambda: 0.95
   batch_size: 10000
-  update_epochs: 8
+  update_epochs: 5
   vf_coef: 0.5
   target_kl: 1
 
@@ -97,9 +97,9 @@ model:
     action_dim: ${action_dim}
   critic:
     _target_: model.common.critic.CriticObs
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   horizon_steps: ${horizon_steps}
   device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/transport/ft_ppo_gmm_mlp.yaml b/cfg/robomimic/finetune/transport/ft_ppo_gmm_mlp.yaml
index b707736..7f43e09 100644
--- a/cfg/robomimic/finetune/transport/ft_ppo_gmm_mlp.yaml
+++ b/cfg/robomimic/finetune/transport/ft_ppo_gmm_mlp.yaml
@@ -26,7 +26,7 @@ env:
   name: ${env_name}
   best_reward_threshold_for_success: 1
   max_episode_steps: 800
-  save_video: false
+  save_video: False
   wrappers:
     robomimic_lowdim:
       normalization_path: ${normalization_path}
@@ -49,21 +49,21 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 1000
+  n_train_itr: 201
   n_critic_warmup_itr: 2
   n_steps: 400
   gamma: 0.999
-  actor_lr: 1e-5
+  actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
-    warmup_steps: 10
-    min_lr: 1e-6
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 0
+    min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
-    warmup_steps: 10
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 0
     min_lr: 1e-3
   save_model_freq: 100
   val_freq: 10
@@ -75,7 +75,7 @@ train:
   reward_scale_const: 1.0
   gae_lambda: 0.95
   batch_size: 10000
-  update_epochs: 8
+  update_epochs: 5
   vf_coef: 0.5
   target_kl: 1
 
@@ -87,7 +87,7 @@ model:
     _target_: model.common.mlp_gmm.GMM_MLP
     mlp_dims: [1024, 1024, 1024]
     residual_style: True
-    fixed_std: 0.08
+    fixed_std: 0.1
     learn_fixed_std: True
     std_min: 0.01
     std_max: 0.2
@@ -97,9 +97,9 @@ model:
     action_dim: ${action_dim}
   critic:
     _target_: model.common.critic.CriticObs
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   horizon_steps: ${horizon_steps}
   device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/finetune/transport/ft_ppo_gmm_transformer.yaml b/cfg/robomimic/finetune/transport/ft_ppo_gmm_transformer.yaml
index f1b981b..62b2e81 100644
--- a/cfg/robomimic/finetune/transport/ft_ppo_gmm_transformer.yaml
+++ b/cfg/robomimic/finetune/transport/ft_ppo_gmm_transformer.yaml
@@ -26,7 +26,7 @@ env:
   name: ${env_name}
   best_reward_threshold_for_success: 1
   max_episode_steps: 800
-  save_video: false
+  save_video: False
   wrappers:
     robomimic_lowdim:
       normalization_path: ${normalization_path}
@@ -49,21 +49,21 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_train_itr: 1000
+  n_train_itr: 201
   n_critic_warmup_itr: 2
   n_steps: 400
   gamma: 0.999
-  actor_lr: 1e-5
+  actor_lr: 1e-4
   actor_weight_decay: 0
   actor_lr_scheduler:
-    first_cycle_steps: 1000
-    warmup_steps: 10
-    min_lr: 1e-6
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 0
+    min_lr: 1e-4
   critic_lr: 1e-3
   critic_weight_decay: 0
   critic_lr_scheduler:
-    first_cycle_steps: 1000
-    warmup_steps: 10
+    first_cycle_steps: ${train.n_train_itr}
+    warmup_steps: 0
     min_lr: 1e-3
   save_model_freq: 100
   val_freq: 10
@@ -75,7 +75,7 @@ train:
   reward_scale_const: 1.0
   gae_lambda: 0.95
   batch_size: 10000
-  update_epochs: 8
+  update_epochs: 5
   vf_coef: 0.5
   target_kl: 1
 
@@ -98,9 +98,9 @@ model:
     action_dim: ${action_dim}
   critic:
     _target_: model.common.critic.CriticObs
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     mlp_dims: [256, 256, 256]
     activation_type: Mish
     residual_style: True
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   horizon_steps: ${horizon_steps}
   device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/pretrain/can/pre_diffusion_mlp.yaml b/cfg/robomimic/pretrain/can/pre_diffusion_mlp.yaml
index 1fa3ec6..726b097 100644
--- a/cfg/robomimic/pretrain/can/pre_diffusion_mlp.yaml
+++ b/cfg/robomimic/pretrain/can/pre_diffusion_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 8000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/can/pre_diffusion_mlp_img.yaml b/cfg/robomimic/pretrain/can/pre_diffusion_mlp_img.yaml
index e95eb28..59ce739 100644
--- a/cfg/robomimic/pretrain/can/pre_diffusion_mlp_img.yaml
+++ b/cfg/robomimic/pretrain/can/pre_diffusion_mlp_img.yaml
@@ -34,12 +34,12 @@ shape_meta:
     shape: [7]
 
 train:
-  n_epochs: 5000
+  n_epochs: 2000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 2000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
@@ -53,7 +53,7 @@ model:
     backbone:
       _target_: model.common.vit.VitEncoder
       obs_shape: ${shape_meta.obs.rgb.shape}
-      num_channel: ${eval:'${shape_meta.obs.rgb.shape[0]} * ${img_cond_steps}'} # each image patch is history concatenated
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per frame; the img_cond_steps history frames are concatenated within each image patch
       cfg:
         patch_size: 8
         depth: 1
diff --git a/cfg/robomimic/pretrain/can/pre_diffusion_mlp_ta1.yaml b/cfg/robomimic/pretrain/can/pre_diffusion_mlp_ta1.yaml
index 3d47545..5c5802b 100644
--- a/cfg/robomimic/pretrain/can/pre_diffusion_mlp_ta1.yaml
+++ b/cfg/robomimic/pretrain/can/pre_diffusion_mlp_ta1.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 8000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/can/pre_diffusion_mlp_ta1_ph.yaml b/cfg/robomimic/pretrain/can/pre_diffusion_mlp_ta1_ph.yaml
index c9afe59..9e0a277 100644
--- a/cfg/robomimic/pretrain/can/pre_diffusion_mlp_ta1_ph.yaml
+++ b/cfg/robomimic/pretrain/can/pre_diffusion_mlp_ta1_ph.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 8000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/can/pre_diffusion_unet.yaml b/cfg/robomimic/pretrain/can/pre_diffusion_unet.yaml
index c3cc4f3..f3338fa 100644
--- a/cfg/robomimic/pretrain/can/pre_diffusion_unet.yaml
+++ b/cfg/robomimic/pretrain/can/pre_diffusion_unet.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 8000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
@@ -47,8 +47,8 @@ model:
     n_groups: 8
     smaller_encoder: False
     cond_predict_scale: True
-    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
     action_dim: ${action_dim}
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
   horizon_steps: ${horizon_steps}
   obs_dim: ${obs_dim}
   action_dim: ${action_dim}
diff --git a/cfg/robomimic/pretrain/can/pre_diffusion_unet_img.yaml b/cfg/robomimic/pretrain/can/pre_diffusion_unet_img.yaml
new file mode 100644
index 0000000..1592e73
--- /dev/null
+++ b/cfg/robomimic/pretrain/can/pre_diffusion_unet_img.yaml
@@ -0,0 +1,94 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}
+_target_: agent.pretrain.train_diffusion_agent.TrainDiffusionAgent
+
+name: ${env}_pre_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
+train_dataset_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env}-img/train.npz
+
+seed: 42
+device: cuda:0
+env: can
+obs_dim: 9  # proprioception only
+action_dim: 7
+denoising_steps: 100
+horizon_steps: 4
+cond_steps: 1
+img_cond_steps: 1
+
+wandb:
+  entity: ${oc.env:DPPO_WANDB_ENTITY}
+  project: robomimic-${env}-pretrain
+  run: ${now:%H-%M-%S}_${name}
+
+shape_meta:
+  obs:
+    rgb:
+      shape: [3, 96, 96]  # not counting img_cond_steps
+    state:
+      shape: [9]
+  action: 
+    shape: [7]
+
+train:
+  n_epochs: 2000
+  batch_size: 256
+  learning_rate: 1e-4
+  weight_decay: 1e-6
+  lr_scheduler:
+    first_cycle_steps: 2000
+    warmup_steps: 100
+    min_lr: 1e-5
+  save_model_freq: 500
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  network:
+    _target_: model.diffusion.unet.VisionUnet1D
+    backbone:
+      _target_: model.common.vit.VitEncoder
+      obs_shape: ${shape_meta.obs.rgb.shape}
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per frame; the img_cond_steps history frames are concatenated within each image patch
+      cfg:
+        patch_size: 8
+        depth: 1
+        embed_dim: 128
+        num_heads: 4
+        embed_style: embed2
+        embed_norm: 0
+    img_cond_steps: ${img_cond_steps}
+    augment: True
+    spatial_emb: 128
+    #
+    diffusion_step_embed_dim: 32
+    dim: 40
+    dim_mults: [1, 2]
+    kernel_size: 5
+    n_groups: 8
+    smaller_encoder: False
+    cond_predict_scale: True
+    action_dim: ${action_dim}
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
+
+ema:
+  decay: 0.995
+
+train_dataset:
+  _target_: agent.dataset.sequence.StitchedSequenceDataset
+  use_img: True
+  dataset_path: ${train_dataset_path}
+  horizon_steps: ${horizon_steps}
+  max_n_episodes: 100
+  cond_steps: ${cond_steps}
+  img_cond_steps: ${img_cond_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/pretrain/can/pre_gaussian_mlp.yaml b/cfg/robomimic/pretrain/can/pre_gaussian_mlp.yaml
index 49fecb3..2f54207 100644
--- a/cfg/robomimic/pretrain/can/pre_gaussian_mlp.yaml
+++ b/cfg/robomimic/pretrain/can/pre_gaussian_mlp.yaml
@@ -23,12 +23,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/can/pre_gaussian_mlp_ibrl.yaml b/cfg/robomimic/pretrain/can/pre_gaussian_mlp_ibrl.yaml
index 12c949d..bc7fb07 100644
--- a/cfg/robomimic/pretrain/can/pre_gaussian_mlp_ibrl.yaml
+++ b/cfg/robomimic/pretrain/can/pre_gaussian_mlp_ibrl.yaml
@@ -23,12 +23,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 0
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-4
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/can/pre_gaussian_mlp_img.yaml b/cfg/robomimic/pretrain/can/pre_gaussian_mlp_img.yaml
index 1320863..98c0bf4 100644
--- a/cfg/robomimic/pretrain/can/pre_gaussian_mlp_img.yaml
+++ b/cfg/robomimic/pretrain/can/pre_gaussian_mlp_img.yaml
@@ -33,12 +33,12 @@ shape_meta:
     shape: [7]
 
 train:
-  n_epochs: 1000
+  n_epochs: 2000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 2000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
@@ -50,7 +50,7 @@ model:
     backbone:
       _target_: model.common.vit.VitEncoder
       obs_shape: ${shape_meta.obs.rgb.shape}
-      num_channel: ${eval:'${shape_meta.obs.rgb.shape[0]} * ${img_cond_steps}'} # each image patch is history concatenated
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per frame; the img_cond_steps history frames are concatenated within each image patch
       cfg:
         patch_size: 8
         depth: 1
diff --git a/cfg/robomimic/pretrain/can/pre_gaussian_mlp_ta1_ph.yaml b/cfg/robomimic/pretrain/can/pre_gaussian_mlp_ta1_ph.yaml
index a8911d3..4eb6e3e 100644
--- a/cfg/robomimic/pretrain/can/pre_gaussian_mlp_ta1_ph.yaml
+++ b/cfg/robomimic/pretrain/can/pre_gaussian_mlp_ta1_ph.yaml
@@ -23,12 +23,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/can/pre_gaussian_transformer.yaml b/cfg/robomimic/pretrain/can/pre_gaussian_transformer.yaml
index e8fddc7..05d7952 100644
--- a/cfg/robomimic/pretrain/can/pre_gaussian_transformer.yaml
+++ b/cfg/robomimic/pretrain/can/pre_gaussian_transformer.yaml
@@ -23,12 +23,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/can/pre_gmm_mlp.yaml b/cfg/robomimic/pretrain/can/pre_gmm_mlp.yaml
index 78cf40d..c463319 100644
--- a/cfg/robomimic/pretrain/can/pre_gmm_mlp.yaml
+++ b/cfg/robomimic/pretrain/can/pre_gmm_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/can/pre_gmm_transformer.yaml b/cfg/robomimic/pretrain/can/pre_gmm_transformer.yaml
index e8057da..9e731cd 100644
--- a/cfg/robomimic/pretrain/can/pre_gmm_transformer.yaml
+++ b/cfg/robomimic/pretrain/can/pre_gmm_transformer.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/lift/pre_diffusion_mlp.yaml b/cfg/robomimic/pretrain/lift/pre_diffusion_mlp.yaml
index a5715be..e67fd04 100644
--- a/cfg/robomimic/pretrain/lift/pre_diffusion_mlp.yaml
+++ b/cfg/robomimic/pretrain/lift/pre_diffusion_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 8000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/lift/pre_diffusion_mlp_img.yaml b/cfg/robomimic/pretrain/lift/pre_diffusion_mlp_img.yaml
index 5b96d97..58c22d5 100644
--- a/cfg/robomimic/pretrain/lift/pre_diffusion_mlp_img.yaml
+++ b/cfg/robomimic/pretrain/lift/pre_diffusion_mlp_img.yaml
@@ -34,12 +34,12 @@ shape_meta:
     shape: [7]
 
 train:
-  n_epochs: 2500
+  n_epochs: 2000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 8000
+    first_cycle_steps: 2000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
@@ -53,7 +53,7 @@ model:
     backbone:
       _target_: model.common.vit.VitEncoder
       obs_shape: ${shape_meta.obs.rgb.shape}
-      num_channel: ${eval:'${shape_meta.obs.rgb.shape[0]} * ${img_cond_steps}'} # each image patch is history concatenated
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
       cfg:
         patch_size: 8
         depth: 1
diff --git a/cfg/robomimic/pretrain/lift/pre_diffusion_unet.yaml b/cfg/robomimic/pretrain/lift/pre_diffusion_unet.yaml
index fa56862..c538574 100644
--- a/cfg/robomimic/pretrain/lift/pre_diffusion_unet.yaml
+++ b/cfg/robomimic/pretrain/lift/pre_diffusion_unet.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 8000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/lift/pre_diffusion_unet_img.yaml b/cfg/robomimic/pretrain/lift/pre_diffusion_unet_img.yaml
new file mode 100644
index 0000000..a8ebfb6
--- /dev/null
+++ b/cfg/robomimic/pretrain/lift/pre_diffusion_unet_img.yaml
@@ -0,0 +1,94 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}  # Hydra's run directory is set to logdir
+_target_: agent.pretrain.train_diffusion_agent.TrainDiffusionAgent  # pretraining agent class this config targets
+
+name: ${env}_pre_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}  # run name encodes env, horizon_steps (ta) and denoising_steps (td)
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}  # reads the DPPO_LOG_DIR environment variable
+train_dataset_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env}-img/train.npz  # reads the DPPO_DATA_DIR environment variable; uses the "<env>-img" dataset folder
+
+seed: 42
+device: cuda:0
+env: lift
+obs_dim: 9  # proprioception only
+action_dim: 7
+denoising_steps: 100  # forwarded to model.denoising_steps and used in the run name
+horizon_steps: 4  # forwarded to both model and train_dataset
+cond_steps: 1  # multiplied with obs_dim to form network.cond_dim
+img_cond_steps: 1  # multiplied with 3 to form backbone.num_channel
+
+wandb:
+  entity: ${oc.env:DPPO_WANDB_ENTITY}  # reads the DPPO_WANDB_ENTITY environment variable
+  project: robomimic-${env}-pretrain
+  run: ${now:%H-%M-%S}_${name}
+
+shape_meta:
+  obs:
+    rgb:
+      shape: [3, 96, 96]  # not counting img_cond_steps
+    state:
+      shape: [9]  # same value as obs_dim
+  action: 
+    shape: [7]  # same value as action_dim
+
+train:
+  n_epochs: 2000
+  batch_size: 256
+  learning_rate: 1e-4
+  weight_decay: 1e-6
+  lr_scheduler:
+    first_cycle_steps: 2000  # set to the same value as n_epochs
+    warmup_steps: 100
+    min_lr: 1e-5
+  save_model_freq: 500
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  network:
+    _target_: model.diffusion.unet.VisionUnet1D
+    backbone:
+      _target_: model.common.vit.VitEncoder
+      obs_shape: ${shape_meta.obs.rgb.shape}
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per image times img_cond_steps (the image history is concatenated)
+      cfg:
+        patch_size: 8
+        depth: 1
+        embed_dim: 128
+        num_heads: 4
+        embed_style: embed2
+        embed_norm: 0
+    img_cond_steps: ${img_cond_steps}
+    augment: True  # VisionUnet1D builds RandomShiftsAug(pad=4) when this is true
+    spatial_emb: 128  # > 0 makes VisionUnet1D use SpatialEmb with this projection dim
+    # remaining VisionUnet1D arguments (the ones above are its vision-related inputs)
+    diffusion_step_embed_dim: 32
+    dim: 40
+    dim_mults: [1, 2]
+    kernel_size: 5
+    n_groups: 8
+    smaller_encoder: False
+    cond_predict_scale: True
+    action_dim: ${action_dim}
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}  # obs_dim times cond_steps
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
+
+ema:
+  decay: 0.995
+
+train_dataset:
+  _target_: agent.dataset.sequence.StitchedSequenceDataset
+  use_img: True
+  dataset_path: ${train_dataset_path}
+  horizon_steps: ${horizon_steps}
+  max_n_episodes: 100
+  cond_steps: ${cond_steps}
+  img_cond_steps: ${img_cond_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/pretrain/lift/pre_gaussian_mlp.yaml b/cfg/robomimic/pretrain/lift/pre_gaussian_mlp.yaml
index 5243803..98de2ff 100644
--- a/cfg/robomimic/pretrain/lift/pre_gaussian_mlp.yaml
+++ b/cfg/robomimic/pretrain/lift/pre_gaussian_mlp.yaml
@@ -23,12 +23,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/lift/pre_gaussian_mlp_img.yaml b/cfg/robomimic/pretrain/lift/pre_gaussian_mlp_img.yaml
index c77508c..bfcab21 100644
--- a/cfg/robomimic/pretrain/lift/pre_gaussian_mlp_img.yaml
+++ b/cfg/robomimic/pretrain/lift/pre_gaussian_mlp_img.yaml
@@ -33,12 +33,12 @@ shape_meta:
     shape: [7]
 
 train:
-  n_epochs: 500
+  n_epochs: 2000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 3000
+    first_cycle_steps: 2000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
@@ -50,7 +50,7 @@ model:
     backbone:
       _target_: model.common.vit.VitEncoder
       obs_shape: ${shape_meta.obs.rgb.shape}
-      num_channel: ${eval:'${shape_meta.obs.rgb.shape[0]} * ${img_cond_steps}'} # each image patch is history concatenated
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
       cfg:
         patch_size: 8
         depth: 1
diff --git a/cfg/robomimic/pretrain/lift/pre_gaussian_transformer.yaml b/cfg/robomimic/pretrain/lift/pre_gaussian_transformer.yaml
index 6263925..695529e 100644
--- a/cfg/robomimic/pretrain/lift/pre_gaussian_transformer.yaml
+++ b/cfg/robomimic/pretrain/lift/pre_gaussian_transformer.yaml
@@ -23,12 +23,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/lift/pre_gmm_mlp.yaml b/cfg/robomimic/pretrain/lift/pre_gmm_mlp.yaml
index bf36bbb..4dc3065 100644
--- a/cfg/robomimic/pretrain/lift/pre_gmm_mlp.yaml
+++ b/cfg/robomimic/pretrain/lift/pre_gmm_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/lift/pre_gmm_transformer.yaml b/cfg/robomimic/pretrain/lift/pre_gmm_transformer.yaml
index fbfa1c1..134e0da 100644
--- a/cfg/robomimic/pretrain/lift/pre_gmm_transformer.yaml
+++ b/cfg/robomimic/pretrain/lift/pre_gmm_transformer.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/square/pre_diffusion_mlp.yaml b/cfg/robomimic/pretrain/square/pre_diffusion_mlp.yaml
index effd320..fc59c7e 100644
--- a/cfg/robomimic/pretrain/square/pre_diffusion_mlp.yaml
+++ b/cfg/robomimic/pretrain/square/pre_diffusion_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 8000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/square/pre_diffusion_mlp_img.yaml b/cfg/robomimic/pretrain/square/pre_diffusion_mlp_img.yaml
index e1f15ec..c5061a8 100644
--- a/cfg/robomimic/pretrain/square/pre_diffusion_mlp_img.yaml
+++ b/cfg/robomimic/pretrain/square/pre_diffusion_mlp_img.yaml
@@ -34,12 +34,12 @@ shape_meta:
     shape: [7]
 
 train:
-  n_epochs: 4000
+  n_epochs: 2000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 8000
+    first_cycle_steps: 2000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
@@ -53,7 +53,7 @@ model:
     backbone:
       _target_: model.common.vit.VitEncoder
       obs_shape: ${shape_meta.obs.rgb.shape}
-      num_channel: ${eval:'${shape_meta.obs.rgb.shape[0]} * ${img_cond_steps}'} # each image patch is history concatenated
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
       cfg:
         patch_size: 8
         depth: 1
diff --git a/cfg/robomimic/pretrain/square/pre_diffusion_mlp_ta1.yaml b/cfg/robomimic/pretrain/square/pre_diffusion_mlp_ta1.yaml
index fdfd118..6531116 100644
--- a/cfg/robomimic/pretrain/square/pre_diffusion_mlp_ta1.yaml
+++ b/cfg/robomimic/pretrain/square/pre_diffusion_mlp_ta1.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 8000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/square/pre_diffusion_mlp_ta1_ph.yaml b/cfg/robomimic/pretrain/square/pre_diffusion_mlp_ta1_ph.yaml
index ba7664d..3dc0e66 100644
--- a/cfg/robomimic/pretrain/square/pre_diffusion_mlp_ta1_ph.yaml
+++ b/cfg/robomimic/pretrain/square/pre_diffusion_mlp_ta1_ph.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 8000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/square/pre_diffusion_unet.yaml b/cfg/robomimic/pretrain/square/pre_diffusion_unet.yaml
index 96a2bba..ed43711 100644
--- a/cfg/robomimic/pretrain/square/pre_diffusion_unet.yaml
+++ b/cfg/robomimic/pretrain/square/pre_diffusion_unet.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 8000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/square/pre_diffusion_unet_img.yaml b/cfg/robomimic/pretrain/square/pre_diffusion_unet_img.yaml
new file mode 100644
index 0000000..b52fde4
--- /dev/null
+++ b/cfg/robomimic/pretrain/square/pre_diffusion_unet_img.yaml
@@ -0,0 +1,94 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}  # Hydra's run directory is set to logdir
+_target_: agent.pretrain.train_diffusion_agent.TrainDiffusionAgent  # pretraining agent class this config targets
+
+name: ${env}_pre_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}  # run name encodes env, horizon_steps (ta) and denoising_steps (td)
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}  # reads the DPPO_LOG_DIR environment variable
+train_dataset_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env}-img/train.npz  # reads the DPPO_DATA_DIR environment variable; uses the "<env>-img" dataset folder
+
+seed: 42
+device: cuda:0
+env: square
+obs_dim: 9  # proprioception only
+action_dim: 7
+denoising_steps: 100  # forwarded to model.denoising_steps and used in the run name
+horizon_steps: 4  # forwarded to both model and train_dataset
+cond_steps: 1  # multiplied with obs_dim to form network.cond_dim
+img_cond_steps: 1  # multiplied with 3 to form backbone.num_channel
+
+wandb:
+  entity: ${oc.env:DPPO_WANDB_ENTITY}  # reads the DPPO_WANDB_ENTITY environment variable
+  project: robomimic-${env}-pretrain
+  run: ${now:%H-%M-%S}_${name}
+
+shape_meta:
+  obs:
+    rgb:
+      shape: [3, 96, 96]  # not counting img_cond_steps
+    state:
+      shape: [9]  # same value as obs_dim
+  action: 
+    shape: [7]  # same value as action_dim
+
+train:
+  n_epochs: 2000
+  batch_size: 256
+  learning_rate: 1e-4
+  weight_decay: 1e-6
+  lr_scheduler:
+    first_cycle_steps: 2000  # set to the same value as n_epochs
+    warmup_steps: 100
+    min_lr: 1e-5
+  save_model_freq: 500
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  network:
+    _target_: model.diffusion.unet.VisionUnet1D
+    backbone:
+      _target_: model.common.vit.VitEncoder
+      obs_shape: ${shape_meta.obs.rgb.shape}
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per image times img_cond_steps (the image history is concatenated)
+      cfg:
+        patch_size: 8
+        depth: 1
+        embed_dim: 128
+        num_heads: 4
+        embed_style: embed2
+        embed_norm: 0
+    img_cond_steps: ${img_cond_steps}
+    augment: True  # VisionUnet1D builds RandomShiftsAug(pad=4) when this is true
+    spatial_emb: 128  # > 0 makes VisionUnet1D use SpatialEmb with this projection dim
+    # remaining VisionUnet1D arguments (the ones above are its vision-related inputs)
+    diffusion_step_embed_dim: 32
+    dim: 64
+    dim_mults: [1, 2]
+    kernel_size: 5
+    n_groups: 8
+    smaller_encoder: False
+    cond_predict_scale: True
+    action_dim: ${action_dim}
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}  # obs_dim times cond_steps
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
+
+ema:
+  decay: 0.995
+
+train_dataset:
+  _target_: agent.dataset.sequence.StitchedSequenceDataset
+  use_img: True
+  dataset_path: ${train_dataset_path}
+  horizon_steps: ${horizon_steps}
+  max_n_episodes: 100
+  cond_steps: ${cond_steps}
+  img_cond_steps: ${img_cond_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/pretrain/square/pre_gaussian_mlp.yaml b/cfg/robomimic/pretrain/square/pre_gaussian_mlp.yaml
index 0b26d26..d8bd7b3 100644
--- a/cfg/robomimic/pretrain/square/pre_gaussian_mlp.yaml
+++ b/cfg/robomimic/pretrain/square/pre_gaussian_mlp.yaml
@@ -23,12 +23,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/square/pre_gaussian_mlp_ibrl.yaml b/cfg/robomimic/pretrain/square/pre_gaussian_mlp_ibrl.yaml
index 7b118cf..a432ba1 100644
--- a/cfg/robomimic/pretrain/square/pre_gaussian_mlp_ibrl.yaml
+++ b/cfg/robomimic/pretrain/square/pre_gaussian_mlp_ibrl.yaml
@@ -23,12 +23,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 0
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-4
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/square/pre_gaussian_mlp_img.yaml b/cfg/robomimic/pretrain/square/pre_gaussian_mlp_img.yaml
index c9cc2f3..1ac7ea0 100644
--- a/cfg/robomimic/pretrain/square/pre_gaussian_mlp_img.yaml
+++ b/cfg/robomimic/pretrain/square/pre_gaussian_mlp_img.yaml
@@ -33,12 +33,12 @@ shape_meta:
     shape: [7]
 
 train:
-  n_epochs: 4000
+  n_epochs: 2000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 2000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
@@ -50,7 +50,7 @@ model:
     backbone:
       _target_: model.common.vit.VitEncoder
       obs_shape: ${shape_meta.obs.rgb.shape}
-      num_channel: ${eval:'${shape_meta.obs.rgb.shape[0]} * ${img_cond_steps}'} # each image patch is history concatenated
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
       cfg:
         patch_size: 8
         depth: 1
diff --git a/cfg/robomimic/pretrain/square/pre_gaussian_mlp_ta1_ph.yaml b/cfg/robomimic/pretrain/square/pre_gaussian_mlp_ta1_ph.yaml
index 84fbbb4..56fecf6 100644
--- a/cfg/robomimic/pretrain/square/pre_gaussian_mlp_ta1_ph.yaml
+++ b/cfg/robomimic/pretrain/square/pre_gaussian_mlp_ta1_ph.yaml
@@ -23,12 +23,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/square/pre_gaussian_transformer.yaml b/cfg/robomimic/pretrain/square/pre_gaussian_transformer.yaml
index c6ae3d1..9ea27e1 100644
--- a/cfg/robomimic/pretrain/square/pre_gaussian_transformer.yaml
+++ b/cfg/robomimic/pretrain/square/pre_gaussian_transformer.yaml
@@ -23,12 +23,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/square/pre_gmm_mlp.yaml b/cfg/robomimic/pretrain/square/pre_gmm_mlp.yaml
index 3c70528..63aff6b 100644
--- a/cfg/robomimic/pretrain/square/pre_gmm_mlp.yaml
+++ b/cfg/robomimic/pretrain/square/pre_gmm_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/square/pre_gmm_transformer.yaml b/cfg/robomimic/pretrain/square/pre_gmm_transformer.yaml
index 3232db7..7900820 100644
--- a/cfg/robomimic/pretrain/square/pre_gmm_transformer.yaml
+++ b/cfg/robomimic/pretrain/square/pre_gmm_transformer.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/transport/pre_diffusion_mlp.yaml b/cfg/robomimic/pretrain/transport/pre_diffusion_mlp.yaml
index 9db3685..c827d2d 100644
--- a/cfg/robomimic/pretrain/transport/pre_diffusion_mlp.yaml
+++ b/cfg/robomimic/pretrain/transport/pre_diffusion_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 8000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/transport/pre_diffusion_mlp_img.yaml b/cfg/robomimic/pretrain/transport/pre_diffusion_mlp_img.yaml
index befefd2..699cf9b 100644
--- a/cfg/robomimic/pretrain/transport/pre_diffusion_mlp_img.yaml
+++ b/cfg/robomimic/pretrain/transport/pre_diffusion_mlp_img.yaml
@@ -27,19 +27,19 @@ wandb:
 shape_meta:
   obs:
     rgb:
-      shape: [3, 96, 96]  # not counting img_cond_steps
+      shape: [6, 96, 96]  # not counting img_cond_steps
     state:
       shape: [9]
   action: 
     shape: [7]
 
 train:
-  n_epochs: 8000
+  n_epochs: 2000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 8000
+    first_cycle_steps: 2000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
@@ -53,7 +53,7 @@ model:
     backbone:
       _target_: model.common.vit.VitEncoder
       obs_shape: ${shape_meta.obs.rgb.shape}
-      num_channel: ${eval:'${shape_meta.obs.rgb.shape[0]} * ${img_cond_steps}'} # each image patch is history concatenated
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
       cfg:
         patch_size: 8
         depth: 1
diff --git a/cfg/robomimic/pretrain/transport/pre_diffusion_unet.yaml b/cfg/robomimic/pretrain/transport/pre_diffusion_unet.yaml
index 3b7bc4e..e9902a9 100644
--- a/cfg/robomimic/pretrain/transport/pre_diffusion_unet.yaml
+++ b/cfg/robomimic/pretrain/transport/pre_diffusion_unet.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 8000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 10000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/transport/pre_diffusion_unet_img.yaml b/cfg/robomimic/pretrain/transport/pre_diffusion_unet_img.yaml
new file mode 100644
index 0000000..0099c22
--- /dev/null
+++ b/cfg/robomimic/pretrain/transport/pre_diffusion_unet_img.yaml
@@ -0,0 +1,95 @@
+defaults:
+  - _self_
+hydra:
+  run:
+    dir: ${logdir}  # Hydra's run directory is set to logdir
+_target_: agent.pretrain.train_diffusion_agent.TrainDiffusionAgent  # pretraining agent class this config targets
+
+name: ${env}_pre_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}  # run name encodes env, horizon_steps (ta) and denoising_steps (td)
+logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}  # reads the DPPO_LOG_DIR environment variable
+train_dataset_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env}-img/train.npz  # reads the DPPO_DATA_DIR environment variable; uses the "<env>-img" dataset folder
+
+seed: 42
+device: cuda:0
+env: transport
+obs_dim: 18  # proprioception only
+action_dim: 14
+denoising_steps: 100  # forwarded to model.denoising_steps and used in the run name
+horizon_steps: 16  # forwarded to both model and train_dataset
+cond_steps: 1  # multiplied with obs_dim to form network.cond_dim
+img_cond_steps: 1  # multiplied with 3 to form backbone.num_channel
+
+wandb:
+  entity: ${oc.env:DPPO_WANDB_ENTITY}  # reads the DPPO_WANDB_ENTITY environment variable
+  project: robomimic-${env}-pretrain
+  run: ${now:%H-%M-%S}_${name}
+
+shape_meta:
+  obs:
+    rgb:
+      shape: [6, 96, 96]  # not counting img_cond_steps; presumably 2 images (num_img: 2) x 3 channels - TODO confirm
+    state:
+      shape: [9]  # NOTE(review): obs_dim above is 18 - confirm whether this should be [18]
+  action: 
+    shape: [7]  # NOTE(review): action_dim above is 14 - confirm whether this should be [14]
+
+train:
+  n_epochs: 2000
+  batch_size: 256
+  learning_rate: 1e-4
+  weight_decay: 1e-6
+  lr_scheduler:
+    first_cycle_steps: 2000  # set to the same value as n_epochs
+    warmup_steps: 100
+    min_lr: 1e-5
+  save_model_freq: 500
+
+model:
+  _target_: model.diffusion.diffusion.DiffusionModel
+  predict_epsilon: True
+  denoised_clip_value: 1.0
+  network:
+    _target_: model.diffusion.unet.VisionUnet1D
+    backbone:
+      _target_: model.common.vit.VitEncoder
+      obs_shape: ${shape_meta.obs.rgb.shape}
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 x img_cond_steps (the image history is concatenated); note obs_shape above has 6 channels
+      cfg:
+        patch_size: 8
+        depth: 1
+        embed_dim: 128
+        num_heads: 4
+        embed_style: embed2
+        embed_norm: 0
+    augment: True  # VisionUnet1D builds RandomShiftsAug(pad=4) when this is true
+    num_img: 2  # with num_img > 1 and spatial_emb > 0, VisionUnet1D builds two SpatialEmb modules (compress1, compress2)
+    spatial_emb: 128  # > 0 makes VisionUnet1D use SpatialEmb with this projection dim
+    # remaining VisionUnet1D arguments (the ones above are its vision-related inputs)
+    diffusion_step_embed_dim: 32
+    dim: 64
+    dim_mults: [1, 2]
+    kernel_size: 5
+    n_groups: 8
+    smaller_encoder: False
+    cond_predict_scale: True
+    img_cond_steps: ${img_cond_steps}
+    cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}  # obs_dim times cond_steps
+    action_dim: ${action_dim}
+  horizon_steps: ${horizon_steps}
+  obs_dim: ${obs_dim}
+  action_dim: ${action_dim}
+  denoising_steps: ${denoising_steps}
+  device: ${device}
+
+ema:
+  decay: 0.995
+
+train_dataset:
+  _target_: agent.dataset.sequence.StitchedSequenceDataset
+  use_img: True
+  dataset_path: ${train_dataset_path}
+  horizon_steps: ${horizon_steps}
+  max_n_episodes: 100
+  cond_steps: ${cond_steps}
+  img_cond_steps: ${img_cond_steps}
+  device: ${device}
\ No newline at end of file
diff --git a/cfg/robomimic/pretrain/transport/pre_gaussian_mlp.yaml b/cfg/robomimic/pretrain/transport/pre_gaussian_mlp.yaml
index 900cdc1..7e08182 100644
--- a/cfg/robomimic/pretrain/transport/pre_gaussian_mlp.yaml
+++ b/cfg/robomimic/pretrain/transport/pre_gaussian_mlp.yaml
@@ -23,12 +23,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/transport/pre_gaussian_mlp_img.yaml b/cfg/robomimic/pretrain/transport/pre_gaussian_mlp_img.yaml
index 040c383..5f724f7 100644
--- a/cfg/robomimic/pretrain/transport/pre_gaussian_mlp_img.yaml
+++ b/cfg/robomimic/pretrain/transport/pre_gaussian_mlp_img.yaml
@@ -26,19 +26,19 @@ wandb:
 shape_meta:
   obs:
     rgb:
-      shape: [3, 96, 96]  # not counting img_cond_steps
+      shape: [6, 96, 96]  # not counting img_cond_steps
     state:
       shape: [9]
   action: 
     shape: [7]
 
 train:
-  n_epochs: 5000
+  n_epochs: 2000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 2000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
@@ -50,7 +50,7 @@ model:
     backbone:
       _target_: model.common.vit.VitEncoder
       obs_shape: ${shape_meta.obs.rgb.shape}
-      num_channel: ${eval:'${shape_meta.obs.rgb.shape[0]} * ${img_cond_steps}'} # each image patch is history concatenated
+      num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
       cfg:
         patch_size: 8
         depth: 1
diff --git a/cfg/robomimic/pretrain/transport/pre_gaussian_transformer.yaml b/cfg/robomimic/pretrain/transport/pre_gaussian_transformer.yaml
index 372bb2c..033868c 100644
--- a/cfg/robomimic/pretrain/transport/pre_gaussian_transformer.yaml
+++ b/cfg/robomimic/pretrain/transport/pre_gaussian_transformer.yaml
@@ -23,12 +23,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/transport/pre_gmm_mlp.yaml b/cfg/robomimic/pretrain/transport/pre_gmm_mlp.yaml
index ef5daf0..dfd07ab 100644
--- a/cfg/robomimic/pretrain/transport/pre_gmm_mlp.yaml
+++ b/cfg/robomimic/pretrain/transport/pre_gmm_mlp.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/cfg/robomimic/pretrain/transport/pre_gmm_transformer.yaml b/cfg/robomimic/pretrain/transport/pre_gmm_transformer.yaml
index cda80e7..9ce532a 100644
--- a/cfg/robomimic/pretrain/transport/pre_gmm_transformer.yaml
+++ b/cfg/robomimic/pretrain/transport/pre_gmm_transformer.yaml
@@ -24,12 +24,12 @@ wandb:
   run: ${now:%H-%M-%S}_${name}
 
 train:
-  n_epochs: 5000
+  n_epochs: 3000
   batch_size: 256
   learning_rate: 1e-4
   weight_decay: 1e-6
   lr_scheduler:
-    first_cycle_steps: 5000
+    first_cycle_steps: 3000
     warmup_steps: 100
     min_lr: 1e-5
   save_model_freq: 500
diff --git a/model/diffusion/unet.py b/model/diffusion/unet.py
index c45b9c8..0307b50 100644
--- a/model/diffusion/unet.py
+++ b/model/diffusion/unet.py
@@ -10,6 +10,7 @@ import torch.nn as nn
 import einops
 from einops.layers.torch import Rearrange
 import logging
+from copy import deepcopy
 
 log = logging.getLogger(__name__)
 
@@ -20,7 +21,8 @@ from model.diffusion.modules import (
     Conv1dBlock,
 )
 from model.common.mlp import ResidualMLP
-
+from model.diffusion.modules import SinusoidalPosEmb
+from model.common.modules import SpatialEmb, RandomShiftsAug
 
 class ResidualBlock1D(nn.Module):
 
@@ -323,3 +325,297 @@ class Unet1D(nn.Module):
 
         x = einops.rearrange(x, "b t h -> b h t")
         return x
+
+
+class VisionUnet1D(nn.Module):
+    """1D conditional U-Net over action chunks, conditioned on images and state.
+
+    RGB observations are encoded by ``backbone`` and compressed into a visual
+    feature. That feature, the flattened (optionally MLP-encoded) state and
+    the diffusion-step embedding are concatenated into one conditioning
+    vector that is passed to every residual block.
+    """
+
+    def __init__(
+        self,
+        backbone,
+        action_dim,
+        img_cond_steps=1,
+        cond_dim=None,
+        diffusion_step_embed_dim=32,
+        dim=32,
+        dim_mults=(1, 2, 4, 8),
+        smaller_encoder=False,
+        cond_mlp_dims=None,
+        kernel_size=5,
+        n_groups=None,
+        activation_type="Mish",
+        cond_predict_scale=False,
+        groupnorm_eps=1e-5,
+        spatial_emb=0,
+        dropout=0,
+        num_img=1,
+        augment=False,
+        visual_feature_dim=128,
+    ):
+        """Build the image compressor and the conditional 1D U-Net.
+
+        Args:
+            backbone: image encoder module. Must expose ``num_patch`` and
+                ``patch_repr_dim`` when ``spatial_emb > 0``, and ``repr_dim``
+                otherwise.
+            action_dim: number of action channels; input and output width of
+                the U-Net.
+            img_cond_steps: number of most recent image frames used as
+                conditioning.
+            cond_dim: width of the flattened state history. Must be set; it
+                sizes ``cond_mlp`` and the conditioning vector.
+            diffusion_step_embed_dim: width of the diffusion-step embedding.
+            dim: base channel width; level ``i`` uses ``dim * dim_mults[i]``.
+            dim_mults: channel multipliers, one per U-Net level.
+            smaller_encoder: if True, ``larger_encoder`` is turned off in the
+                residual blocks (it is also off whenever ``cond_mlp_dims`` is set).
+            cond_mlp_dims: layer sizes of an optional ``ResidualMLP`` applied to
+                the flattened state; ``None`` feeds the state in unchanged.
+            kernel_size: kernel size forwarded to the conv blocks.
+            n_groups: group count forwarded to the conv blocks.
+            activation_type: activation name forwarded to the blocks.
+            cond_predict_scale: forwarded to every ``ResidualBlock1D``.
+            groupnorm_eps: epsilon forwarded to the blocks.
+            spatial_emb: if > 0, ``proj_dim`` of the ``SpatialEmb`` compressor
+                (must be > 1); if 0, a linear compressor is used instead.
+            dropout: dropout rate of the visual compressor.
+            num_img: number of camera images per step. More than one requires
+                ``spatial_emb > 0``; ``forward`` only consumes the first two.
+            augment: if True, apply ``RandomShiftsAug(pad=4)`` to the images.
+            visual_feature_dim: output width of the linear compressor; only
+                used when ``spatial_emb == 0``.
+
+        Raises:
+            ValueError: if ``num_img > 1`` while ``spatial_emb`` is not positive.
+        """
+        super().__init__()
+
+        # vision
+        self.backbone = backbone
+        if augment:
+            self.aug = RandomShiftsAug(pad=4)
+        self.augment = augment
+        self.num_img = num_img
+        self.img_cond_steps = img_cond_steps
+        if spatial_emb > 0:
+            assert spatial_emb > 1, "this is the dimension"
+            # NOTE(review): SpatialEmb is built with prop_dim=cond_dim but in
+            # forward() it receives the state after cond_mlp; confirm the two
+            # widths agree when cond_mlp_dims is set.
+            spatial_emb_kwargs = dict(
+                num_patch=self.backbone.num_patch,
+                patch_dim=self.backbone.patch_repr_dim,
+                prop_dim=cond_dim,
+                proj_dim=spatial_emb,
+                dropout=dropout,
+            )
+            if num_img > 1:
+                # the second compressor starts as an exact copy of the first
+                self.compress1 = SpatialEmb(**spatial_emb_kwargs)
+                self.compress2 = deepcopy(self.compress1)
+            else:
+                self.compress = SpatialEmb(**spatial_emb_kwargs)
+            # the visual_feature_dim argument is ignored on this path
+            visual_feature_dim = spatial_emb * num_img
+        else:
+            if num_img > 1:
+                # forward() needs compress1/compress2, which only exist above
+                raise ValueError("num_img > 1 requires spatial_emb > 0")
+            # linear compressor applied to the flattened backbone output
+            self.compress = nn.Sequential(
+                nn.Linear(self.backbone.repr_dim, visual_feature_dim),
+                nn.LayerNorm(visual_feature_dim),
+                nn.Dropout(dropout),
+                nn.ReLU(),
+            )
+
+        # unet
+        dims = [action_dim, *(dim * mult for mult in dim_mults)]
+        in_out = list(zip(dims[:-1], dims[1:]))
+        log.info(f"Channel dimensions: {in_out}")
+
+        dsed = diffusion_step_embed_dim
+        self.time_mlp = nn.Sequential(
+            SinusoidalPosEmb(dsed),
+            nn.Linear(dsed, dsed * 4),
+            nn.Mish(),
+            nn.Linear(dsed * 4, dsed),
+        )
+        if cond_mlp_dims is not None:
+            self.cond_mlp = ResidualMLP(
+                dim_list=[cond_dim] + cond_mlp_dims,
+                activation_type=activation_type,
+                out_activation_type="Identity",
+            )
+            cond_block_dim = dsed + cond_mlp_dims[-1] + visual_feature_dim
+        else:
+            cond_block_dim = dsed + cond_dim + visual_feature_dim
+        use_large_encoder_in_block = cond_mlp_dims is None and not smaller_encoder
+
+        def res_block(channels_in, channels_out):
+            # every residual block shares the same conditioning settings
+            return ResidualBlock1D(
+                channels_in,
+                channels_out,
+                cond_dim=cond_block_dim,
+                kernel_size=kernel_size,
+                n_groups=n_groups,
+                cond_predict_scale=cond_predict_scale,
+                larger_encoder=use_large_encoder_in_block,
+                activation_type=activation_type,
+                groupnorm_eps=groupnorm_eps,
+            )
+
+        mid_dim = dims[-1]
+        self.mid_modules = nn.ModuleList(
+            [res_block(mid_dim, mid_dim), res_block(mid_dim, mid_dim)]
+        )
+
+        self.down_modules = nn.ModuleList([])
+        for ind, (dim_in, dim_out) in enumerate(in_out):
+            is_last = ind >= (len(in_out) - 1)
+            self.down_modules.append(
+                nn.ModuleList(
+                    [
+                        res_block(dim_in, dim_out),
+                        res_block(dim_out, dim_out),
+                        Downsample1d(dim_out) if not is_last else nn.Identity(),
+                    ]
+                )
+            )
+
+        self.up_modules = nn.ModuleList([])
+        for ind, (dim_in, dim_out) in enumerate(reversed(in_out[1:])):
+            # NOTE: this loop has len(in_out) - 1 iterations, so is_last is
+            # never True and every level ends with Upsample1d. Left as is so
+            # the module layout (and checkpoint keys) does not change.
+            is_last = ind >= (len(in_out) - 1)
+            self.up_modules.append(
+                nn.ModuleList(
+                    [
+                        res_block(dim_out * 2, dim_in),
+                        res_block(dim_in, dim_in),
+                        Upsample1d(dim_in) if not is_last else nn.Identity(),
+                    ]
+                )
+            )
+
+        self.final_conv = nn.Sequential(
+            Conv1dBlock(
+                dim,
+                dim,
+                kernel_size=kernel_size,
+                n_groups=n_groups,
+                activation_type=activation_type,
+                eps=groupnorm_eps,
+            ),
+            nn.Conv1d(dim, action_dim, 1),
+        )
+
+    def forward(
+        self,
+        x,
+        time,
+        cond,
+        **kwargs,
+    ):
+        """
+        x: (B, Ta, act_dim)
+        time: (B,) or int, diffusion step
+        cond: dict with key state/rgb; more recent obs at the end
+            state: (B, To, obs_dim)
+            rgb: (B, To, C, H, W)
+
+        Returns the prediction in the same (B, horizon, act_dim) layout as x.
+        """
+        B = len(x)
+        _, _, _, H, W = cond["rgb"].shape
+
+        # move chunk dim to the end
+        x = einops.rearrange(x, "b h t -> b t h")
+
+        # flatten history
+        state = cond["state"].view(B, -1)
+
+        # obs encoder
+        if hasattr(self, "cond_mlp"):
+            state = self.cond_mlp(state)
+
+        # Take recent images --- sometimes we want to use fewer img_cond_steps than cond_steps (e.g., 1 image but 3 prio)
+        rgb = cond["rgb"][:, -self.img_cond_steps :]
+
+        # concatenate images in cond by channels
+        if self.num_img > 1:
+            # use the length of the sliced window: cond["rgb"] may hold more
+            # frames than img_cond_steps
+            rgb = rgb.reshape(B, rgb.shape[1], self.num_img, 3, H, W)
+            rgb = einops.rearrange(rgb, "b t n c h w -> b n (t c) h w")
+        else:
+            rgb = einops.rearrange(rgb, "b t c h w -> b (t c) h w")
+
+        # convert rgb to float32 for augmentation
+        rgb = rgb.float()
+
+        # get vit output - pass in two images separately
+        if self.num_img > 1:  # TODO: properly handle multiple images
+            rgb1 = rgb[:, 0]
+            rgb2 = rgb[:, 1]
+            if self.augment:
+                rgb1 = self.aug(rgb1)
+                rgb2 = self.aug(rgb2)
+            feat1 = self.backbone(rgb1)
+            feat2 = self.backbone(rgb2)
+            feat1 = self.compress1.forward(feat1, state)
+            feat2 = self.compress2.forward(feat2, state)
+            feat = torch.cat([feat1, feat2], dim=-1)
+        else:  # single image
+            if self.augment:
+                rgb = self.aug(rgb)
+            feat = self.backbone(rgb)
+
+            # compress
+            if isinstance(self.compress, SpatialEmb):
+                feat = self.compress.forward(feat, state)
+            else:
+                feat = feat.flatten(1, -1)
+                feat = self.compress(feat)
+        cond_encoded = torch.cat([feat, state], dim=-1)
+
+        # 1. time
+        if not torch.is_tensor(time):
+            time = torch.tensor([time], dtype=torch.long, device=x.device)
+        elif len(time.shape) == 0:
+            time = time[None].to(x.device)
+        # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
+        time = time.expand(x.shape[0])
+        global_feature = self.time_mlp(time)
+        global_feature = torch.cat([global_feature, cond_encoded], dim=-1)
+
+        # encode local features
+        skips = []
+        for resnet, resnet2, downsample in self.down_modules:
+            x = resnet(x, global_feature)
+            x = resnet2(x, global_feature)
+            skips.append(x)
+            x = downsample(x)
+
+        for mid_module in self.mid_modules:
+            x = mid_module(x, global_feature)
+
+        for resnet, resnet2, upsample in self.up_modules:
+            x = torch.cat((x, skips.pop()), dim=1)
+            x = resnet(x, global_feature)
+            x = resnet2(x, global_feature)
+            x = upsample(x)
+
+        x = self.final_conv(x)
+
+        x = einops.rearrange(x, "b t h -> b h t")
+        return x
+
diff --git a/pyproject.toml b/pyproject.toml
index b1dbffe..a432bef 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "dppo"
-version = "0.6.0"
+version = "0.7.0"
 description = "Fine-tuning diffusion policies with PPO."
 readme = "README.md"
 requires-python = ">=3.8"
diff --git a/script/download_url.py b/script/download_url.py
index d50ee4d..ca25fc0 100644
--- a/script/download_url.py
+++ b/script/download_url.py
@@ -279,6 +279,11 @@ def get_checkpoint_download_url(cfg):
         in path
     ):
         return "https://drive.google.com/file/d/1T-NGgBmT-UmcVWADygXj873IyWLewvsU/view?usp=drive_link"
+    elif (
+        "lift_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-35-19_42/checkpoint/state_500.pt"
+        in path
+    ):
+        return "https://drive.google.com/file/d/1-gB4Tz5ityFMnegX7uRz5PCcb7-JTOZg/view?usp=drive_link"
     elif (
         "lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_5000.pt"
         in path
@@ -323,6 +328,11 @@ def get_checkpoint_download_url(cfg):
         in path
     ):
         return "https://drive.google.com/file/d/1s346KCe2aar_tXX7u8rzjRF3kpwVpH5c/view?usp=drive_link"
+    elif (
+        "can_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-34-05_42/checkpoint/state_500.pt"
+        in path
+    ):
+        return "https://drive.google.com/file/d/1SHKcorbyGDg3I0h6hvOkQXWQT4dD0gGh/view?usp=drive_link"
     elif (
         "can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_5000.pt"
         in path
@@ -393,6 +403,11 @@ def get_checkpoint_download_url(cfg):
         in path
     ):
         return "https://drive.google.com/file/d/11IEgQe0LFI23hn1Cwf6Z_YfJdDilVc0z/view?usp=drive_link"
+    elif (
+        "square_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-36-37_42/checkpoint/state_500.pt"
+        in path
+    ):
+        return "https://drive.google.com/file/d/1fcOi9srPOVPwEzBRV-1pzVDq3uap3YAs/view?usp=drive_link"
     elif (
         "square_pre_diffusion_mlp_ta4_td20/2024-07-10_01-46-16/checkpoint/state_8000.pt"
         in path
@@ -459,6 +474,11 @@ def get_checkpoint_download_url(cfg):
         in path
     ):
         return "https://drive.google.com/file/d/1MNGT8j9x1uudugGUcia-xwP_7f7xVY4K/view?usp=drive_link"
+    elif (
+        "transport_pre_diffusion_unet_img_ta16_td100/2024-11-15_17-55-22_42/checkpoint/state_1000.pt"
+        in path
+    ):
+        return "https://drive.google.com/file/d/1G5LTxgRZvPm7NCbfByL4q_FbNvRgWguW/view?usp=drive_link"
     elif (
         "transport_pre_diffusion_mlp_ta8_td20/2024-07-08_11-18-59/checkpoint/state_8000.pt"
         in path