* update from scratch configs

* update gym pretraining configs - use fewer epochs

* update robomimic pretraining configs - use fewer epochs

* allow trajectory plotting in eval agent

* add simple vit unet

* update avoid pretraining configs - use fewer epochs

* update furniture pretraining configs - use same amount of epochs as before

* add robomimic diffusion unet pretraining configs

* update robomimic finetuning configs - higher lr

* add vit unet checkpoint urls

* update pretraining and finetuning instructions as configs are updated
This commit is contained in:
Allen Z. Ren 2024-11-20 15:47:52 -05:00 committed by allenzren
parent d2929f65e1
commit 1d04211666
158 changed files with 3350 additions and 410 deletions

View File

@ -57,6 +57,7 @@ class EvalAgent:
self.horizon_steps = cfg.horizon_steps
self.max_episode_steps = cfg.env.max_episode_steps
self.reset_at_iteration = cfg.env.get("reset_at_iteration", True)
self.save_full_observations = cfg.env.get("save_full_observations", False)
self.furniture_sparse_reward = (
cfg.env.specific.get("sparse_reward", False)
if "specific" in cfg.env
@ -85,6 +86,10 @@ class EvalAgent:
assert not (
self.n_render <= 0 and self.render_video
), "Need to set n_render > 0 if saving video"
self.traj_plotter = (
hydra.utils.instantiate(cfg.plotter)
if "plotter" in cfg else None
)
def run(self):
pass

View File

@ -37,6 +37,11 @@ class EvalDiffusionAgent(EvalAgent):
prev_obs_venv = self.reset_env_all(options_venv=options_venv)
firsts_trajs[0] = 1
reward_trajs = np.zeros((self.n_steps, self.n_envs))
if self.save_full_observations: # state-only
obs_full_trajs = np.empty((0, self.n_envs, self.obs_dim))
obs_full_trajs = np.vstack(
(obs_full_trajs, prev_obs_venv["state"][:, -1][None])
)
# Collect a set of trajectories from env
for step in range(self.n_steps):
@ -62,6 +67,13 @@ class EvalDiffusionAgent(EvalAgent):
)
reward_trajs[step] = reward_venv
firsts_trajs[step + 1] = terminated_venv | truncated_venv
if self.save_full_observations: # state-only
obs_full_venv = np.array(
[info["full_obs"]["state"] for info in info_venv]
) # n_envs x act_steps x obs_dim
obs_full_trajs = np.vstack(
(obs_full_trajs, obs_full_venv.transpose(1, 0, 2))
)
# update for next step
prev_obs_venv = obs_venv
@ -108,6 +120,16 @@ class EvalDiffusionAgent(EvalAgent):
success_rate = 0
log.info("[WARNING] No episode completed within the iteration!")
# Plot state trajectories (only in D3IL)
if self.traj_plotter is not None:
self.traj_plotter(
obs_full_trajs=obs_full_trajs,
n_render=self.n_render,
max_episode_steps=self.max_episode_steps,
render_dir=self.render_dir,
itr=0,
)
# Log loss and save metrics
time = timer()
log.info(

View File

@ -0,0 +1,68 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/d3il-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
normalization_path: ${oc.env:DPPO_DATA_DIR}/d3il/avoid_m1/normalization.npz
seed: 42
device: cuda:0
env_name: avoiding-m5
obs_dim: 4
action_dim: 2
denoising_steps: 20
cond_steps: 1
horizon_steps: 4
act_steps: 4
n_steps: 25
render_num: 40
plotter:
_target_: env.plot_traj.TrajPlotter
env_type: avoid
normalization_path: ${normalization_path}
env:
n_envs: 40
name: ${env_name}
max_episode_steps: 100
reset_at_iteration: True
save_video: False
best_reward_threshold_for_success: 2
save_full_observations: True
wrappers:
d3il_lowdim:
normalization_path: ${normalization_path}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
pass_full_observations: ${env.save_full_observations}
reset_within_step: False
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
#
network_path: ${base_policy_path}
network:
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
time_dim: 16
mlp_dims: [512, 512, 512]
activation_type: ReLU
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -25,12 +25,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 15000
n_epochs: 5000
batch_size: 16
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 15000
first_cycle_steps: 5000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -24,12 +24,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 10000
n_epochs: 5000
batch_size: 16
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 5000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -25,12 +25,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 10000
batch_size: 32
n_epochs: 5000
batch_size: 16
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 5000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -25,12 +25,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 15000
n_epochs: 5000
batch_size: 16
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 15000
first_cycle_steps: 5000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -24,12 +24,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 10000
n_epochs: 5000
batch_size: 16
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 5000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -25,12 +25,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 10000
batch_size: 32
n_epochs: 5000
batch_size: 16
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 5000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -25,12 +25,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 15000
n_epochs: 5000
batch_size: 16
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 15000
first_cycle_steps: 5000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -24,12 +24,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 10000
n_epochs: 5000
batch_size: 16
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 5000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -25,12 +25,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 10000
n_epochs: 5000
batch_size: 32
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 5000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -1,5 +1,7 @@
## Fine-tuning experiments
**Update, Nov 20 2024**: In v0.7 we updated the fine-tuning configs, as we found that sample efficiency can be improved with a higher actor learning rate and other hyperparameter changes. If you would like to replicate the original experimental results from the paper, please use the configs from v0.6. Otherwise, we recommend starting with the configs from v0.7 for your applications.
### Comparing diffusion-based RL algorithms (Sec. 5.1)
Gym configs are under `cfg/gym/finetune/<env_name>/`, and the naming follows `ft_<alg_name>_diffusion_mlp`, e.g., `ft_awr_diffusion_mlp`. `alg_name` is one of `rwr`, `awr`, `dipo`, `idql`, `dql`, `qsm`, `ppo` (DPPO), `ppo_exact` (exact likelihood). They share the same pre-trained checkpoint in each env.

View File

@ -0,0 +1,66 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
seed: 42
device: cuda:0
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
obs_dim: 44
action_dim: 10
denoising_steps: 100
cond_steps: 1
horizon_steps: 8
act_steps: 8
use_ddim: True
ddim_steps: 5
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
render_num: 0
env:
n_envs: 1000
name: ${env_name}
env_type: furniture
max_episode_steps: 1000
best_reward_threshold_for_success: 2
specific:
headless: true
furniture: lamp
randomness: low
normalization_path: ${normalization_path}
obs_steps: ${cond_steps}
act_steps: ${act_steps}
sparse_reward: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
time_dim: 32
mlp_dims: [1024, 1024, 1024, 1024, 1024, 1024, 1024]
cond_mlp_dims: [512, 64]
use_layernorm: True # needed for larger MLP
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,68 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
seed: 42
device: cuda:0
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
obs_dim: 44
action_dim: 10
denoising_steps: 100
cond_steps: 1
horizon_steps: 16
act_steps: 8
use_ddim: True
ddim_steps: 5
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
render_num: 0
env:
n_envs: 1000
name: ${env_name}
env_type: furniture
max_episode_steps: 1000
best_reward_threshold_for_success: 2
specific:
headless: true
furniture: lamp
randomness: low
normalization_path: ${normalization_path}
obs_steps: ${cond_steps}
act_steps: ${act_steps}
sparse_reward: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.Unet1D
diffusion_step_embed_dim: 16
dim: 64
dim_mults: [1, 2, 4]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
groupnorm_eps: 1e-4 # not important
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -7,7 +7,7 @@ _target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path: ${oc.env:DPPO_LOG_DIR}/furniture-pretrain/one_leg/one_leg_low_dim_pre_diffusion_mlp_ta8_td100/2024-07-22_20-01-16/checkpoint/state_8000.pt
base_policy_path:
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
seed: 42

View File

@ -0,0 +1,68 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
seed: 42
device: cuda:0
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
obs_dim: 58
action_dim: 10
denoising_steps: 100
cond_steps: 1
horizon_steps: 16
act_steps: 8
use_ddim: True
ddim_steps: 5
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
render_num: 0
env:
n_envs: 1000
name: ${env_name}
env_type: furniture
max_episode_steps: 700
best_reward_threshold_for_success: 1
specific:
headless: true
furniture: one_leg
randomness: low
normalization_path: ${normalization_path}
obs_steps: ${cond_steps}
act_steps: ${act_steps}
sparse_reward: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.Unet1D
diffusion_step_embed_dim: 16
dim: 64
dim_mults: [1, 2, 4]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
groupnorm_eps: 1e-4 # not important
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,66 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
seed: 42
device: cuda:0
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
obs_dim: 44
action_dim: 10
denoising_steps: 100
cond_steps: 1
horizon_steps: 8
act_steps: 8
use_ddim: True
ddim_steps: 5
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
render_num: 0
env:
n_envs: 1000
name: ${env_name}
env_type: furniture
max_episode_steps: 1000
best_reward_threshold_for_success: 2
specific:
headless: true
furniture: round_table
randomness: low
normalization_path: ${normalization_path}
obs_steps: ${cond_steps}
act_steps: ${act_steps}
sparse_reward: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
time_dim: 32
mlp_dims: [1024, 1024, 1024, 1024, 1024, 1024, 1024]
cond_mlp_dims: [512, 64]
use_layernorm: True # needed for larger MLP
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,68 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
seed: 42
device: cuda:0
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
obs_dim: 44
action_dim: 10
denoising_steps: 100
cond_steps: 1
horizon_steps: 16
act_steps: 8
use_ddim: True
ddim_steps: 5
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
render_num: 0
env:
n_envs: 1000
name: ${env_name}
env_type: furniture
max_episode_steps: 1000
best_reward_threshold_for_success: 2
specific:
headless: true
furniture: round_table
randomness: low
normalization_path: ${normalization_path}
obs_steps: ${cond_steps}
act_steps: ${act_steps}
sparse_reward: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.Unet1D
diffusion_step_embed_dim: 16
dim: 64
dim_mults: [1, 2, 4]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
groupnorm_eps: 1e-4 # not important
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 8000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 8000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -30,7 +30,7 @@ train:
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 3000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 8000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 8000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -30,7 +30,7 @@ train:
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 3000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 8000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 8000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -30,7 +30,7 @@ train:
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 3000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 8000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 8000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -25,12 +25,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 10000
n_epochs: 3000
batch_size: 256
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 3000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 8000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 8000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -30,7 +30,7 @@ train:
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 3000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 8000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 8000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -30,7 +30,7 @@ train:
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 3000
warmup_steps: 100
min_lr: 1e-5
save_model_freq: 500

View File

@ -17,10 +17,10 @@ obs_dim: 17
action_dim: 6
denoising_steps: 20
cond_steps: 1
horizon_steps: 1
act_steps: 1
horizon_steps: 4
act_steps: 4
n_steps: 1000 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation.
n_steps: 250 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation.
render_num: 0
env:

View File

@ -20,7 +20,7 @@ cond_steps: 1
horizon_steps: 4
act_steps: 4
n_steps: 500 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation.
n_steps: 250 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation.
render_num: 0
env:

View File

@ -0,0 +1,61 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/gym-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
normalization_path: ${oc.env:DPPO_DATA_DIR}/gym/${env_name}/normalization.npz
seed: 42
device: cuda:0
env_name: walker2d-medium-v2
obs_dim: 17
action_dim: 6
denoising_steps: 20
cond_steps: 1
horizon_steps: 4
act_steps: 4
n_steps: 250 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation.
render_num: 0
env:
n_envs: 40
name: ${env_name}
max_episode_steps: 1000
reset_at_iteration: False
save_video: False
best_reward_threshold_for_success: 3 # success rate not relevant for gym tasks
wrappers:
mujoco_locomotion_lowdim:
normalization_path: ${normalization_path}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
#
network_path: ${base_policy_path}
network:
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
time_dim: 16
mlp_dims: [512, 512, 512]
activation_type: ReLU
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -24,12 +24,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 3000
n_epochs: 200
batch_size: 128
learning_rate: 1e-3
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 3000
first_cycle_steps: 200
warmup_steps: 1
min_lr: 1e-4
save_model_freq: 100

View File

@ -23,15 +23,14 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 500
n_epochs: 200
batch_size: 128
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: 200
warmup_steps: 1
min_lr: 1e-4
save_model_freq: 100
model:

View File

@ -24,12 +24,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 3000
n_epochs: 200
batch_size: 128
learning_rate: 1e-3
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 3000
first_cycle_steps: 200
warmup_steps: 1
min_lr: 1e-4
save_model_freq: 100

View File

@ -23,12 +23,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 500
n_epochs: 200
batch_size: 128
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: 200
warmup_steps: 1
min_lr: 1e-4
save_model_freq: 100

View File

@ -24,12 +24,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 8000
n_epochs: 3000
batch_size: 128
learning_rate: 1e-3
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 8000
first_cycle_steps: 3000
warmup_steps: 1
min_lr: 1e-4
save_model_freq: 500

View File

@ -23,12 +23,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 5000
n_epochs: 3000
batch_size: 256
learning_rate: 1e-4
weight_decay: 0
lr_scheduler:
first_cycle_steps: 5000
first_cycle_steps: 3000
warmup_steps: 100
min_lr: 1e-4
save_model_freq: 500

View File

@ -24,12 +24,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 8000
n_epochs: 3000
batch_size: 256
learning_rate: 1e-3
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 8000
first_cycle_steps: 3000
warmup_steps: 1
min_lr: 1e-4
save_model_freq: 500

View File

@ -23,12 +23,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 5000
n_epochs: 3000
batch_size: 128
learning_rate: 1e-3
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 5000
first_cycle_steps: 3000
warmup_steps: 1
min_lr: 1e-4
save_model_freq: 500

View File

@ -24,12 +24,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 8000
n_epochs: 3000
batch_size: 128
learning_rate: 1e-3
weight_decay: 1e-5
lr_scheduler:
first_cycle_steps: 8000
first_cycle_steps: 3000
warmup_steps: 1
min_lr: 1e-4
save_model_freq: 500

View File

@ -23,12 +23,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 5000
n_epochs: 3000
batch_size: 128
learning_rate: 1e-3
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 5000
first_cycle_steps: 3000
warmup_steps: 1
min_lr: 1e-4
save_model_freq: 500

View File

@ -24,12 +24,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 3000
n_epochs: 200
batch_size: 128
learning_rate: 1e-3
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 3000
first_cycle_steps: 200
warmup_steps: 1
min_lr: 1e-4
save_model_freq: 100

View File

@ -23,12 +23,12 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_epochs: 3000
n_epochs: 200
batch_size: 128
learning_rate: 1e-4
weight_decay: 1e-6
lr_scheduler:
first_cycle_steps: 3000
first_cycle_steps: 200
warmup_steps: 1
min_lr: 1e-4
save_model_freq: 100

View File

@ -1,7 +1,7 @@
defaults:
- _self_
hydra:
run:
run:
dir: ${logdir}
_target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
@ -42,7 +42,7 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 1000
n_train_itr: 501
n_critic_warmup_itr: 0
n_steps: 1000
gamma: 0.99
@ -55,7 +55,7 @@ train:
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 1000
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100
@ -67,7 +67,7 @@ train:
reward_scale_running: True
reward_scale_const: 1.0
gae_lambda: 0.95
batch_size: 10000
batch_size: 5000
update_epochs: 10
vf_coef: 0.5
target_kl: 1
@ -75,7 +75,7 @@ train:
model:
_target_: model.diffusion.diffusion_ppo.PPODiffusion
# HP to tune
gamma_denoising: 0.99
gamma_denoising: 1
clip_ploss_coef: 0.1
clip_ploss_coef_base: 0.1
clip_ploss_coef_rate: 3
@ -94,10 +94,10 @@ model:
residual_style: True
critic:
_target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256]
activation_type: Mish
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
ft_denoising_steps: ${ft_denoising_steps}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}

View File

@ -40,7 +40,7 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 1000
n_train_itr: 501
n_critic_warmup_itr: 0
n_steps: 1000
gamma: 0.99
@ -65,7 +65,7 @@ train:
reward_scale_running: True
reward_scale_const: 1.0
gae_lambda: 0.95
batch_size: 1000
batch_size: 500
update_epochs: 10
vf_coef: 0.5
target_kl: 1

View File

@ -42,7 +42,7 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 1000
n_train_itr: 301
n_critic_warmup_itr: 0
n_steps: 1000
gamma: 0.99
@ -67,7 +67,7 @@ train:
reward_scale_running: True
reward_scale_const: 1.0
gae_lambda: 0.95
batch_size: 10000
batch_size: 5000
update_epochs: 10
vf_coef: 0.5
target_kl: 1
@ -75,7 +75,7 @@ train:
model:
_target_: model.diffusion.diffusion_ppo.PPODiffusion
# HP to tune
gamma_denoising: 0.99
gamma_denoising: 1
clip_ploss_coef: 0.1
clip_ploss_coef_base: 0.1
clip_ploss_coef_rate: 3

View File

@ -40,7 +40,7 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 1000
n_train_itr: 301
n_critic_warmup_itr: 0
n_steps: 1000
gamma: 0.99
@ -65,7 +65,7 @@ train:
reward_scale_running: True
reward_scale_const: 1.0
gae_lambda: 0.95
batch_size: 1000
batch_size: 500
update_epochs: 10
vf_coef: 0.5
target_kl: 1

View File

@ -42,7 +42,7 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 1000
n_train_itr: 501
n_critic_warmup_itr: 0
n_steps: 1000
gamma: 0.99
@ -55,7 +55,7 @@ train:
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 10000
first_cycle_steps: 1000
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100
@ -67,7 +67,7 @@ train:
reward_scale_running: True
reward_scale_const: 1.0
gae_lambda: 0.95
batch_size: 10000
batch_size: 5000
update_epochs: 10
vf_coef: 0.5
target_kl: 1
@ -75,7 +75,7 @@ train:
model:
_target_: model.diffusion.diffusion_ppo.PPODiffusion
# HP to tune
gamma_denoising: 0.99
gamma_denoising: 1
clip_ploss_coef: 0.1
clip_ploss_coef_base: 0.1
clip_ploss_coef_rate: 3
@ -94,10 +94,10 @@ model:
residual_style: True
critic:
_target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256]
activation_type: Mish
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
ft_denoising_steps: ${ft_denoising_steps}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}

View File

@ -40,7 +40,7 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 1000
n_train_itr: 301
n_critic_warmup_itr: 0
n_steps: 1000
gamma: 0.99
@ -65,7 +65,7 @@ train:
reward_scale_running: True
reward_scale_const: 1.0
gae_lambda: 0.95
batch_size: 1000
batch_size: 500
update_epochs: 10
vf_coef: 0.5
target_kl: 1

View File

@ -1,6 +1,6 @@
## Pre-training experiments
**Update, Nov 6 2024**: we fixed the issue of EMA update being too infrequent causing slow pre-training. Now the number of epochs needed for pre-training can be much slower than those used in the configs. We recommend training with fewer epochs and testing the early checkpoints.
**Update, Nov 20 2024**: We fixed the issue of EMA update being too infrequent causing slow pre-training ([commit](https://github.com/irom-princeton/dppo/commit/e1ef4ca1cfbff85e5ae6c49f5e57debd70174616)). Now the number of epochs needed for pre-training can be much lower than those used in the configs (e.g., 3000 for robomimic state and 1000 for robomimic pixel), and we have updated the pre-training configs in v0.7. If you would like to replicate the original experimental results from the paper, please use v0.6.
### Comparing diffusion-based RL algorithms (Sec. 5.1)
Gym configs are under `cfg/gym/pretrain/<env_name>/`, and the config name is `pre_diffusion_mlp`. Robomimic configs are under `cfg/robomimic/pretrain/<env_name>/`, and the name is also `pre_diffusion_mlp`.

View File

@ -7,7 +7,7 @@ _target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_img_ta4_td100/2024-07-30_22-23-55/checkpoint/state_5000.pt
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
@ -28,7 +28,7 @@ n_steps: 300 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 50
n_envs: 20 # reduce gpu usage
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300

View File

@ -0,0 +1,68 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
seed: 42
device: cuda:0
env_name: can
obs_dim: 23
action_dim: 7
denoising_steps: 20
cond_steps: 1
horizon_steps: 4
act_steps: 4
n_steps: 75 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 40
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos',
'object'] # same order of preprocessed observations
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.Unet1D
diffusion_step_embed_dim: 16
dim: 40
dim_mults: [1, 2]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
action_dim: ${action_dim}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,102 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
seed: 42
device: cuda:0
env_name: can
obs_dim: 9
action_dim: 7
denoising_steps: 100
cond_steps: 1
img_cond_steps: 1
horizon_steps: 4
act_steps: 4
use_ddim: True
ddim_steps: 5
n_steps: 300 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 20 # reduce gpu usage
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos']
image_keys: ['robot0_eye_in_hand_image']
shape_meta: ${shape_meta}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
shape_meta:
obs:
rgb:
shape: [3, 96, 96]
state:
shape: [9]
action:
shape: [7]
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.VisionUnet1D
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
augment: False
spatial_emb: 128
diffusion_step_embed_dim: 32
dim: 40
dim_mults:
- 1
- 2
kernel_size: 5
n_groups: 8
smaller_encoder: false
cond_predict_scale: true
action_dim: ${action_dim}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -7,7 +7,7 @@ _target_: agent.eval.eval_gaussian_agent.EvalGaussianAgent
name: ${env_name}_eval_gaussian_mlp_ta${horizon_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_gaussian_mlp_ta4/2024-06-28_13-31-00/checkpoint/state_5000.pt
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz

View File

@ -7,7 +7,7 @@ _target_: agent.eval.eval_gaussian_img_agent.EvalImgGaussianAgent
name: ${env_name}_eval_gaussian_mlp_img_ta${horizon_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_gaussian_mlp_img_ta4/2024-07-28_21-54-40/checkpoint/state_1000.pt
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz

View File

@ -0,0 +1,65 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
seed: 42
device: cuda:0
env_name: lift
obs_dim: 19
action_dim: 7
denoising_steps: 20
cond_steps: 1
horizon_steps: 4
act_steps: 4
n_steps: 300 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 50
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos',
'object'] # same order of preprocessed observations
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
network_path: ${base_policy_path}
network:
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
time_dim: 16
mlp_dims: [512, 512, 512]
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,97 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
seed: 42
device: cuda:0
env_name: lift
obs_dim: 9
action_dim: 7
denoising_steps: 100
cond_steps: 1
img_cond_steps: 1
horizon_steps: 4
act_steps: 4
use_ddim: True
ddim_steps: 5
n_steps: 300 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 20 # reduce gpu usage
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos']
image_keys: ['robot0_eye_in_hand_image']
shape_meta: ${shape_meta}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
shape_meta:
obs:
rgb:
shape: [3, 96, 96]
state:
shape: [9]
action:
shape: [7]
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.mlp_diffusion.VisionDiffusionMLP
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
augment: False
spatial_emb: 128
time_dim: 32
mlp_dims: [512, 512, 512]
residual_style: True
img_cond_steps: ${img_cond_steps}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,68 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
seed: 42
device: cuda:0
env_name: lift
obs_dim: 19
action_dim: 7
denoising_steps: 20
cond_steps: 1
horizon_steps: 4
act_steps: 4
n_steps: 75 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 40
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos',
'object'] # same order of preprocessed observations
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.Unet1D
diffusion_step_embed_dim: 16
dim: 40
dim_mults: [1, 2]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
action_dim: ${action_dim}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,100 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
seed: 42
device: cuda:0
env_name: lift
obs_dim: 9
action_dim: 7
denoising_steps: 100
cond_steps: 1
img_cond_steps: 1
horizon_steps: 4
act_steps: 4
use_ddim: True
ddim_steps: 5
n_steps: 300 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 20 # reduce gpu usage
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos']
image_keys: ['robot0_eye_in_hand_image']
shape_meta: ${shape_meta}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
shape_meta:
obs:
rgb:
shape: [3, 96, 96]
state:
shape: [9]
action:
shape: [7]
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.VisionUnet1D
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
augment: False
spatial_emb: 128
diffusion_step_embed_dim: 32
dim: 40
dim_mults: [1, 2]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
action_dim: ${action_dim}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -18,8 +18,8 @@ obs_dim: 23
action_dim: 7
denoising_steps: 20
cond_steps: 1
horizon_steps: 1
act_steps: 1
horizon_steps: 4
act_steps: 4
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
render_num: 0

View File

@ -0,0 +1,97 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
seed: 42
device: cuda:0
env_name: square
obs_dim: 9
action_dim: 7
denoising_steps: 100
cond_steps: 1
img_cond_steps: 1
horizon_steps: 4
act_steps: 4
use_ddim: True
ddim_steps: 5
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 20 # reduce gpu usage
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 400
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos']
image_keys: ['agentview_image']
shape_meta: ${shape_meta}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
shape_meta:
obs:
rgb:
shape: [3, 96, 96]
state:
shape: [9]
action:
shape: [7]
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.mlp_diffusion.VisionDiffusionMLP
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
augment: False
spatial_emb: 128
time_dim: 32
mlp_dims: [768, 768, 768]
residual_style: True
img_cond_steps: ${img_cond_steps}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,68 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
seed: 42
device: cuda:0
env_name: square
obs_dim: 23
action_dim: 7
denoising_steps: 20
cond_steps: 1
horizon_steps: 4
act_steps: 4
n_steps: 100 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 50
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 400
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos',
'object'] # same order of preprocessed observations
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.Unet1D
diffusion_step_embed_dim: 16
dim: 64
dim_mults: [1, 2]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,102 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
seed: 42
device: cuda:0
env_name: square
obs_dim: 9
action_dim: 7
denoising_steps: 100
cond_steps: 1
img_cond_steps: 1
horizon_steps: 4
act_steps: 4
use_ddim: True
ddim_steps: 5
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 30 # reduce gpu usage
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 400
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos']
image_keys: ['agentview_image']
shape_meta: ${shape_meta}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
shape_meta:
obs:
rgb:
shape: [3, 96, 96]
state:
shape: [9]
action:
shape: [7]
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.VisionUnet1D
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
augment: False
spatial_emb: 128
diffusion_step_embed_dim: 32
dim: 64
dim_mults:
- 1
- 2
kernel_size: 5
n_groups: 8
smaller_encoder: false
cond_predict_scale: true
action_dim: ${action_dim}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -3,9 +3,9 @@ defaults:
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_gaussian_agent.EvalGaussianAgent
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_gaussian_mlp_ta${horizon_steps}
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
@ -13,12 +13,13 @@ normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.
seed: 42
device: cuda:0
env_name: square
obs_dim: 23
action_dim: 7
env_name: transport
obs_dim: 59
action_dim: 14
denoising_steps: 20
cond_steps: 1
horizon_steps: 1
act_steps: 1
horizon_steps: 8
act_steps: 8
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
render_num: 0
@ -27,7 +28,7 @@ env:
n_envs: 50
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 400
max_episode_steps: 800
save_video: False
wrappers:
robomimic_lowdim:
@ -35,6 +36,9 @@ env:
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos',
"robot1_eef_pos",
"robot1_eef_quat",
"robot1_gripper_qpos",
'object'] # same order of preprocessed observations
multi_step:
n_obs_steps: ${cond_steps}
@ -42,19 +46,24 @@ env:
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
model:
_target_: model.common.gaussian.GaussianModel
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
network_path: ${base_policy_path}
network:
_target_: model.common.mlp_gaussian.Gaussian_MLP
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
time_dim: 32
mlp_dims: [1024, 1024, 1024]
activation_type: ReLU
use_layernorm: true
fixed_std: 0.1
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,102 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
seed: 42
device: cuda:0
env_name: transport
obs_dim: 18
action_dim: 14
denoising_steps: 100
cond_steps: 1
img_cond_steps: 1
horizon_steps: 8
act_steps: 8
use_ddim: True
ddim_steps: 5
n_steps: 200 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 30 # reduce gpu usage
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 800
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos',
"robot1_eef_pos",
"robot1_eef_quat",
"robot1_gripper_qpos"]
image_keys: ['shouldercamera0_image',
'shouldercamera1_image']
shape_meta: ${shape_meta}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
shape_meta:
obs:
rgb:
shape: [6, 96, 96]
state:
shape: [18]
action:
shape: [14]
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.mlp_diffusion.VisionDiffusionMLP
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
augment: False
num_img: 2
spatial_emb: 128
time_dim: 32
mlp_dims: [768, 768, 768]
residual_style: True
img_cond_steps: ${img_cond_steps}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,71 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
seed: 42
device: cuda:0
env_name: transport
obs_dim: 59
action_dim: 14
denoising_steps: 20
cond_steps: 1
horizon_steps: 16
act_steps: 8
n_steps: 100 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 50
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 800
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos',
"robot1_eef_pos",
"robot1_eef_quat",
"robot1_gripper_qpos",
'object'] # same order of preprocessed observations
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.Unet1D
diffusion_step_embed_dim: 16
dim: 64
dim_mults: [1, 2]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,107 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
seed: 42
device: cuda:0
env_name: transport
obs_dim: 18
action_dim: 14
denoising_steps: 100
cond_steps: 1
img_cond_steps: 1
horizon_steps: 16
act_steps: 8
use_ddim: True
ddim_steps: 5
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 30 # reduce gpu usage
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 800
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos',
"robot1_eef_pos",
"robot1_eef_quat",
"robot1_gripper_qpos"]
image_keys: ['shouldercamera0_image',
'shouldercamera1_image']
shape_meta: ${shape_meta}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
shape_meta:
obs:
rgb:
shape: [6, 96, 96]
state:
shape: [18]
action:
shape: [14]
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.VisionUnet1D
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
augment: False
num_img: 2
spatial_emb: 128
diffusion_step_embed_dim: 32
dim: 64
dim_mults:
- 1
- 2
kernel_size: 5
n_groups: 8
smaller_encoder: false
cond_predict_scale: true
action_dim: ${action_dim}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -7,7 +7,8 @@ _target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
name: ${env_name}_ft_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_5000.pt # use 8000 for comparing policy parameterizations
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_5000.pt # use 5000 for comparing diffusion rl algorithms
# base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_8000.pt # use 8000 for comparing policy parameterizations
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
@ -54,13 +55,13 @@ train:
actor_lr: 1e-4
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-4
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100

View File

@ -66,16 +66,16 @@ train:
gamma: 0.999
augment: True
grad_accumulate: 15
actor_lr: 1e-4
actor_lr: 5e-5
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-4
min_lr: 5e-5
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100

View File

@ -27,7 +27,7 @@ env:
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: false
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
@ -47,20 +47,20 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 300
n_train_itr: 151
n_critic_warmup_itr: 2
n_steps: 300
gamma: 0.999
actor_lr: 1e-5
actor_lr: 1e-4
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-5
min_lr: 1e-4
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100

View File

@ -0,0 +1,173 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent
name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-34-05_42/checkpoint/state_500.pt
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
seed: 42
device: cuda:0
env_name: can
obs_dim: 9
action_dim: 7
denoising_steps: 100
ft_denoising_steps: 5
cond_steps: 1
img_cond_steps: 1
horizon_steps: 4
act_steps: 4
use_ddim: True
env:
n_envs: 50
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos']
image_keys: ['robot0_eye_in_hand_image']
shape_meta: ${shape_meta}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
shape_meta:
obs:
rgb:
shape: [3, 96, 96]
state:
shape: [9]
action:
shape: [7]
wandb:
entity: ${oc.env:DPPO_WANDB_ENTITY}
project: robomimic-${env_name}-finetune
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 151
n_critic_warmup_itr: 2
n_steps: 300
gamma: 0.999
augment: True
grad_accumulate: 15
actor_lr: 5e-5
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 5e-5
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100
val_freq: 10
render:
freq: 1
num: 0
# PPO specific
reward_scale_running: True
reward_scale_const: 1.0
gae_lambda: 0.95
batch_size: 500
logprob_batch_size: 500
update_epochs: 10
vf_coef: 0.5
target_kl: 1
model:
_target_: model.diffusion.diffusion_ppo.PPODiffusion
# HP to tune
gamma_denoising: 0.99
clip_ploss_coef: 0.01
clip_ploss_coef_base: 0.001
clip_ploss_coef_rate: 3
randn_clip_value: 3
min_sampling_denoising_std: 0.1
min_logprob_denoising_std: 0.1
#
use_ddim: ${use_ddim}
ddim_steps: ${ft_denoising_steps}
learn_eta: False
eta:
base_eta: 1
input_dim: ${obs_dim}
mlp_dims: [256, 256]
action_dim: ${action_dim}
min_eta: 0.1
max_eta: 1.0
_target_: model.diffusion.eta.EtaFixed
network_path: ${base_policy_path}
actor:
_target_: model.diffusion.unet.VisionUnet1D
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
augment: False
spatial_emb: 128
diffusion_step_embed_dim: 32
dim: 40
dim_mults: [1, 2]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
action_dim: ${action_dim}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
critic:
_target_: model.common.critic.ViTCritic
spatial_emb: 128
augment: False
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
mlp_dims: [256, 256, 256]
activation_type: Mish
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
ft_denoising_steps: ${ft_denoising_steps}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -45,20 +45,20 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 300
n_train_itr: 151
n_critic_warmup_itr: 2
n_steps: 300
gamma: 0.999
actor_lr: 1e-5
actor_lr: 1e-4
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-5
min_lr: 1e-4
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100

View File

@ -1,7 +1,7 @@
defaults:
- _self_
hydra:
run:
run:
dir: ${logdir}
_target_: agent.finetune.train_ppo_gaussian_img_agent.TrainPPOImgGaussianAgent
@ -57,22 +57,22 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 200
n_train_itr: 151
n_critic_warmup_itr: 2
n_steps: 300
gamma: 0.999
augment: True
grad_accumulate: 5
actor_lr: 1e-5
actor_lr: 1e-4
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 200
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-5
min_lr: 1e-4
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 200
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100
@ -140,9 +140,9 @@ model:
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256]
activation_type: Mish
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
device: ${device}

View File

@ -45,20 +45,20 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 300
n_train_itr: 151
n_critic_warmup_itr: 2
n_steps: 300
gamma: 0.999
actor_lr: 1e-5
actor_lr: 1e-4
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-5
min_lr: 1e-4
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100

View File

@ -46,20 +46,20 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 300
n_train_itr: 151
n_critic_warmup_itr: 2
n_steps: 300
gamma: 0.999
actor_lr: 1e-5
actor_lr: 1e-4
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-5
min_lr: 1e-4
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100

View File

@ -1,13 +1,14 @@
defaults:
- _self_
hydra:
run:
run:
dir: ${logdir}
_target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
name: ${env_name}_ft_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_5000.pt # use 8000 for comparing policy parameterizations
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_5000.pt # use 5000 for comparing diffusion rl algorithms
# base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_8000.pt # use 8000 for comparing policy parameterizations
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
@ -54,13 +55,13 @@ train:
actor_lr: 1e-4
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-4
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100

View File

@ -60,22 +60,22 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 151
n_train_itr: 81
n_critic_warmup_itr: 2
n_steps: 300
gamma: 0.999
augment: True
grad_accumulate: 15
actor_lr: 1e-4
actor_lr: 5e-5
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-4
min_lr: 5e-5
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100

View File

@ -27,7 +27,7 @@ env:
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: false
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
@ -47,20 +47,20 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 300
n_train_itr: 81
n_critic_warmup_itr: 2
n_steps: 300
gamma: 0.999
actor_lr: 1e-5
actor_lr: 1e-4
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-5
min_lr: 1e-4
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100
@ -102,10 +102,10 @@ model:
action_dim: ${action_dim}
critic:
_target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256]
activation_type: Mish
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
ft_denoising_steps: ${ft_denoising_steps}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}

View File

@ -0,0 +1,173 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent
name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-35-19_42/checkpoint/state_500.pt
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
seed: 42
device: cuda:0
env_name: lift
obs_dim: 9
action_dim: 7
denoising_steps: 100
ft_denoising_steps: 5
cond_steps: 1
img_cond_steps: 1
horizon_steps: 4
act_steps: 4
use_ddim: True
env:
n_envs: 50
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos']
image_keys: ['robot0_eye_in_hand_image']
shape_meta: ${shape_meta}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
shape_meta:
obs:
rgb:
shape: [3, 96, 96]
state:
shape: [9]
action:
shape: [7]
wandb:
entity: ${oc.env:DPPO_WANDB_ENTITY}
project: robomimic-${env_name}-finetune
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 81
n_critic_warmup_itr: 2
n_steps: 300
gamma: 0.999
augment: True
grad_accumulate: 15
actor_lr: 5e-5
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 5e-5
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100
val_freq: 10
render:
freq: 1
num: 0
# PPO specific
reward_scale_running: True
reward_scale_const: 1.0
gae_lambda: 0.95
batch_size: 500
logprob_batch_size: 500
update_epochs: 10
vf_coef: 0.5
target_kl: 1
model:
_target_: model.diffusion.diffusion_ppo.PPODiffusion
# HP to tune
gamma_denoising: 0.99
clip_ploss_coef: 0.01
clip_ploss_coef_base: 0.001
clip_ploss_coef_rate: 3
randn_clip_value: 3
min_sampling_denoising_std: 0.1
min_logprob_denoising_std: 0.1
#
use_ddim: ${use_ddim}
ddim_steps: ${ft_denoising_steps}
learn_eta: False
eta:
base_eta: 1
input_dim: ${obs_dim}
mlp_dims: [256, 256]
action_dim: ${action_dim}
min_eta: 0.1
max_eta: 1.0
_target_: model.diffusion.eta.EtaFixed
network_path: ${base_policy_path}
actor:
_target_: model.diffusion.unet.VisionUnet1D
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
augment: False
spatial_emb: 128
diffusion_step_embed_dim: 32
dim: 40
dim_mults: [1, 2]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
action_dim: ${action_dim}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
critic:
_target_: model.common.critic.ViTCritic
spatial_emb: 128
augment: False
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
mlp_dims: [256, 256, 256]
activation_type: Mish
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
ft_denoising_steps: ${ft_denoising_steps}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -25,7 +25,7 @@ env:
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: false
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
@ -45,20 +45,20 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 300
n_train_itr: 81
n_critic_warmup_itr: 2
n_steps: 300
gamma: 0.999
actor_lr: 1e-5
actor_lr: 1e-4
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-5
min_lr: 1e-4
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100
@ -93,9 +93,9 @@ model:
action_dim: ${action_dim}
critic:
_target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256]
activation_type: Mish
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
device: ${device}

View File

@ -1,7 +1,7 @@
defaults:
- _self_
hydra:
run:
run:
dir: ${logdir}
_target_: agent.finetune.train_ppo_gaussian_img_agent.TrainPPOImgGaussianAgent
@ -57,22 +57,22 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 200
n_train_itr: 81
n_critic_warmup_itr: 2
n_steps: 300
gamma: 0.999
augment: True
grad_accumulate: 5
actor_lr: 1e-5
actor_lr: 1e-4
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 200
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-5
min_lr: 1e-4
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 200
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100
@ -140,9 +140,9 @@ model:
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256]
activation_type: Mish
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
device: ${device}

View File

@ -25,7 +25,7 @@ env:
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: false
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
@ -45,20 +45,20 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 300
n_train_itr: 81
n_critic_warmup_itr: 2
n_steps: 300
gamma: 0.999
actor_lr: 1e-5
actor_lr: 1e-4
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-5
min_lr: 1e-4
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100
@ -94,9 +94,9 @@ model:
action_dim: ${action_dim}
critic:
_target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256]
activation_type: Mish
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
device: ${device}

View File

@ -26,7 +26,7 @@ env:
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: false
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
@ -46,20 +46,20 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 300
n_train_itr: 81
n_critic_warmup_itr: 2
n_steps: 300
gamma: 0.999
actor_lr: 1e-5
actor_lr: 1e-4
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-5
min_lr: 1e-4
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100
@ -94,9 +94,9 @@ model:
action_dim: ${action_dim}
critic:
_target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256]
activation_type: Mish
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
device: ${device}

View File

@ -26,7 +26,7 @@ env:
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: false
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
@ -46,20 +46,20 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 300
n_train_itr: 81
n_critic_warmup_itr: 2
n_steps: 300
gamma: 0.999
actor_lr: 1e-5
actor_lr: 1e-4
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-5
min_lr: 1e-4
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100
@ -95,9 +95,9 @@ model:
action_dim: ${action_dim}
critic:
_target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256]
activation_type: Mish
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
device: ${device}

View File

@ -1,7 +1,7 @@
defaults:
- _self_
hydra:
run:
run:
dir: ${logdir}
_target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
@ -27,7 +27,7 @@ env:
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 400
save_video: false
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
@ -54,14 +54,14 @@ train:
actor_lr: 1e-4
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 1000
warmup_steps: 10
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 0
min_lr: 1e-4
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 1000
warmup_steps: 10
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 0
min_lr: 1e-3
save_model_freq: 100
val_freq: 10

View File

@ -69,13 +69,13 @@ train:
actor_lr: 1e-5
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-5
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 1000
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100

View File

@ -1,7 +1,7 @@
defaults:
- _self_
hydra:
run:
run:
dir: ${logdir}
_target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
@ -27,7 +27,7 @@ env:
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 400
save_video: false
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
@ -47,21 +47,21 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 1000
n_train_itr: 201
n_critic_warmup_itr: 2
n_steps: 400
gamma: 0.999
actor_lr: 1e-5
actor_lr: 2e-5
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 1000
warmup_steps: 10
min_lr: 1e-5
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 0
min_lr: 1e-4
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 1000
warmup_steps: 10
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 0
min_lr: 1e-3
save_model_freq: 100
val_freq: 10
@ -102,10 +102,10 @@ model:
action_dim: ${action_dim}
critic:
_target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256]
activation_type: Mish
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
ft_denoising_steps: ${ft_denoising_steps}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}

View File

@ -0,0 +1,173 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent
name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/square/square_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-36-37_42/checkpoint/state_500.pt
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
seed: 42
device: cuda:0
env_name: square
obs_dim: 9
action_dim: 7
denoising_steps: 100
ft_denoising_steps: 5
cond_steps: 1
img_cond_steps: 1
horizon_steps: 4
act_steps: 4
use_ddim: True
env:
n_envs: 50
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 400
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos']
image_keys: ['agentview_image']
shape_meta: ${shape_meta}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
shape_meta:
obs:
rgb:
shape: [3, 96, 96]
state:
shape: [9]
action:
shape: [7]
wandb:
entity: ${oc.env:DPPO_WANDB_ENTITY}
project: robomimic-${env_name}-finetune
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 301
n_critic_warmup_itr: 2
n_steps: 400
gamma: 0.999
augment: True
grad_accumulate: 20
actor_lr: 1e-5
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-5
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100
val_freq: 10
render:
freq: 1
num: 0
# PPO specific
reward_scale_running: True
reward_scale_const: 1.0
gae_lambda: 0.95
batch_size: 500
logprob_batch_size: 1000
update_epochs: 10
vf_coef: 0.5
target_kl: 1
model:
_target_: model.diffusion.diffusion_ppo.PPODiffusion
# HP to tune
gamma_denoising: 0.99
clip_ploss_coef: 0.01
clip_ploss_coef_base: 0.001
clip_ploss_coef_rate: 3
randn_clip_value: 3
min_sampling_denoising_std: 0.1
min_logprob_denoising_std: 0.1
#
use_ddim: ${use_ddim}
ddim_steps: ${ft_denoising_steps}
learn_eta: False
eta:
base_eta: 1
input_dim: ${obs_dim}
mlp_dims: [256, 256]
action_dim: ${action_dim}
min_eta: 0.1
max_eta: 1.0
_target_: model.diffusion.eta.EtaFixed
network_path: ${base_policy_path}
actor:
_target_: model.diffusion.unet.VisionUnet1D
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
augment: False
spatial_emb: 128
diffusion_step_embed_dim: 32
dim: 64
dim_mults: [1, 2]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
action_dim: ${action_dim}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
critic:
_target_: model.common.critic.ViTCritic
spatial_emb: 128
augment: False
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
mlp_dims: [256, 256, 256]
activation_type: Mish
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
ft_denoising_steps: ${ft_denoising_steps}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -25,7 +25,7 @@ env:
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 400
save_video: false
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
@ -45,21 +45,21 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 1000
n_train_itr: 201
n_critic_warmup_itr: 2
n_steps: 400
gamma: 0.999
actor_lr: 1e-5
actor_lr: 1e-4
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 1000
warmup_steps: 10
min_lr: 1e-5
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 0
min_lr: 1e-4
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 1000
warmup_steps: 10
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 0
min_lr: 1e-3
save_model_freq: 100
val_freq: 10
@ -93,9 +93,9 @@ model:
action_dim: ${action_dim}
critic:
_target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256]
activation_type: Mish
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
device: ${device}

View File

@ -57,7 +57,7 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 500
n_train_itr: 301
n_critic_warmup_itr: 2
n_steps: 400
gamma: 0.999
@ -66,13 +66,13 @@ train:
actor_lr: 1e-5
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 500
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-5
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 500
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100
@ -140,9 +140,9 @@ model:
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256]
activation_type: Mish
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
device: ${device}

View File

@ -25,7 +25,7 @@ env:
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 400
save_video: false
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
@ -45,21 +45,21 @@ wandb:
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 1000
n_train_itr: 201
n_critic_warmup_itr: 2
n_steps: 400
gamma: 0.999
actor_lr: 1e-5
actor_lr: 1e-4
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: 1000
warmup_steps: 10
min_lr: 1e-5
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 0
min_lr: 1e-4
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: 1000
warmup_steps: 10
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 0
min_lr: 1e-3
save_model_freq: 100
val_freq: 10
@ -94,9 +94,9 @@ model:
action_dim: ${action_dim}
critic:
_target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256]
activation_type: Mish
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
device: ${device}

Some files were not shown because too many files have changed in this diff Show More