v0.7 (#26)
* update from scratch configs * update gym pretraining configs - use fewer epochs * update robomimic pretraining configs - use fewer epochs * allow trajectory plotting in eval agent * add simple vit unet * update avoid pretraining configs - use fewer epochs * update furniture pretraining configs - use same amount of epochs as before * add robomimic diffusion unet pretraining configs * update robomimic finetuning configs - higher lr * add vit unet checkpoint urls * update pretraining and finetuning instructions as configs are updated
This commit is contained in:
parent
d2929f65e1
commit
1d04211666
@ -57,6 +57,7 @@ class EvalAgent:
|
||||
self.horizon_steps = cfg.horizon_steps
|
||||
self.max_episode_steps = cfg.env.max_episode_steps
|
||||
self.reset_at_iteration = cfg.env.get("reset_at_iteration", True)
|
||||
self.save_full_observations = cfg.env.get("save_full_observations", False)
|
||||
self.furniture_sparse_reward = (
|
||||
cfg.env.specific.get("sparse_reward", False)
|
||||
if "specific" in cfg.env
|
||||
@ -85,6 +86,10 @@ class EvalAgent:
|
||||
assert not (
|
||||
self.n_render <= 0 and self.render_video
|
||||
), "Need to set n_render > 0 if saving video"
|
||||
self.traj_plotter = (
|
||||
hydra.utils.instantiate(cfg.plotter)
|
||||
if "plotter" in cfg else None
|
||||
)
|
||||
|
||||
def run(self):
|
||||
pass
|
||||
|
@ -37,6 +37,11 @@ class EvalDiffusionAgent(EvalAgent):
|
||||
prev_obs_venv = self.reset_env_all(options_venv=options_venv)
|
||||
firsts_trajs[0] = 1
|
||||
reward_trajs = np.zeros((self.n_steps, self.n_envs))
|
||||
if self.save_full_observations: # state-only
|
||||
obs_full_trajs = np.empty((0, self.n_envs, self.obs_dim))
|
||||
obs_full_trajs = np.vstack(
|
||||
(obs_full_trajs, prev_obs_venv["state"][:, -1][None])
|
||||
)
|
||||
|
||||
# Collect a set of trajectories from env
|
||||
for step in range(self.n_steps):
|
||||
@ -62,6 +67,13 @@ class EvalDiffusionAgent(EvalAgent):
|
||||
)
|
||||
reward_trajs[step] = reward_venv
|
||||
firsts_trajs[step + 1] = terminated_venv | truncated_venv
|
||||
if self.save_full_observations: # state-only
|
||||
obs_full_venv = np.array(
|
||||
[info["full_obs"]["state"] for info in info_venv]
|
||||
) # n_envs x act_steps x obs_dim
|
||||
obs_full_trajs = np.vstack(
|
||||
(obs_full_trajs, obs_full_venv.transpose(1, 0, 2))
|
||||
)
|
||||
|
||||
# update for next step
|
||||
prev_obs_venv = obs_venv
|
||||
@ -108,6 +120,16 @@ class EvalDiffusionAgent(EvalAgent):
|
||||
success_rate = 0
|
||||
log.info("[WARNING] No episode completed within the iteration!")
|
||||
|
||||
# Plot state trajectories (only in D3IL)
|
||||
if self.traj_plotter is not None:
|
||||
self.traj_plotter(
|
||||
obs_full_trajs=obs_full_trajs,
|
||||
n_render=self.n_render,
|
||||
max_episode_steps=self.max_episode_steps,
|
||||
render_dir=self.render_dir,
|
||||
itr=0,
|
||||
)
|
||||
|
||||
# Log loss and save metrics
|
||||
time = timer()
|
||||
log.info(
|
||||
|
68
cfg/d3il/eval/avoid_m1/eval_diffusion_mlp.yaml
Normal file
68
cfg/d3il/eval/avoid_m1/eval_diffusion_mlp.yaml
Normal file
@ -0,0 +1,68 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/d3il-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/d3il/avoid_m1/normalization.npz
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: avoiding-m5
|
||||
obs_dim: 4
|
||||
action_dim: 2
|
||||
denoising_steps: 20
|
||||
cond_steps: 1
|
||||
horizon_steps: 4
|
||||
act_steps: 4
|
||||
|
||||
n_steps: 25
|
||||
render_num: 40
|
||||
|
||||
plotter:
|
||||
_target_: env.plot_traj.TrajPlotter
|
||||
env_type: avoid
|
||||
normalization_path: ${normalization_path}
|
||||
|
||||
env:
|
||||
n_envs: 40
|
||||
name: ${env_name}
|
||||
max_episode_steps: 100
|
||||
reset_at_iteration: True
|
||||
save_video: False
|
||||
best_reward_threshold_for_success: 2
|
||||
save_full_observations: True
|
||||
wrappers:
|
||||
d3il_lowdim:
|
||||
normalization_path: ${normalization_path}
|
||||
multi_step:
|
||||
n_obs_steps: ${cond_steps}
|
||||
n_action_steps: ${act_steps}
|
||||
max_episode_steps: ${env.max_episode_steps}
|
||||
pass_full_observations: ${env.save_full_observations}
|
||||
reset_within_step: False
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
#
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
|
||||
time_dim: 16
|
||||
mlp_dims: [512, 512, 512]
|
||||
activation_type: ReLU
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
action_dim: ${action_dim}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
@ -25,12 +25,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 15000
|
||||
n_epochs: 5000
|
||||
batch_size: 16
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 15000
|
||||
first_cycle_steps: 5000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -24,12 +24,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 10000
|
||||
n_epochs: 5000
|
||||
batch_size: 16
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 5000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -25,12 +25,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 10000
|
||||
batch_size: 32
|
||||
n_epochs: 5000
|
||||
batch_size: 16
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 5000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -25,12 +25,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 15000
|
||||
n_epochs: 5000
|
||||
batch_size: 16
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 15000
|
||||
first_cycle_steps: 5000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -24,12 +24,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 10000
|
||||
n_epochs: 5000
|
||||
batch_size: 16
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 5000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -25,12 +25,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 10000
|
||||
batch_size: 32
|
||||
n_epochs: 5000
|
||||
batch_size: 16
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 5000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -25,12 +25,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 15000
|
||||
n_epochs: 5000
|
||||
batch_size: 16
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 15000
|
||||
first_cycle_steps: 5000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -24,12 +24,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 10000
|
||||
n_epochs: 5000
|
||||
batch_size: 16
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 5000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -25,12 +25,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 10000
|
||||
n_epochs: 5000
|
||||
batch_size: 32
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 5000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -1,5 +1,7 @@
|
||||
## Fine-tuning experiments
|
||||
|
||||
**Update, Nov 20 2024**: In v0.7 we updated the fine-tuning configs, as we found that sample efficiency can be improved with a higher actor learning rate and other hyperparameter changes. If you would like to replicate the original experimental results from the paper, please use the configs from v0.6. Otherwise, we recommend starting with the configs from v0.7 for your applications.
|
||||
|
||||
### Comparing diffusion-based RL algorithms (Sec. 5.1)
|
||||
Gym configs are under `cfg/gym/finetune/<env_name>/`, and the naming follows `ft_<alg_name>_diffusion_mlp`, e.g., `ft_awr_diffusion_mlp`. `alg_name` is one of `rwr`, `awr`, `dipo`, `idql`, `dql`, `qsm`, `ppo` (DPPO), `ppo_exact` (exact likelihood). They share the same pre-trained checkpoint in each env.
|
||||
|
||||
|
66
cfg/furniture/eval/lamp_low/eval_diffusion_mlp.yaml
Normal file
66
cfg/furniture/eval/lamp_low/eval_diffusion_mlp.yaml
Normal file
@ -0,0 +1,66 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
|
||||
obs_dim: 44
|
||||
action_dim: 10
|
||||
denoising_steps: 100
|
||||
cond_steps: 1
|
||||
horizon_steps: 8
|
||||
act_steps: 8
|
||||
use_ddim: True
|
||||
ddim_steps: 5
|
||||
|
||||
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
n_envs: 1000
|
||||
name: ${env_name}
|
||||
env_type: furniture
|
||||
max_episode_steps: 1000
|
||||
best_reward_threshold_for_success: 2
|
||||
specific:
|
||||
headless: true
|
||||
furniture: lamp
|
||||
randomness: low
|
||||
normalization_path: ${normalization_path}
|
||||
obs_steps: ${cond_steps}
|
||||
act_steps: ${act_steps}
|
||||
sparse_reward: True
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
randn_clip_value: 3
|
||||
#
|
||||
use_ddim: ${use_ddim}
|
||||
ddim_steps: ${ddim_steps}
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
|
||||
time_dim: 32
|
||||
mlp_dims: [1024, 1024, 1024, 1024, 1024, 1024, 1024]
|
||||
cond_mlp_dims: [512, 64]
|
||||
use_layernorm: True # needed for larger MLP
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
action_dim: ${action_dim}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
68
cfg/furniture/eval/lamp_low/eval_diffusion_unet.yaml
Normal file
68
cfg/furniture/eval/lamp_low/eval_diffusion_unet.yaml
Normal file
@ -0,0 +1,68 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
|
||||
obs_dim: 44
|
||||
action_dim: 10
|
||||
denoising_steps: 100
|
||||
cond_steps: 1
|
||||
horizon_steps: 16
|
||||
act_steps: 8
|
||||
use_ddim: True
|
||||
ddim_steps: 5
|
||||
|
||||
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
n_envs: 1000
|
||||
name: ${env_name}
|
||||
env_type: furniture
|
||||
max_episode_steps: 1000
|
||||
best_reward_threshold_for_success: 2
|
||||
specific:
|
||||
headless: true
|
||||
furniture: lamp
|
||||
randomness: low
|
||||
normalization_path: ${normalization_path}
|
||||
obs_steps: ${cond_steps}
|
||||
act_steps: ${act_steps}
|
||||
sparse_reward: True
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
randn_clip_value: 3
|
||||
#
|
||||
use_ddim: ${use_ddim}
|
||||
ddim_steps: ${ddim_steps}
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.diffusion.unet.Unet1D
|
||||
diffusion_step_embed_dim: 16
|
||||
dim: 64
|
||||
dim_mults: [1, 2, 4]
|
||||
kernel_size: 5
|
||||
n_groups: 8
|
||||
smaller_encoder: False
|
||||
cond_predict_scale: True
|
||||
groupnorm_eps: 1e-4 # not important
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
action_dim: ${action_dim}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
@ -7,7 +7,7 @@ _target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path: ${oc.env:DPPO_LOG_DIR}/furniture-pretrain/one_leg/one_leg_low_dim_pre_diffusion_mlp_ta8_td100/2024-07-22_20-01-16/checkpoint/state_8000.pt
|
||||
base_policy_path:
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
|
||||
|
||||
seed: 42
|
||||
|
68
cfg/furniture/eval/one_leg_low/eval_diffusion_unet.yaml
Normal file
68
cfg/furniture/eval/one_leg_low/eval_diffusion_unet.yaml
Normal file
@ -0,0 +1,68 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
|
||||
obs_dim: 58
|
||||
action_dim: 10
|
||||
denoising_steps: 100
|
||||
cond_steps: 1
|
||||
horizon_steps: 16
|
||||
act_steps: 8
|
||||
use_ddim: True
|
||||
ddim_steps: 5
|
||||
|
||||
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
n_envs: 1000
|
||||
name: ${env_name}
|
||||
env_type: furniture
|
||||
max_episode_steps: 700
|
||||
best_reward_threshold_for_success: 1
|
||||
specific:
|
||||
headless: true
|
||||
furniture: one_leg
|
||||
randomness: low
|
||||
normalization_path: ${normalization_path}
|
||||
obs_steps: ${cond_steps}
|
||||
act_steps: ${act_steps}
|
||||
sparse_reward: True
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
randn_clip_value: 3
|
||||
#
|
||||
use_ddim: ${use_ddim}
|
||||
ddim_steps: ${ddim_steps}
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.diffusion.unet.Unet1D
|
||||
diffusion_step_embed_dim: 16
|
||||
dim: 64
|
||||
dim_mults: [1, 2, 4]
|
||||
kernel_size: 5
|
||||
n_groups: 8
|
||||
smaller_encoder: False
|
||||
cond_predict_scale: True
|
||||
groupnorm_eps: 1e-4 # not important
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
action_dim: ${action_dim}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
66
cfg/furniture/eval/round_table_low/eval_diffusion_mlp.yaml
Normal file
66
cfg/furniture/eval/round_table_low/eval_diffusion_mlp.yaml
Normal file
@ -0,0 +1,66 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
|
||||
obs_dim: 44
|
||||
action_dim: 10
|
||||
denoising_steps: 100
|
||||
cond_steps: 1
|
||||
horizon_steps: 8
|
||||
act_steps: 8
|
||||
use_ddim: True
|
||||
ddim_steps: 5
|
||||
|
||||
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
n_envs: 1000
|
||||
name: ${env_name}
|
||||
env_type: furniture
|
||||
max_episode_steps: 1000
|
||||
best_reward_threshold_for_success: 2
|
||||
specific:
|
||||
headless: true
|
||||
furniture: round_table
|
||||
randomness: low
|
||||
normalization_path: ${normalization_path}
|
||||
obs_steps: ${cond_steps}
|
||||
act_steps: ${act_steps}
|
||||
sparse_reward: True
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
randn_clip_value: 3
|
||||
#
|
||||
use_ddim: ${use_ddim}
|
||||
ddim_steps: ${ddim_steps}
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
|
||||
time_dim: 32
|
||||
mlp_dims: [1024, 1024, 1024, 1024, 1024, 1024, 1024]
|
||||
cond_mlp_dims: [512, 64]
|
||||
use_layernorm: True # needed for larger MLP
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
action_dim: ${action_dim}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
68
cfg/furniture/eval/round_table_low/eval_diffusion_unet.yaml
Normal file
68
cfg/furniture/eval/round_table_low/eval_diffusion_unet.yaml
Normal file
@ -0,0 +1,68 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
|
||||
obs_dim: 44
|
||||
action_dim: 10
|
||||
denoising_steps: 100
|
||||
cond_steps: 1
|
||||
horizon_steps: 16
|
||||
act_steps: 8
|
||||
use_ddim: True
|
||||
ddim_steps: 5
|
||||
|
||||
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
n_envs: 1000
|
||||
name: ${env_name}
|
||||
env_type: furniture
|
||||
max_episode_steps: 1000
|
||||
best_reward_threshold_for_success: 2
|
||||
specific:
|
||||
headless: true
|
||||
furniture: round_table
|
||||
randomness: low
|
||||
normalization_path: ${normalization_path}
|
||||
obs_steps: ${cond_steps}
|
||||
act_steps: ${act_steps}
|
||||
sparse_reward: True
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
randn_clip_value: 3
|
||||
#
|
||||
use_ddim: ${use_ddim}
|
||||
ddim_steps: ${ddim_steps}
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.diffusion.unet.Unet1D
|
||||
diffusion_step_embed_dim: 16
|
||||
dim: 64
|
||||
dim_mults: [1, 2, 4]
|
||||
kernel_size: 5
|
||||
n_groups: 8
|
||||
smaller_encoder: False
|
||||
cond_predict_scale: True
|
||||
groupnorm_eps: 1e-4 # not important
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
action_dim: ${action_dim}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
@ -31,7 +31,7 @@ train:
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 8000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -31,7 +31,7 @@ train:
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 8000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -30,7 +30,7 @@ train:
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 3000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -31,7 +31,7 @@ train:
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 8000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -31,7 +31,7 @@ train:
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 8000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -30,7 +30,7 @@ train:
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 3000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -31,7 +31,7 @@ train:
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 8000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -31,7 +31,7 @@ train:
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 8000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -30,7 +30,7 @@ train:
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 3000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -31,7 +31,7 @@ train:
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 8000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -31,7 +31,7 @@ train:
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 8000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -25,12 +25,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 10000
|
||||
n_epochs: 3000
|
||||
batch_size: 256
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 3000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -31,7 +31,7 @@ train:
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 8000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -31,7 +31,7 @@ train:
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 8000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -30,7 +30,7 @@ train:
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 3000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -31,7 +31,7 @@ train:
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 8000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -31,7 +31,7 @@ train:
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 8000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -30,7 +30,7 @@ train:
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 3000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-5
|
||||
save_model_freq: 500
|
||||
|
@ -17,10 +17,10 @@ obs_dim: 17
|
||||
action_dim: 6
|
||||
denoising_steps: 20
|
||||
cond_steps: 1
|
||||
horizon_steps: 1
|
||||
act_steps: 1
|
||||
horizon_steps: 4
|
||||
act_steps: 4
|
||||
|
||||
n_steps: 1000 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation.
|
||||
n_steps: 250 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation.
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
|
@ -20,7 +20,7 @@ cond_steps: 1
|
||||
horizon_steps: 4
|
||||
act_steps: 4
|
||||
|
||||
n_steps: 500 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation.
|
||||
n_steps: 250 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation.
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
|
61
cfg/gym/eval/walker2d-v2/eval_diffusion_mlp.yaml
Normal file
61
cfg/gym/eval/walker2d-v2/eval_diffusion_mlp.yaml
Normal file
@ -0,0 +1,61 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/gym-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/gym/${env_name}/normalization.npz
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: walker2d-medium-v2
|
||||
obs_dim: 17
|
||||
action_dim: 6
|
||||
denoising_steps: 20
|
||||
cond_steps: 1
|
||||
horizon_steps: 4
|
||||
act_steps: 4
|
||||
|
||||
n_steps: 250 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation.
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
n_envs: 40
|
||||
name: ${env_name}
|
||||
max_episode_steps: 1000
|
||||
reset_at_iteration: False
|
||||
save_video: False
|
||||
best_reward_threshold_for_success: 3 # success rate not relevant for gym tasks
|
||||
wrappers:
|
||||
mujoco_locomotion_lowdim:
|
||||
normalization_path: ${normalization_path}
|
||||
multi_step:
|
||||
n_obs_steps: ${cond_steps}
|
||||
n_action_steps: ${act_steps}
|
||||
max_episode_steps: ${env.max_episode_steps}
|
||||
reset_within_step: True
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
#
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
|
||||
time_dim: 16
|
||||
mlp_dims: [512, 512, 512]
|
||||
activation_type: ReLU
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
action_dim: ${action_dim}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
@ -24,12 +24,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 3000
|
||||
n_epochs: 200
|
||||
batch_size: 128
|
||||
learning_rate: 1e-3
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 3000
|
||||
first_cycle_steps: 200
|
||||
warmup_steps: 1
|
||||
min_lr: 1e-4
|
||||
save_model_freq: 100
|
||||
|
@ -23,15 +23,14 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 500
|
||||
n_epochs: 200
|
||||
batch_size: 128
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: 200
|
||||
warmup_steps: 1
|
||||
min_lr: 1e-4
|
||||
|
||||
save_model_freq: 100
|
||||
|
||||
model:
|
||||
|
@ -24,12 +24,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 3000
|
||||
n_epochs: 200
|
||||
batch_size: 128
|
||||
learning_rate: 1e-3
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 3000
|
||||
first_cycle_steps: 200
|
||||
warmup_steps: 1
|
||||
min_lr: 1e-4
|
||||
save_model_freq: 100
|
||||
|
@ -23,12 +23,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 500
|
||||
n_epochs: 200
|
||||
batch_size: 128
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: 200
|
||||
warmup_steps: 1
|
||||
min_lr: 1e-4
|
||||
save_model_freq: 100
|
||||
|
@ -24,12 +24,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 8000
|
||||
n_epochs: 3000
|
||||
batch_size: 128
|
||||
learning_rate: 1e-3
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 8000
|
||||
first_cycle_steps: 3000
|
||||
warmup_steps: 1
|
||||
min_lr: 1e-4
|
||||
save_model_freq: 500
|
||||
|
@ -23,12 +23,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 5000
|
||||
n_epochs: 3000
|
||||
batch_size: 256
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 0
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 5000
|
||||
first_cycle_steps: 3000
|
||||
warmup_steps: 100
|
||||
min_lr: 1e-4
|
||||
save_model_freq: 500
|
||||
|
@ -24,12 +24,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 8000
|
||||
n_epochs: 3000
|
||||
batch_size: 256
|
||||
learning_rate: 1e-3
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 8000
|
||||
first_cycle_steps: 3000
|
||||
warmup_steps: 1
|
||||
min_lr: 1e-4
|
||||
save_model_freq: 500
|
||||
|
@ -23,12 +23,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 5000
|
||||
n_epochs: 3000
|
||||
batch_size: 128
|
||||
learning_rate: 1e-3
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 5000
|
||||
first_cycle_steps: 3000
|
||||
warmup_steps: 1
|
||||
min_lr: 1e-4
|
||||
save_model_freq: 500
|
||||
|
@ -24,12 +24,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 8000
|
||||
n_epochs: 3000
|
||||
batch_size: 128
|
||||
learning_rate: 1e-3
|
||||
weight_decay: 1e-5
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 8000
|
||||
first_cycle_steps: 3000
|
||||
warmup_steps: 1
|
||||
min_lr: 1e-4
|
||||
save_model_freq: 500
|
||||
|
@ -23,12 +23,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 5000
|
||||
n_epochs: 3000
|
||||
batch_size: 128
|
||||
learning_rate: 1e-3
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 5000
|
||||
first_cycle_steps: 3000
|
||||
warmup_steps: 1
|
||||
min_lr: 1e-4
|
||||
save_model_freq: 500
|
||||
|
@ -24,12 +24,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 3000
|
||||
n_epochs: 200
|
||||
batch_size: 128
|
||||
learning_rate: 1e-3
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 3000
|
||||
first_cycle_steps: 200
|
||||
warmup_steps: 1
|
||||
min_lr: 1e-4
|
||||
save_model_freq: 100
|
||||
|
@ -23,12 +23,12 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_epochs: 3000
|
||||
n_epochs: 200
|
||||
batch_size: 128
|
||||
learning_rate: 1e-4
|
||||
weight_decay: 1e-6
|
||||
lr_scheduler:
|
||||
first_cycle_steps: 3000
|
||||
first_cycle_steps: 200
|
||||
warmup_steps: 1
|
||||
min_lr: 1e-4
|
||||
save_model_freq: 100
|
||||
|
@ -42,7 +42,7 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 1000
|
||||
n_train_itr: 501
|
||||
n_critic_warmup_itr: 0
|
||||
n_steps: 1000
|
||||
gamma: 0.99
|
||||
@ -55,7 +55,7 @@ train:
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 1000
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
@ -67,7 +67,7 @@ train:
|
||||
reward_scale_running: True
|
||||
reward_scale_const: 1.0
|
||||
gae_lambda: 0.95
|
||||
batch_size: 10000
|
||||
batch_size: 5000
|
||||
update_epochs: 10
|
||||
vf_coef: 0.5
|
||||
target_kl: 1
|
||||
@ -75,7 +75,7 @@ train:
|
||||
model:
|
||||
_target_: model.diffusion.diffusion_ppo.PPODiffusion
|
||||
# HP to tune
|
||||
gamma_denoising: 0.99
|
||||
gamma_denoising: 1
|
||||
clip_ploss_coef: 0.1
|
||||
clip_ploss_coef_base: 0.1
|
||||
clip_ploss_coef_rate: 3
|
||||
@ -94,10 +94,10 @@ model:
|
||||
residual_style: True
|
||||
critic:
|
||||
_target_: model.common.critic.CriticObs
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
mlp_dims: [256, 256, 256]
|
||||
activation_type: Mish
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
ft_denoising_steps: ${ft_denoising_steps}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
|
@ -40,7 +40,7 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 1000
|
||||
n_train_itr: 501
|
||||
n_critic_warmup_itr: 0
|
||||
n_steps: 1000
|
||||
gamma: 0.99
|
||||
@ -65,7 +65,7 @@ train:
|
||||
reward_scale_running: True
|
||||
reward_scale_const: 1.0
|
||||
gae_lambda: 0.95
|
||||
batch_size: 1000
|
||||
batch_size: 500
|
||||
update_epochs: 10
|
||||
vf_coef: 0.5
|
||||
target_kl: 1
|
||||
|
@ -42,7 +42,7 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 1000
|
||||
n_train_itr: 301
|
||||
n_critic_warmup_itr: 0
|
||||
n_steps: 1000
|
||||
gamma: 0.99
|
||||
@ -67,7 +67,7 @@ train:
|
||||
reward_scale_running: True
|
||||
reward_scale_const: 1.0
|
||||
gae_lambda: 0.95
|
||||
batch_size: 10000
|
||||
batch_size: 5000
|
||||
update_epochs: 10
|
||||
vf_coef: 0.5
|
||||
target_kl: 1
|
||||
@ -75,7 +75,7 @@ train:
|
||||
model:
|
||||
_target_: model.diffusion.diffusion_ppo.PPODiffusion
|
||||
# HP to tune
|
||||
gamma_denoising: 0.99
|
||||
gamma_denoising: 1
|
||||
clip_ploss_coef: 0.1
|
||||
clip_ploss_coef_base: 0.1
|
||||
clip_ploss_coef_rate: 3
|
||||
|
@ -40,7 +40,7 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 1000
|
||||
n_train_itr: 301
|
||||
n_critic_warmup_itr: 0
|
||||
n_steps: 1000
|
||||
gamma: 0.99
|
||||
@ -65,7 +65,7 @@ train:
|
||||
reward_scale_running: True
|
||||
reward_scale_const: 1.0
|
||||
gae_lambda: 0.95
|
||||
batch_size: 1000
|
||||
batch_size: 500
|
||||
update_epochs: 10
|
||||
vf_coef: 0.5
|
||||
target_kl: 1
|
||||
|
@ -42,7 +42,7 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 1000
|
||||
n_train_itr: 501
|
||||
n_critic_warmup_itr: 0
|
||||
n_steps: 1000
|
||||
gamma: 0.99
|
||||
@ -55,7 +55,7 @@ train:
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 10000
|
||||
first_cycle_steps: 1000
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
@ -67,7 +67,7 @@ train:
|
||||
reward_scale_running: True
|
||||
reward_scale_const: 1.0
|
||||
gae_lambda: 0.95
|
||||
batch_size: 10000
|
||||
batch_size: 5000
|
||||
update_epochs: 10
|
||||
vf_coef: 0.5
|
||||
target_kl: 1
|
||||
@ -75,7 +75,7 @@ train:
|
||||
model:
|
||||
_target_: model.diffusion.diffusion_ppo.PPODiffusion
|
||||
# HP to tune
|
||||
gamma_denoising: 0.99
|
||||
gamma_denoising: 1
|
||||
clip_ploss_coef: 0.1
|
||||
clip_ploss_coef_base: 0.1
|
||||
clip_ploss_coef_rate: 3
|
||||
@ -94,10 +94,10 @@ model:
|
||||
residual_style: True
|
||||
critic:
|
||||
_target_: model.common.critic.CriticObs
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
mlp_dims: [256, 256, 256]
|
||||
activation_type: Mish
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
ft_denoising_steps: ${ft_denoising_steps}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
|
@ -40,7 +40,7 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 1000
|
||||
n_train_itr: 301
|
||||
n_critic_warmup_itr: 0
|
||||
n_steps: 1000
|
||||
gamma: 0.99
|
||||
@ -65,7 +65,7 @@ train:
|
||||
reward_scale_running: True
|
||||
reward_scale_const: 1.0
|
||||
gae_lambda: 0.95
|
||||
batch_size: 1000
|
||||
batch_size: 500
|
||||
update_epochs: 10
|
||||
vf_coef: 0.5
|
||||
target_kl: 1
|
||||
|
@ -1,6 +1,6 @@
|
||||
## Pre-training experiments
|
||||
|
||||
**Update, Nov 6 2024**: we fixed the issue of the EMA update being too infrequent, which caused slow pre-training. Now the number of epochs needed for pre-training can be much lower than those used in the configs. We recommend training with fewer epochs and testing the early checkpoints.
|
||||
**Update, Nov 20 2024**: We fixed an issue where the EMA update was too infrequent, which caused slow pre-training ([commit](https://github.com/irom-princeton/dppo/commit/e1ef4ca1cfbff85e5ae6c49f5e57debd70174616)). The number of epochs needed for pre-training can now be much lower than those used in the earlier configs (e.g., 3000 for robomimic state and 1000 for robomimic pixel), and we have updated the pre-training configs accordingly in v0.7. If you would like to replicate the original experimental results from the paper, please use v0.6.
|
||||
|
||||
### Comparing diffusion-based RL algorithms (Sec. 5.1)
|
||||
Gym configs are under `cfg/gym/pretrain/<env_name>/`, and the config name is `pre_diffusion_mlp`. Robomimic configs are under `cfg/robomimic/pretrain/<env_name>/`, and the name is also `pre_diffusion_mlp`.
|
||||
|
@ -7,7 +7,7 @@ _target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_img_ta4_td100/2024-07-30_22-23-55/checkpoint/state_5000.pt
|
||||
base_policy_path:
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||
|
||||
@ -28,7 +28,7 @@ n_steps: 300 # each episode takes max_episode_steps / act_steps steps
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
n_envs: 50
|
||||
n_envs: 20 # reduce gpu usage
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 300
|
||||
|
68
cfg/robomimic/eval/can/eval_diffusion_unet.yaml
Normal file
68
cfg/robomimic/eval/can/eval_diffusion_unet.yaml
Normal file
@ -0,0 +1,68 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: can
|
||||
obs_dim: 23
|
||||
action_dim: 7
|
||||
denoising_steps: 20
|
||||
cond_steps: 1
|
||||
horizon_steps: 4
|
||||
act_steps: 4
|
||||
|
||||
n_steps: 75 # each episode takes max_episode_steps / act_steps steps
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
n_envs: 40
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 300
|
||||
save_video: False
|
||||
wrappers:
|
||||
robomimic_lowdim:
|
||||
normalization_path: ${normalization_path}
|
||||
low_dim_keys: ['robot0_eef_pos',
|
||||
'robot0_eef_quat',
|
||||
'robot0_gripper_qpos',
|
||||
'object'] # same order of preprocessed observations
|
||||
multi_step:
|
||||
n_obs_steps: ${cond_steps}
|
||||
n_action_steps: ${act_steps}
|
||||
max_episode_steps: ${env.max_episode_steps}
|
||||
reset_within_step: True
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
randn_clip_value: 3
|
||||
#
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.diffusion.unet.Unet1D
|
||||
diffusion_step_embed_dim: 16
|
||||
dim: 40
|
||||
dim_mults: [1, 2]
|
||||
kernel_size: 5
|
||||
n_groups: 8
|
||||
smaller_encoder: False
|
||||
cond_predict_scale: True
|
||||
action_dim: ${action_dim}
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
102
cfg/robomimic/eval/can/eval_diffusion_unet_img.yaml
Normal file
102
cfg/robomimic/eval/can/eval_diffusion_unet_img.yaml
Normal file
@ -0,0 +1,102 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: can
|
||||
obs_dim: 9
|
||||
action_dim: 7
|
||||
denoising_steps: 100
|
||||
cond_steps: 1
|
||||
img_cond_steps: 1
|
||||
horizon_steps: 4
|
||||
act_steps: 4
|
||||
use_ddim: True
|
||||
ddim_steps: 5
|
||||
|
||||
n_steps: 300 # each episode takes max_episode_steps / act_steps steps
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
n_envs: 20 # reduce gpu usage
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 300
|
||||
save_video: False
|
||||
use_image_obs: True
|
||||
wrappers:
|
||||
robomimic_image:
|
||||
normalization_path: ${normalization_path}
|
||||
low_dim_keys: ['robot0_eef_pos',
|
||||
'robot0_eef_quat',
|
||||
'robot0_gripper_qpos']
|
||||
image_keys: ['robot0_eye_in_hand_image']
|
||||
shape_meta: ${shape_meta}
|
||||
multi_step:
|
||||
n_obs_steps: ${cond_steps}
|
||||
n_action_steps: ${act_steps}
|
||||
max_episode_steps: ${env.max_episode_steps}
|
||||
reset_within_step: True
|
||||
|
||||
shape_meta:
|
||||
obs:
|
||||
rgb:
|
||||
shape: [3, 96, 96]
|
||||
state:
|
||||
shape: [9]
|
||||
action:
|
||||
shape: [7]
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
randn_clip_value: 3
|
||||
#
|
||||
use_ddim: ${use_ddim}
|
||||
ddim_steps: ${ddim_steps}
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.diffusion.unet.VisionUnet1D
|
||||
backbone:
|
||||
_target_: model.common.vit.VitEncoder
|
||||
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
|
||||
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||
cfg:
|
||||
patch_size: 8
|
||||
depth: 1
|
||||
embed_dim: 128
|
||||
num_heads: 4
|
||||
embed_style: embed2
|
||||
embed_norm: 0
|
||||
img_cond_steps: ${img_cond_steps}
|
||||
augment: False
|
||||
spatial_emb: 128
|
||||
diffusion_step_embed_dim: 32
|
||||
dim: 40
|
||||
dim_mults:
|
||||
- 1
|
||||
- 2
|
||||
kernel_size: 5
|
||||
n_groups: 8
|
||||
smaller_encoder: false
|
||||
cond_predict_scale: true
|
||||
action_dim: ${action_dim}
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
@ -7,7 +7,7 @@ _target_: agent.eval.eval_gaussian_agent.EvalGaussianAgent
|
||||
|
||||
name: ${env_name}_eval_gaussian_mlp_ta${horizon_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_gaussian_mlp_ta4/2024-06-28_13-31-00/checkpoint/state_5000.pt
|
||||
base_policy_path:
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
|
||||
|
||||
|
@ -7,7 +7,7 @@ _target_: agent.eval.eval_gaussian_img_agent.EvalImgGaussianAgent
|
||||
|
||||
name: ${env_name}_eval_gaussian_mlp_img_ta${horizon_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_gaussian_mlp_img_ta4/2024-07-28_21-54-40/checkpoint/state_1000.pt
|
||||
base_policy_path:
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||
|
||||
|
65
cfg/robomimic/eval/lift/eval_diffusion_mlp.yaml
Normal file
65
cfg/robomimic/eval/lift/eval_diffusion_mlp.yaml
Normal file
@ -0,0 +1,65 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: lift
|
||||
obs_dim: 19
|
||||
action_dim: 7
|
||||
denoising_steps: 20
|
||||
cond_steps: 1
|
||||
horizon_steps: 4
|
||||
act_steps: 4
|
||||
|
||||
n_steps: 300 # each episode takes max_episode_steps / act_steps steps
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
n_envs: 50
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 300
|
||||
save_video: False
|
||||
wrappers:
|
||||
robomimic_lowdim:
|
||||
normalization_path: ${normalization_path}
|
||||
low_dim_keys: ['robot0_eef_pos',
|
||||
'robot0_eef_quat',
|
||||
'robot0_gripper_qpos',
|
||||
'object'] # same order of preprocessed observations
|
||||
multi_step:
|
||||
n_obs_steps: ${cond_steps}
|
||||
n_action_steps: ${act_steps}
|
||||
max_episode_steps: ${env.max_episode_steps}
|
||||
reset_within_step: True
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
randn_clip_value: 3
|
||||
#
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
|
||||
time_dim: 16
|
||||
mlp_dims: [512, 512, 512]
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
action_dim: ${action_dim}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
97
cfg/robomimic/eval/lift/eval_diffusion_mlp_img.yaml
Normal file
97
cfg/robomimic/eval/lift/eval_diffusion_mlp_img.yaml
Normal file
@ -0,0 +1,97 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: lift
|
||||
obs_dim: 9
|
||||
action_dim: 7
|
||||
denoising_steps: 100
|
||||
cond_steps: 1
|
||||
img_cond_steps: 1
|
||||
horizon_steps: 4
|
||||
act_steps: 4
|
||||
use_ddim: True
|
||||
ddim_steps: 5
|
||||
|
||||
n_steps: 300 # each episode takes max_episode_steps / act_steps steps
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
n_envs: 20 # reduce gpu usage
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 300
|
||||
save_video: False
|
||||
use_image_obs: True
|
||||
wrappers:
|
||||
robomimic_image:
|
||||
normalization_path: ${normalization_path}
|
||||
low_dim_keys: ['robot0_eef_pos',
|
||||
'robot0_eef_quat',
|
||||
'robot0_gripper_qpos']
|
||||
image_keys: ['robot0_eye_in_hand_image']
|
||||
shape_meta: ${shape_meta}
|
||||
multi_step:
|
||||
n_obs_steps: ${cond_steps}
|
||||
n_action_steps: ${act_steps}
|
||||
max_episode_steps: ${env.max_episode_steps}
|
||||
reset_within_step: True
|
||||
|
||||
shape_meta:
|
||||
obs:
|
||||
rgb:
|
||||
shape: [3, 96, 96]
|
||||
state:
|
||||
shape: [9]
|
||||
action:
|
||||
shape: [7]
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
randn_clip_value: 3
|
||||
#
|
||||
use_ddim: ${use_ddim}
|
||||
ddim_steps: ${ddim_steps}
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.diffusion.mlp_diffusion.VisionDiffusionMLP
|
||||
backbone:
|
||||
_target_: model.common.vit.VitEncoder
|
||||
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
|
||||
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||
cfg:
|
||||
patch_size: 8
|
||||
depth: 1
|
||||
embed_dim: 128
|
||||
num_heads: 4
|
||||
embed_style: embed2
|
||||
embed_norm: 0
|
||||
augment: False
|
||||
spatial_emb: 128
|
||||
time_dim: 32
|
||||
mlp_dims: [512, 512, 512]
|
||||
residual_style: True
|
||||
img_cond_steps: ${img_cond_steps}
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
action_dim: ${action_dim}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
68
cfg/robomimic/eval/lift/eval_diffusion_unet.yaml
Normal file
68
cfg/robomimic/eval/lift/eval_diffusion_unet.yaml
Normal file
@ -0,0 +1,68 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: lift
|
||||
obs_dim: 19
|
||||
action_dim: 7
|
||||
denoising_steps: 20
|
||||
cond_steps: 1
|
||||
horizon_steps: 4
|
||||
act_steps: 4
|
||||
|
||||
n_steps: 75 # each episode takes max_episode_steps / act_steps steps
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
n_envs: 40
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 300
|
||||
save_video: False
|
||||
wrappers:
|
||||
robomimic_lowdim:
|
||||
normalization_path: ${normalization_path}
|
||||
low_dim_keys: ['robot0_eef_pos',
|
||||
'robot0_eef_quat',
|
||||
'robot0_gripper_qpos',
|
||||
'object'] # same order of preprocessed observations
|
||||
multi_step:
|
||||
n_obs_steps: ${cond_steps}
|
||||
n_action_steps: ${act_steps}
|
||||
max_episode_steps: ${env.max_episode_steps}
|
||||
reset_within_step: True
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
randn_clip_value: 3
|
||||
#
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.diffusion.unet.Unet1D
|
||||
diffusion_step_embed_dim: 16
|
||||
dim: 40
|
||||
dim_mults: [1, 2]
|
||||
kernel_size: 5
|
||||
n_groups: 8
|
||||
smaller_encoder: False
|
||||
cond_predict_scale: True
|
||||
action_dim: ${action_dim}
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
100
cfg/robomimic/eval/lift/eval_diffusion_unet_img.yaml
Normal file
100
cfg/robomimic/eval/lift/eval_diffusion_unet_img.yaml
Normal file
@ -0,0 +1,100 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: lift
|
||||
obs_dim: 9
|
||||
action_dim: 7
|
||||
denoising_steps: 100
|
||||
cond_steps: 1
|
||||
img_cond_steps: 1
|
||||
horizon_steps: 4
|
||||
act_steps: 4
|
||||
use_ddim: True
|
||||
ddim_steps: 5
|
||||
|
||||
n_steps: 300 # each episode takes max_episode_steps / act_steps steps
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
n_envs: 20 # reduce gpu usage
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 300
|
||||
save_video: False
|
||||
use_image_obs: True
|
||||
wrappers:
|
||||
robomimic_image:
|
||||
normalization_path: ${normalization_path}
|
||||
low_dim_keys: ['robot0_eef_pos',
|
||||
'robot0_eef_quat',
|
||||
'robot0_gripper_qpos']
|
||||
image_keys: ['robot0_eye_in_hand_image']
|
||||
shape_meta: ${shape_meta}
|
||||
multi_step:
|
||||
n_obs_steps: ${cond_steps}
|
||||
n_action_steps: ${act_steps}
|
||||
max_episode_steps: ${env.max_episode_steps}
|
||||
reset_within_step: True
|
||||
|
||||
shape_meta:
|
||||
obs:
|
||||
rgb:
|
||||
shape: [3, 96, 96]
|
||||
state:
|
||||
shape: [9]
|
||||
action:
|
||||
shape: [7]
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
randn_clip_value: 3
|
||||
#
|
||||
use_ddim: ${use_ddim}
|
||||
ddim_steps: ${ddim_steps}
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.diffusion.unet.VisionUnet1D
|
||||
backbone:
|
||||
_target_: model.common.vit.VitEncoder
|
||||
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
|
||||
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||
cfg:
|
||||
patch_size: 8
|
||||
depth: 1
|
||||
embed_dim: 128
|
||||
num_heads: 4
|
||||
embed_style: embed2
|
||||
embed_norm: 0
|
||||
img_cond_steps: ${img_cond_steps}
|
||||
augment: False
|
||||
spatial_emb: 128
|
||||
diffusion_step_embed_dim: 32
|
||||
dim: 40
|
||||
dim_mults: [1, 2]
|
||||
kernel_size: 5
|
||||
n_groups: 8
|
||||
smaller_encoder: False
|
||||
cond_predict_scale: True
|
||||
action_dim: ${action_dim}
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
@ -18,8 +18,8 @@ obs_dim: 23
|
||||
action_dim: 7
|
||||
denoising_steps: 20
|
||||
cond_steps: 1
|
||||
horizon_steps: 1
|
||||
act_steps: 1
|
||||
horizon_steps: 4
|
||||
act_steps: 4
|
||||
|
||||
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
|
||||
render_num: 0
|
||||
|
97
cfg/robomimic/eval/square/eval_diffusion_mlp_img.yaml
Normal file
97
cfg/robomimic/eval/square/eval_diffusion_mlp_img.yaml
Normal file
@ -0,0 +1,97 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: square
|
||||
obs_dim: 9
|
||||
action_dim: 7
|
||||
denoising_steps: 100
|
||||
cond_steps: 1
|
||||
img_cond_steps: 1
|
||||
horizon_steps: 4
|
||||
act_steps: 4
|
||||
use_ddim: True
|
||||
ddim_steps: 5
|
||||
|
||||
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
n_envs: 20 # reduce gpu usage
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 400
|
||||
save_video: False
|
||||
use_image_obs: True
|
||||
wrappers:
|
||||
robomimic_image:
|
||||
normalization_path: ${normalization_path}
|
||||
low_dim_keys: ['robot0_eef_pos',
|
||||
'robot0_eef_quat',
|
||||
'robot0_gripper_qpos']
|
||||
image_keys: ['agentview_image']
|
||||
shape_meta: ${shape_meta}
|
||||
multi_step:
|
||||
n_obs_steps: ${cond_steps}
|
||||
n_action_steps: ${act_steps}
|
||||
max_episode_steps: ${env.max_episode_steps}
|
||||
reset_within_step: True
|
||||
|
||||
shape_meta:
|
||||
obs:
|
||||
rgb:
|
||||
shape: [3, 96, 96]
|
||||
state:
|
||||
shape: [9]
|
||||
action:
|
||||
shape: [7]
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
randn_clip_value: 3
|
||||
#
|
||||
use_ddim: ${use_ddim}
|
||||
ddim_steps: ${ddim_steps}
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.diffusion.mlp_diffusion.VisionDiffusionMLP
|
||||
backbone:
|
||||
_target_: model.common.vit.VitEncoder
|
||||
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
|
||||
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||
cfg:
|
||||
patch_size: 8
|
||||
depth: 1
|
||||
embed_dim: 128
|
||||
num_heads: 4
|
||||
embed_style: embed2
|
||||
embed_norm: 0
|
||||
augment: False
|
||||
spatial_emb: 128
|
||||
time_dim: 32
|
||||
mlp_dims: [768, 768, 768]
|
||||
residual_style: True
|
||||
img_cond_steps: ${img_cond_steps}
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
action_dim: ${action_dim}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
68
cfg/robomimic/eval/square/eval_diffusion_unet.yaml
Normal file
68
cfg/robomimic/eval/square/eval_diffusion_unet.yaml
Normal file
@ -0,0 +1,68 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: square
|
||||
obs_dim: 23
|
||||
action_dim: 7
|
||||
denoising_steps: 20
|
||||
cond_steps: 1
|
||||
horizon_steps: 4
|
||||
act_steps: 4
|
||||
|
||||
n_steps: 100 # each episode takes max_episode_steps / act_steps steps
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
n_envs: 50
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 400
|
||||
save_video: False
|
||||
wrappers:
|
||||
robomimic_lowdim:
|
||||
normalization_path: ${normalization_path}
|
||||
low_dim_keys: ['robot0_eef_pos',
|
||||
'robot0_eef_quat',
|
||||
'robot0_gripper_qpos',
|
||||
'object'] # same order of preprocessed observations
|
||||
multi_step:
|
||||
n_obs_steps: ${cond_steps}
|
||||
n_action_steps: ${act_steps}
|
||||
max_episode_steps: ${env.max_episode_steps}
|
||||
reset_within_step: True
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
randn_clip_value: 3
|
||||
#
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.diffusion.unet.Unet1D
|
||||
diffusion_step_embed_dim: 16
|
||||
dim: 64
|
||||
dim_mults: [1, 2]
|
||||
kernel_size: 5
|
||||
n_groups: 8
|
||||
smaller_encoder: False
|
||||
cond_predict_scale: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
action_dim: ${action_dim}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
102
cfg/robomimic/eval/square/eval_diffusion_unet_img.yaml
Normal file
102
cfg/robomimic/eval/square/eval_diffusion_unet_img.yaml
Normal file
@ -0,0 +1,102 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: square
|
||||
obs_dim: 9
|
||||
action_dim: 7
|
||||
denoising_steps: 100
|
||||
cond_steps: 1
|
||||
img_cond_steps: 1
|
||||
horizon_steps: 4
|
||||
act_steps: 4
|
||||
use_ddim: True
|
||||
ddim_steps: 5
|
||||
|
||||
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
n_envs: 30 # reduce gpu usage
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 400
|
||||
save_video: False
|
||||
use_image_obs: True
|
||||
wrappers:
|
||||
robomimic_image:
|
||||
normalization_path: ${normalization_path}
|
||||
low_dim_keys: ['robot0_eef_pos',
|
||||
'robot0_eef_quat',
|
||||
'robot0_gripper_qpos']
|
||||
image_keys: ['agentview_image']
|
||||
shape_meta: ${shape_meta}
|
||||
multi_step:
|
||||
n_obs_steps: ${cond_steps}
|
||||
n_action_steps: ${act_steps}
|
||||
max_episode_steps: ${env.max_episode_steps}
|
||||
reset_within_step: True
|
||||
|
||||
shape_meta:
|
||||
obs:
|
||||
rgb:
|
||||
shape: [3, 96, 96]
|
||||
state:
|
||||
shape: [9]
|
||||
action:
|
||||
shape: [7]
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
randn_clip_value: 3
|
||||
#
|
||||
use_ddim: ${use_ddim}
|
||||
ddim_steps: ${ddim_steps}
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.diffusion.unet.VisionUnet1D
|
||||
backbone:
|
||||
_target_: model.common.vit.VitEncoder
|
||||
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||
num_channel: ${eval:'3 * ${img_cond_steps}'} # history frames are concatenated within each image patch
|
||||
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||
cfg:
|
||||
patch_size: 8
|
||||
depth: 1
|
||||
embed_dim: 128
|
||||
num_heads: 4
|
||||
embed_style: embed2
|
||||
embed_norm: 0
|
||||
img_cond_steps: ${img_cond_steps}
|
||||
augment: False
|
||||
spatial_emb: 128
|
||||
diffusion_step_embed_dim: 32
|
||||
dim: 64
|
||||
dim_mults:
|
||||
- 1
|
||||
- 2
|
||||
kernel_size: 5
|
||||
n_groups: 8
|
||||
smaller_encoder: false
|
||||
cond_predict_scale: true
|
||||
action_dim: ${action_dim}
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
@ -3,9 +3,9 @@ defaults:
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_gaussian_agent.EvalGaussianAgent
|
||||
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_gaussian_mlp_ta${horizon_steps}
|
||||
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
||||
@ -13,12 +13,13 @@ normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: square
|
||||
obs_dim: 23
|
||||
action_dim: 7
|
||||
env_name: transport
|
||||
obs_dim: 59
|
||||
action_dim: 14
|
||||
denoising_steps: 20
|
||||
cond_steps: 1
|
||||
horizon_steps: 1
|
||||
act_steps: 1
|
||||
horizon_steps: 8
|
||||
act_steps: 8
|
||||
|
||||
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
|
||||
render_num: 0
|
||||
@ -27,7 +28,7 @@ env:
|
||||
n_envs: 50
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 400
|
||||
max_episode_steps: 800
|
||||
save_video: False
|
||||
wrappers:
|
||||
robomimic_lowdim:
|
||||
@ -35,6 +36,9 @@ env:
|
||||
low_dim_keys: ['robot0_eef_pos',
|
||||
'robot0_eef_quat',
|
||||
'robot0_gripper_qpos',
|
||||
"robot1_eef_pos",
|
||||
"robot1_eef_quat",
|
||||
"robot1_gripper_qpos",
|
||||
'object'] # same order as the preprocessed observations
|
||||
multi_step:
|
||||
n_obs_steps: ${cond_steps}
|
||||
@ -42,19 +46,24 @@ env:
|
||||
max_episode_steps: ${env.max_episode_steps}
|
||||
reset_within_step: True
|
||||
|
||||
|
||||
model:
|
||||
_target_: model.common.gaussian.GaussianModel
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
randn_clip_value: 3
|
||||
#
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.common.mlp_gaussian.Gaussian_MLP
|
||||
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
|
||||
time_dim: 32
|
||||
mlp_dims: [1024, 1024, 1024]
|
||||
activation_type: ReLU
|
||||
use_layernorm: true
|
||||
fixed_std: 0.1
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
|
||||
action_dim: ${action_dim}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
102
cfg/robomimic/eval/transport/eval_diffusion_mlp_img.yaml
Normal file
102
cfg/robomimic/eval/transport/eval_diffusion_mlp_img.yaml
Normal file
@ -0,0 +1,102 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: transport
|
||||
obs_dim: 18
|
||||
action_dim: 14
|
||||
denoising_steps: 100
|
||||
cond_steps: 1
|
||||
img_cond_steps: 1
|
||||
horizon_steps: 8
|
||||
act_steps: 8
|
||||
use_ddim: True
|
||||
ddim_steps: 5
|
||||
|
||||
n_steps: 200 # each episode takes max_episode_steps / act_steps steps
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
n_envs: 30 # reduce gpu usage
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 800
|
||||
save_video: False
|
||||
use_image_obs: True
|
||||
wrappers:
|
||||
robomimic_image:
|
||||
normalization_path: ${normalization_path}
|
||||
low_dim_keys: ['robot0_eef_pos',
|
||||
'robot0_eef_quat',
|
||||
'robot0_gripper_qpos',
|
||||
"robot1_eef_pos",
|
||||
"robot1_eef_quat",
|
||||
"robot1_gripper_qpos"]
|
||||
image_keys: ['shouldercamera0_image',
|
||||
'shouldercamera1_image']
|
||||
shape_meta: ${shape_meta}
|
||||
multi_step:
|
||||
n_obs_steps: ${cond_steps}
|
||||
n_action_steps: ${act_steps}
|
||||
max_episode_steps: ${env.max_episode_steps}
|
||||
reset_within_step: True
|
||||
|
||||
shape_meta:
|
||||
obs:
|
||||
rgb:
|
||||
shape: [6, 96, 96]
|
||||
state:
|
||||
shape: [18]
|
||||
action:
|
||||
shape: [14]
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
randn_clip_value: 3
|
||||
#
|
||||
use_ddim: ${use_ddim}
|
||||
ddim_steps: ${ddim_steps}
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.diffusion.mlp_diffusion.VisionDiffusionMLP
|
||||
backbone:
|
||||
_target_: model.common.vit.VitEncoder
|
||||
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||
num_channel: ${eval:'3 * ${img_cond_steps}'} # history frames are concatenated within each image patch
|
||||
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||
cfg:
|
||||
patch_size: 8
|
||||
depth: 1
|
||||
embed_dim: 128
|
||||
num_heads: 4
|
||||
embed_style: embed2
|
||||
embed_norm: 0
|
||||
augment: False
|
||||
num_img: 2
|
||||
spatial_emb: 128
|
||||
time_dim: 32
|
||||
mlp_dims: [768, 768, 768]
|
||||
residual_style: True
|
||||
img_cond_steps: ${img_cond_steps}
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
action_dim: ${action_dim}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
71
cfg/robomimic/eval/transport/eval_diffusion_unet.yaml
Normal file
71
cfg/robomimic/eval/transport/eval_diffusion_unet.yaml
Normal file
@ -0,0 +1,71 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: transport
|
||||
obs_dim: 59
|
||||
action_dim: 14
|
||||
denoising_steps: 20
|
||||
cond_steps: 1
|
||||
horizon_steps: 16
|
||||
act_steps: 8
|
||||
|
||||
n_steps: 100 # each episode takes max_episode_steps / act_steps steps
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
n_envs: 50
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 800
|
||||
save_video: False
|
||||
wrappers:
|
||||
robomimic_lowdim:
|
||||
normalization_path: ${normalization_path}
|
||||
low_dim_keys: ['robot0_eef_pos',
|
||||
'robot0_eef_quat',
|
||||
'robot0_gripper_qpos',
|
||||
"robot1_eef_pos",
|
||||
"robot1_eef_quat",
|
||||
"robot1_gripper_qpos",
|
||||
'object'] # same order as the preprocessed observations
|
||||
multi_step:
|
||||
n_obs_steps: ${cond_steps}
|
||||
n_action_steps: ${act_steps}
|
||||
max_episode_steps: ${env.max_episode_steps}
|
||||
reset_within_step: True
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
randn_clip_value: 3
|
||||
#
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.diffusion.unet.Unet1D
|
||||
diffusion_step_embed_dim: 16
|
||||
dim: 64
|
||||
dim_mults: [1, 2]
|
||||
kernel_size: 5
|
||||
n_groups: 8
|
||||
smaller_encoder: False
|
||||
cond_predict_scale: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
action_dim: ${action_dim}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
107
cfg/robomimic/eval/transport/eval_diffusion_unet_img.yaml
Normal file
107
cfg/robomimic/eval/transport/eval_diffusion_unet_img.yaml
Normal file
@ -0,0 +1,107 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
|
||||
|
||||
name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path:
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: transport
|
||||
obs_dim: 18
|
||||
action_dim: 14
|
||||
denoising_steps: 100
|
||||
cond_steps: 1
|
||||
img_cond_steps: 1
|
||||
horizon_steps: 16
|
||||
act_steps: 8
|
||||
use_ddim: True
|
||||
ddim_steps: 5
|
||||
|
||||
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
|
||||
render_num: 0
|
||||
|
||||
env:
|
||||
n_envs: 30 # reduce gpu usage
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 800
|
||||
save_video: False
|
||||
use_image_obs: True
|
||||
wrappers:
|
||||
robomimic_image:
|
||||
normalization_path: ${normalization_path}
|
||||
low_dim_keys: ['robot0_eef_pos',
|
||||
'robot0_eef_quat',
|
||||
'robot0_gripper_qpos',
|
||||
"robot1_eef_pos",
|
||||
"robot1_eef_quat",
|
||||
"robot1_gripper_qpos"]
|
||||
image_keys: ['shouldercamera0_image',
|
||||
'shouldercamera1_image']
|
||||
shape_meta: ${shape_meta}
|
||||
multi_step:
|
||||
n_obs_steps: ${cond_steps}
|
||||
n_action_steps: ${act_steps}
|
||||
max_episode_steps: ${env.max_episode_steps}
|
||||
reset_within_step: True
|
||||
|
||||
shape_meta:
|
||||
obs:
|
||||
rgb:
|
||||
shape: [6, 96, 96]
|
||||
state:
|
||||
shape: [18]
|
||||
action:
|
||||
shape: [14]
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion.DiffusionModel
|
||||
predict_epsilon: True
|
||||
denoised_clip_value: 1.0
|
||||
randn_clip_value: 3
|
||||
#
|
||||
use_ddim: ${use_ddim}
|
||||
ddim_steps: ${ddim_steps}
|
||||
network_path: ${base_policy_path}
|
||||
network:
|
||||
_target_: model.diffusion.unet.VisionUnet1D
|
||||
backbone:
|
||||
_target_: model.common.vit.VitEncoder
|
||||
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||
num_channel: ${eval:'3 * ${img_cond_steps}'} # history frames are concatenated within each image patch
|
||||
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||
cfg:
|
||||
patch_size: 8
|
||||
depth: 1
|
||||
embed_dim: 128
|
||||
num_heads: 4
|
||||
embed_style: embed2
|
||||
embed_norm: 0
|
||||
img_cond_steps: ${img_cond_steps}
|
||||
augment: False
|
||||
num_img: 2
|
||||
spatial_emb: 128
|
||||
diffusion_step_embed_dim: 32
|
||||
dim: 64
|
||||
dim_mults:
|
||||
- 1
|
||||
- 2
|
||||
kernel_size: 5
|
||||
n_groups: 8
|
||||
smaller_encoder: false
|
||||
cond_predict_scale: true
|
||||
action_dim: ${action_dim}
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
@ -7,7 +7,8 @@ _target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
|
||||
|
||||
name: ${env_name}_ft_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_5000.pt # use 8000 for comparing policy parameterizations
|
||||
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_5000.pt # use 5000 for comparing diffusion rl algorithms
|
||||
# base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_8000.pt # use 8000 for comparing policy parameterizations
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
|
||||
|
||||
@ -54,13 +55,13 @@ train:
|
||||
actor_lr: 1e-4
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-4
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
|
@ -66,16 +66,16 @@ train:
|
||||
gamma: 0.999
|
||||
augment: True
|
||||
grad_accumulate: 15
|
||||
actor_lr: 1e-4
|
||||
actor_lr: 5e-5
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-4
|
||||
min_lr: 5e-5
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
|
@ -27,7 +27,7 @@ env:
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 300
|
||||
save_video: false
|
||||
save_video: False
|
||||
wrappers:
|
||||
robomimic_lowdim:
|
||||
normalization_path: ${normalization_path}
|
||||
@ -47,20 +47,20 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 300
|
||||
n_train_itr: 151
|
||||
n_critic_warmup_itr: 2
|
||||
n_steps: 300
|
||||
gamma: 0.999
|
||||
actor_lr: 1e-5
|
||||
actor_lr: 1e-4
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-5
|
||||
min_lr: 1e-4
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
|
173
cfg/robomimic/finetune/can/ft_ppo_diffusion_unet_img.yaml
Normal file
173
cfg/robomimic/finetune/can/ft_ppo_diffusion_unet_img.yaml
Normal file
@ -0,0 +1,173 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent
|
||||
|
||||
name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-34-05_42/checkpoint/state_500.pt
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: can
|
||||
obs_dim: 9
|
||||
action_dim: 7
|
||||
denoising_steps: 100
|
||||
ft_denoising_steps: 5
|
||||
cond_steps: 1
|
||||
img_cond_steps: 1
|
||||
horizon_steps: 4
|
||||
act_steps: 4
|
||||
use_ddim: True
|
||||
|
||||
env:
|
||||
n_envs: 50
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 300
|
||||
save_video: False
|
||||
use_image_obs: True
|
||||
wrappers:
|
||||
robomimic_image:
|
||||
normalization_path: ${normalization_path}
|
||||
low_dim_keys: ['robot0_eef_pos',
|
||||
'robot0_eef_quat',
|
||||
'robot0_gripper_qpos']
|
||||
image_keys: ['robot0_eye_in_hand_image']
|
||||
shape_meta: ${shape_meta}
|
||||
multi_step:
|
||||
n_obs_steps: ${cond_steps}
|
||||
n_action_steps: ${act_steps}
|
||||
max_episode_steps: ${env.max_episode_steps}
|
||||
reset_within_step: True
|
||||
|
||||
shape_meta:
|
||||
obs:
|
||||
rgb:
|
||||
shape: [3, 96, 96]
|
||||
state:
|
||||
shape: [9]
|
||||
action:
|
||||
shape: [7]
|
||||
|
||||
wandb:
|
||||
entity: ${oc.env:DPPO_WANDB_ENTITY}
|
||||
project: robomimic-${env_name}-finetune
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 151
|
||||
n_critic_warmup_itr: 2
|
||||
n_steps: 300
|
||||
gamma: 0.999
|
||||
augment: True
|
||||
grad_accumulate: 15
|
||||
actor_lr: 5e-5
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 5e-5
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
val_freq: 10
|
||||
render:
|
||||
freq: 1
|
||||
num: 0
|
||||
# PPO specific
|
||||
reward_scale_running: True
|
||||
reward_scale_const: 1.0
|
||||
gae_lambda: 0.95
|
||||
batch_size: 500
|
||||
logprob_batch_size: 500
|
||||
update_epochs: 10
|
||||
vf_coef: 0.5
|
||||
target_kl: 1
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion_ppo.PPODiffusion
|
||||
# HP to tune
|
||||
gamma_denoising: 0.99
|
||||
clip_ploss_coef: 0.01
|
||||
clip_ploss_coef_base: 0.001
|
||||
clip_ploss_coef_rate: 3
|
||||
randn_clip_value: 3
|
||||
min_sampling_denoising_std: 0.1
|
||||
min_logprob_denoising_std: 0.1
|
||||
#
|
||||
use_ddim: ${use_ddim}
|
||||
ddim_steps: ${ft_denoising_steps}
|
||||
learn_eta: False
|
||||
eta:
|
||||
base_eta: 1
|
||||
input_dim: ${obs_dim}
|
||||
mlp_dims: [256, 256]
|
||||
action_dim: ${action_dim}
|
||||
min_eta: 0.1
|
||||
max_eta: 1.0
|
||||
_target_: model.diffusion.eta.EtaFixed
|
||||
network_path: ${base_policy_path}
|
||||
actor:
|
||||
_target_: model.diffusion.unet.VisionUnet1D
|
||||
backbone:
|
||||
_target_: model.common.vit.VitEncoder
|
||||
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||
num_channel: ${eval:'3 * ${img_cond_steps}'} # history frames are concatenated within each image patch
|
||||
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||
cfg:
|
||||
patch_size: 8
|
||||
depth: 1
|
||||
embed_dim: 128
|
||||
num_heads: 4
|
||||
embed_style: embed2
|
||||
embed_norm: 0
|
||||
img_cond_steps: ${img_cond_steps}
|
||||
augment: False
|
||||
spatial_emb: 128
|
||||
diffusion_step_embed_dim: 32
|
||||
dim: 40
|
||||
dim_mults: [1, 2]
|
||||
kernel_size: 5
|
||||
n_groups: 8
|
||||
smaller_encoder: False
|
||||
cond_predict_scale: True
|
||||
action_dim: ${action_dim}
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
critic:
|
||||
_target_: model.common.critic.ViTCritic
|
||||
spatial_emb: 128
|
||||
augment: False
|
||||
backbone:
|
||||
_target_: model.common.vit.VitEncoder
|
||||
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||
num_channel: ${eval:'3 * ${img_cond_steps}'} # history frames are concatenated within each image patch
|
||||
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||
cfg:
|
||||
patch_size: 8
|
||||
depth: 1
|
||||
embed_dim: 128
|
||||
num_heads: 4
|
||||
embed_style: embed2
|
||||
embed_norm: 0
|
||||
img_cond_steps: ${img_cond_steps}
|
||||
mlp_dims: [256, 256, 256]
|
||||
activation_type: Mish
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
ft_denoising_steps: ${ft_denoising_steps}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
@ -45,20 +45,20 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 300
|
||||
n_train_itr: 151
|
||||
n_critic_warmup_itr: 2
|
||||
n_steps: 300
|
||||
gamma: 0.999
|
||||
actor_lr: 1e-5
|
||||
actor_lr: 1e-4
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-5
|
||||
min_lr: 1e-4
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
|
@ -57,22 +57,22 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 200
|
||||
n_train_itr: 151
|
||||
n_critic_warmup_itr: 2
|
||||
n_steps: 300
|
||||
gamma: 0.999
|
||||
augment: True
|
||||
grad_accumulate: 5
|
||||
actor_lr: 1e-5
|
||||
actor_lr: 1e-4
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 200
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-5
|
||||
min_lr: 1e-4
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 200
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
@ -140,9 +140,9 @@ model:
|
||||
embed_style: embed2
|
||||
embed_norm: 0
|
||||
img_cond_steps: ${img_cond_steps}
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
mlp_dims: [256, 256, 256]
|
||||
activation_type: Mish
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
device: ${device}
|
@ -45,20 +45,20 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 300
|
||||
n_train_itr: 151
|
||||
n_critic_warmup_itr: 2
|
||||
n_steps: 300
|
||||
gamma: 0.999
|
||||
actor_lr: 1e-5
|
||||
actor_lr: 1e-4
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-5
|
||||
min_lr: 1e-4
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
|
@ -46,20 +46,20 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 300
|
||||
n_train_itr: 151
|
||||
n_critic_warmup_itr: 2
|
||||
n_steps: 300
|
||||
gamma: 0.999
|
||||
actor_lr: 1e-5
|
||||
actor_lr: 1e-4
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-5
|
||||
min_lr: 1e-4
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
|
@ -7,7 +7,8 @@ _target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
|
||||
|
||||
name: ${env_name}_ft_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_5000.pt # use 8000 for comparing policy parameterizations
|
||||
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_5000.pt # use 5000 for comparing diffusion rl algorithms
|
||||
# base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_8000.pt # use 8000 for comparing policy parameterizations
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
|
||||
|
||||
@ -54,13 +55,13 @@ train:
|
||||
actor_lr: 1e-4
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-4
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
|
@ -60,22 +60,22 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 151
|
||||
n_train_itr: 81
|
||||
n_critic_warmup_itr: 2
|
||||
n_steps: 300
|
||||
gamma: 0.999
|
||||
augment: True
|
||||
grad_accumulate: 15
|
||||
actor_lr: 1e-4
|
||||
actor_lr: 5e-5
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-4
|
||||
min_lr: 5e-5
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
|
@ -27,7 +27,7 @@ env:
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 300
|
||||
save_video: false
|
||||
save_video: False
|
||||
wrappers:
|
||||
robomimic_lowdim:
|
||||
normalization_path: ${normalization_path}
|
||||
@ -47,20 +47,20 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 300
|
||||
n_train_itr: 81
|
||||
n_critic_warmup_itr: 2
|
||||
n_steps: 300
|
||||
gamma: 0.999
|
||||
actor_lr: 1e-5
|
||||
actor_lr: 1e-4
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-5
|
||||
min_lr: 1e-4
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
@ -102,10 +102,10 @@ model:
|
||||
action_dim: ${action_dim}
|
||||
critic:
|
||||
_target_: model.common.critic.CriticObs
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
mlp_dims: [256, 256, 256]
|
||||
activation_type: Mish
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
ft_denoising_steps: ${ft_denoising_steps}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
|
173
cfg/robomimic/finetune/lift/ft_ppo_diffusion_unet_img.yaml
Normal file
173
cfg/robomimic/finetune/lift/ft_ppo_diffusion_unet_img.yaml
Normal file
@ -0,0 +1,173 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent
|
||||
|
||||
name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-35-19_42/checkpoint/state_500.pt
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: lift
|
||||
obs_dim: 9
|
||||
action_dim: 7
|
||||
denoising_steps: 100
|
||||
ft_denoising_steps: 5
|
||||
cond_steps: 1
|
||||
img_cond_steps: 1
|
||||
horizon_steps: 4
|
||||
act_steps: 4
|
||||
use_ddim: True
|
||||
|
||||
env:
|
||||
n_envs: 50
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 300
|
||||
save_video: False
|
||||
use_image_obs: True
|
||||
wrappers:
|
||||
robomimic_image:
|
||||
normalization_path: ${normalization_path}
|
||||
low_dim_keys: ['robot0_eef_pos',
|
||||
'robot0_eef_quat',
|
||||
'robot0_gripper_qpos']
|
||||
image_keys: ['robot0_eye_in_hand_image']
|
||||
shape_meta: ${shape_meta}
|
||||
multi_step:
|
||||
n_obs_steps: ${cond_steps}
|
||||
n_action_steps: ${act_steps}
|
||||
max_episode_steps: ${env.max_episode_steps}
|
||||
reset_within_step: True
|
||||
|
||||
shape_meta:
|
||||
obs:
|
||||
rgb:
|
||||
shape: [3, 96, 96]
|
||||
state:
|
||||
shape: [9]
|
||||
action:
|
||||
shape: [7]
|
||||
|
||||
wandb:
|
||||
entity: ${oc.env:DPPO_WANDB_ENTITY}
|
||||
project: robomimic-${env_name}-finetune
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 81
|
||||
n_critic_warmup_itr: 2
|
||||
n_steps: 300
|
||||
gamma: 0.999
|
||||
augment: True
|
||||
grad_accumulate: 15
|
||||
actor_lr: 5e-5
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 5e-5
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
val_freq: 10
|
||||
render:
|
||||
freq: 1
|
||||
num: 0
|
||||
# PPO specific
|
||||
reward_scale_running: True
|
||||
reward_scale_const: 1.0
|
||||
gae_lambda: 0.95
|
||||
batch_size: 500
|
||||
logprob_batch_size: 500
|
||||
update_epochs: 10
|
||||
vf_coef: 0.5
|
||||
target_kl: 1
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion_ppo.PPODiffusion
|
||||
# HP to tune
|
||||
gamma_denoising: 0.99
|
||||
clip_ploss_coef: 0.01
|
||||
clip_ploss_coef_base: 0.001
|
||||
clip_ploss_coef_rate: 3
|
||||
randn_clip_value: 3
|
||||
min_sampling_denoising_std: 0.1
|
||||
min_logprob_denoising_std: 0.1
|
||||
#
|
||||
use_ddim: ${use_ddim}
|
||||
ddim_steps: ${ft_denoising_steps}
|
||||
learn_eta: False
|
||||
eta:
|
||||
base_eta: 1
|
||||
input_dim: ${obs_dim}
|
||||
mlp_dims: [256, 256]
|
||||
action_dim: ${action_dim}
|
||||
min_eta: 0.1
|
||||
max_eta: 1.0
|
||||
_target_: model.diffusion.eta.EtaFixed
|
||||
network_path: ${base_policy_path}
|
||||
actor:
|
||||
_target_: model.diffusion.unet.VisionUnet1D
|
||||
backbone:
|
||||
_target_: model.common.vit.VitEncoder
|
||||
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
|
||||
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||
cfg:
|
||||
patch_size: 8
|
||||
depth: 1
|
||||
embed_dim: 128
|
||||
num_heads: 4
|
||||
embed_style: embed2
|
||||
embed_norm: 0
|
||||
img_cond_steps: ${img_cond_steps}
|
||||
augment: False
|
||||
spatial_emb: 128
|
||||
diffusion_step_embed_dim: 32
|
||||
dim: 40
|
||||
dim_mults: [1, 2]
|
||||
kernel_size: 5
|
||||
n_groups: 8
|
||||
smaller_encoder: False
|
||||
cond_predict_scale: True
|
||||
action_dim: ${action_dim}
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
critic:
|
||||
_target_: model.common.critic.ViTCritic
|
||||
spatial_emb: 128
|
||||
augment: False
|
||||
backbone:
|
||||
_target_: model.common.vit.VitEncoder
|
||||
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
|
||||
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||
cfg:
|
||||
patch_size: 8
|
||||
depth: 1
|
||||
embed_dim: 128
|
||||
num_heads: 4
|
||||
embed_style: embed2
|
||||
embed_norm: 0
|
||||
img_cond_steps: ${img_cond_steps}
|
||||
mlp_dims: [256, 256, 256]
|
||||
activation_type: Mish
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
ft_denoising_steps: ${ft_denoising_steps}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
@ -25,7 +25,7 @@ env:
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 300
|
||||
save_video: false
|
||||
save_video: False
|
||||
wrappers:
|
||||
robomimic_lowdim:
|
||||
normalization_path: ${normalization_path}
|
||||
@ -45,20 +45,20 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 300
|
||||
n_train_itr: 81
|
||||
n_critic_warmup_itr: 2
|
||||
n_steps: 300
|
||||
gamma: 0.999
|
||||
actor_lr: 1e-5
|
||||
actor_lr: 1e-4
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-5
|
||||
min_lr: 1e-4
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
@ -93,9 +93,9 @@ model:
|
||||
action_dim: ${action_dim}
|
||||
critic:
|
||||
_target_: model.common.critic.CriticObs
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
mlp_dims: [256, 256, 256]
|
||||
activation_type: Mish
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
device: ${device}
|
@ -57,22 +57,22 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 200
|
||||
n_train_itr: 81
|
||||
n_critic_warmup_itr: 2
|
||||
n_steps: 300
|
||||
gamma: 0.999
|
||||
augment: True
|
||||
grad_accumulate: 5
|
||||
actor_lr: 1e-5
|
||||
actor_lr: 1e-4
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 200
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-5
|
||||
min_lr: 1e-4
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 200
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
@ -140,9 +140,9 @@ model:
|
||||
embed_style: embed2
|
||||
embed_norm: 0
|
||||
img_cond_steps: ${img_cond_steps}
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
mlp_dims: [256, 256, 256]
|
||||
activation_type: Mish
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
device: ${device}
|
@ -25,7 +25,7 @@ env:
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 300
|
||||
save_video: false
|
||||
save_video: False
|
||||
wrappers:
|
||||
robomimic_lowdim:
|
||||
normalization_path: ${normalization_path}
|
||||
@ -45,20 +45,20 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 300
|
||||
n_train_itr: 81
|
||||
n_critic_warmup_itr: 2
|
||||
n_steps: 300
|
||||
gamma: 0.999
|
||||
actor_lr: 1e-5
|
||||
actor_lr: 1e-4
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-5
|
||||
min_lr: 1e-4
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
@ -94,9 +94,9 @@ model:
|
||||
action_dim: ${action_dim}
|
||||
critic:
|
||||
_target_: model.common.critic.CriticObs
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
mlp_dims: [256, 256, 256]
|
||||
activation_type: Mish
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
device: ${device}
|
@ -26,7 +26,7 @@ env:
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 300
|
||||
save_video: false
|
||||
save_video: False
|
||||
wrappers:
|
||||
robomimic_lowdim:
|
||||
normalization_path: ${normalization_path}
|
||||
@ -46,20 +46,20 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 300
|
||||
n_train_itr: 81
|
||||
n_critic_warmup_itr: 2
|
||||
n_steps: 300
|
||||
gamma: 0.999
|
||||
actor_lr: 1e-5
|
||||
actor_lr: 1e-4
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-5
|
||||
min_lr: 1e-4
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
@ -94,9 +94,9 @@ model:
|
||||
action_dim: ${action_dim}
|
||||
critic:
|
||||
_target_: model.common.critic.CriticObs
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
mlp_dims: [256, 256, 256]
|
||||
activation_type: Mish
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
device: ${device}
|
@ -26,7 +26,7 @@ env:
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 300
|
||||
save_video: false
|
||||
save_video: False
|
||||
wrappers:
|
||||
robomimic_lowdim:
|
||||
normalization_path: ${normalization_path}
|
||||
@ -46,20 +46,20 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 300
|
||||
n_train_itr: 81
|
||||
n_critic_warmup_itr: 2
|
||||
n_steps: 300
|
||||
gamma: 0.999
|
||||
actor_lr: 1e-5
|
||||
actor_lr: 1e-4
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-5
|
||||
min_lr: 1e-4
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
@ -95,9 +95,9 @@ model:
|
||||
action_dim: ${action_dim}
|
||||
critic:
|
||||
_target_: model.common.critic.CriticObs
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
mlp_dims: [256, 256, 256]
|
||||
activation_type: Mish
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
device: ${device}
|
@ -27,7 +27,7 @@ env:
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 400
|
||||
save_video: false
|
||||
save_video: False
|
||||
wrappers:
|
||||
robomimic_lowdim:
|
||||
normalization_path: ${normalization_path}
|
||||
@ -54,14 +54,14 @@ train:
|
||||
actor_lr: 1e-4
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
warmup_steps: 10
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 0
|
||||
min_lr: 1e-4
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
warmup_steps: 10
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 0
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
val_freq: 10
|
||||
|
@ -69,13 +69,13 @@ train:
|
||||
actor_lr: 1e-5
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-5
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
|
@ -27,7 +27,7 @@ env:
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 400
|
||||
save_video: false
|
||||
save_video: False
|
||||
wrappers:
|
||||
robomimic_lowdim:
|
||||
normalization_path: ${normalization_path}
|
||||
@ -47,21 +47,21 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 1000
|
||||
n_train_itr: 201
|
||||
n_critic_warmup_itr: 2
|
||||
n_steps: 400
|
||||
gamma: 0.999
|
||||
actor_lr: 1e-5
|
||||
actor_lr: 2e-5
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-5
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 0
|
||||
min_lr: 1e-4
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
warmup_steps: 10
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 0
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
val_freq: 10
|
||||
@ -102,10 +102,10 @@ model:
|
||||
action_dim: ${action_dim}
|
||||
critic:
|
||||
_target_: model.common.critic.CriticObs
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
mlp_dims: [256, 256, 256]
|
||||
activation_type: Mish
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
ft_denoising_steps: ${ft_denoising_steps}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
|
173
cfg/robomimic/finetune/square/ft_ppo_diffusion_unet_img.yaml
Normal file
173
cfg/robomimic/finetune/square/ft_ppo_diffusion_unet_img.yaml
Normal file
@ -0,0 +1,173 @@
|
||||
defaults:
|
||||
- _self_
|
||||
hydra:
|
||||
run:
|
||||
dir: ${logdir}
|
||||
_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent
|
||||
|
||||
name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
|
||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/square/square_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-36-37_42/checkpoint/state_500.pt
|
||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||
|
||||
seed: 42
|
||||
device: cuda:0
|
||||
env_name: square
|
||||
obs_dim: 9
|
||||
action_dim: 7
|
||||
denoising_steps: 100
|
||||
ft_denoising_steps: 5
|
||||
cond_steps: 1
|
||||
img_cond_steps: 1
|
||||
horizon_steps: 4
|
||||
act_steps: 4
|
||||
use_ddim: True
|
||||
|
||||
env:
|
||||
n_envs: 50
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 400
|
||||
save_video: False
|
||||
use_image_obs: True
|
||||
wrappers:
|
||||
robomimic_image:
|
||||
normalization_path: ${normalization_path}
|
||||
low_dim_keys: ['robot0_eef_pos',
|
||||
'robot0_eef_quat',
|
||||
'robot0_gripper_qpos']
|
||||
image_keys: ['agentview_image']
|
||||
shape_meta: ${shape_meta}
|
||||
multi_step:
|
||||
n_obs_steps: ${cond_steps}
|
||||
n_action_steps: ${act_steps}
|
||||
max_episode_steps: ${env.max_episode_steps}
|
||||
reset_within_step: True
|
||||
|
||||
shape_meta:
|
||||
obs:
|
||||
rgb:
|
||||
shape: [3, 96, 96]
|
||||
state:
|
||||
shape: [9]
|
||||
action:
|
||||
shape: [7]
|
||||
|
||||
wandb:
|
||||
entity: ${oc.env:DPPO_WANDB_ENTITY}
|
||||
project: robomimic-${env_name}-finetune
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 301
|
||||
n_critic_warmup_itr: 2
|
||||
n_steps: 400
|
||||
gamma: 0.999
|
||||
augment: True
|
||||
grad_accumulate: 20
|
||||
actor_lr: 1e-5
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-5
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
val_freq: 10
|
||||
render:
|
||||
freq: 1
|
||||
num: 0
|
||||
# PPO specific
|
||||
reward_scale_running: True
|
||||
reward_scale_const: 1.0
|
||||
gae_lambda: 0.95
|
||||
batch_size: 500
|
||||
logprob_batch_size: 1000
|
||||
update_epochs: 10
|
||||
vf_coef: 0.5
|
||||
target_kl: 1
|
||||
|
||||
model:
|
||||
_target_: model.diffusion.diffusion_ppo.PPODiffusion
|
||||
# HP to tune
|
||||
gamma_denoising: 0.99
|
||||
clip_ploss_coef: 0.01
|
||||
clip_ploss_coef_base: 0.001
|
||||
clip_ploss_coef_rate: 3
|
||||
randn_clip_value: 3
|
||||
min_sampling_denoising_std: 0.1
|
||||
min_logprob_denoising_std: 0.1
|
||||
#
|
||||
use_ddim: ${use_ddim}
|
||||
ddim_steps: ${ft_denoising_steps}
|
||||
learn_eta: False
|
||||
eta:
|
||||
base_eta: 1
|
||||
input_dim: ${obs_dim}
|
||||
mlp_dims: [256, 256]
|
||||
action_dim: ${action_dim}
|
||||
min_eta: 0.1
|
||||
max_eta: 1.0
|
||||
_target_: model.diffusion.eta.EtaFixed
|
||||
network_path: ${base_policy_path}
|
||||
actor:
|
||||
_target_: model.diffusion.unet.VisionUnet1D
|
||||
backbone:
|
||||
_target_: model.common.vit.VitEncoder
|
||||
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
|
||||
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||
cfg:
|
||||
patch_size: 8
|
||||
depth: 1
|
||||
embed_dim: 128
|
||||
num_heads: 4
|
||||
embed_style: embed2
|
||||
embed_norm: 0
|
||||
img_cond_steps: ${img_cond_steps}
|
||||
augment: False
|
||||
spatial_emb: 128
|
||||
diffusion_step_embed_dim: 32
|
||||
dim: 64
|
||||
dim_mults: [1, 2]
|
||||
kernel_size: 5
|
||||
n_groups: 8
|
||||
smaller_encoder: False
|
||||
cond_predict_scale: True
|
||||
action_dim: ${action_dim}
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
critic:
|
||||
_target_: model.common.critic.ViTCritic
|
||||
spatial_emb: 128
|
||||
augment: False
|
||||
backbone:
|
||||
_target_: model.common.vit.VitEncoder
|
||||
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
|
||||
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||
cfg:
|
||||
patch_size: 8
|
||||
depth: 1
|
||||
embed_dim: 128
|
||||
num_heads: 4
|
||||
embed_style: embed2
|
||||
embed_norm: 0
|
||||
img_cond_steps: ${img_cond_steps}
|
||||
mlp_dims: [256, 256, 256]
|
||||
activation_type: Mish
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
ft_denoising_steps: ${ft_denoising_steps}
|
||||
horizon_steps: ${horizon_steps}
|
||||
obs_dim: ${obs_dim}
|
||||
action_dim: ${action_dim}
|
||||
denoising_steps: ${denoising_steps}
|
||||
device: ${device}
|
@ -25,7 +25,7 @@ env:
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 400
|
||||
save_video: false
|
||||
save_video: False
|
||||
wrappers:
|
||||
robomimic_lowdim:
|
||||
normalization_path: ${normalization_path}
|
||||
@ -45,21 +45,21 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 1000
|
||||
n_train_itr: 201
|
||||
n_critic_warmup_itr: 2
|
||||
n_steps: 400
|
||||
gamma: 0.999
|
||||
actor_lr: 1e-5
|
||||
actor_lr: 1e-4
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-5
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 0
|
||||
min_lr: 1e-4
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
warmup_steps: 10
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 0
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
val_freq: 10
|
||||
@ -93,9 +93,9 @@ model:
|
||||
action_dim: ${action_dim}
|
||||
critic:
|
||||
_target_: model.common.critic.CriticObs
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
mlp_dims: [256, 256, 256]
|
||||
activation_type: Mish
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
device: ${device}
|
@ -57,7 +57,7 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 500
|
||||
n_train_itr: 301
|
||||
n_critic_warmup_itr: 2
|
||||
n_steps: 400
|
||||
gamma: 0.999
|
||||
@ -66,13 +66,13 @@ train:
|
||||
actor_lr: 1e-5
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 500
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-5
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 500
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
@ -140,9 +140,9 @@ model:
|
||||
embed_style: embed2
|
||||
embed_norm: 0
|
||||
img_cond_steps: ${img_cond_steps}
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
mlp_dims: [256, 256, 256]
|
||||
activation_type: Mish
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
device: ${device}
|
@ -25,7 +25,7 @@ env:
|
||||
name: ${env_name}
|
||||
best_reward_threshold_for_success: 1
|
||||
max_episode_steps: 400
|
||||
save_video: false
|
||||
save_video: False
|
||||
wrappers:
|
||||
robomimic_lowdim:
|
||||
normalization_path: ${normalization_path}
|
||||
@ -45,21 +45,21 @@ wandb:
|
||||
run: ${now:%H-%M-%S}_${name}
|
||||
|
||||
train:
|
||||
n_train_itr: 1000
|
||||
n_train_itr: 201
|
||||
n_critic_warmup_itr: 2
|
||||
n_steps: 400
|
||||
gamma: 0.999
|
||||
actor_lr: 1e-5
|
||||
actor_lr: 1e-4
|
||||
actor_weight_decay: 0
|
||||
actor_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
warmup_steps: 10
|
||||
min_lr: 1e-5
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 0
|
||||
min_lr: 1e-4
|
||||
critic_lr: 1e-3
|
||||
critic_weight_decay: 0
|
||||
critic_lr_scheduler:
|
||||
first_cycle_steps: 1000
|
||||
warmup_steps: 10
|
||||
first_cycle_steps: ${train.n_train_itr}
|
||||
warmup_steps: 0
|
||||
min_lr: 1e-3
|
||||
save_model_freq: 100
|
||||
val_freq: 10
|
||||
@ -94,9 +94,9 @@ model:
|
||||
action_dim: ${action_dim}
|
||||
critic:
|
||||
_target_: model.common.critic.CriticObs
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
mlp_dims: [256, 256, 256]
|
||||
activation_type: Mish
|
||||
residual_style: True
|
||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||
horizon_steps: ${horizon_steps}
|
||||
device: ${device}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user