* update from scratch configs

* update gym pretraining configs - use fewer epochs

* update robomimic pretraining configs - use fewer epochs

* allow trajectory plotting in eval agent

* add simple vit unet

* update avoid pretraining configs - use fewer epochs

* update furniture pretraining configs - use same amount of epochs as before

* add robomimic diffusion unet pretraining configs

* update robomimic finetuning configs - higher lr

* add vit unet checkpoint urls

* update pretraining and finetuning instructions as configs are updated
This commit is contained in:
Allen Z. Ren 2024-11-20 15:47:52 -05:00 committed by allenzren
parent d2929f65e1
commit 1d04211666
158 changed files with 3350 additions and 410 deletions

View File

@ -57,6 +57,7 @@ class EvalAgent:
self.horizon_steps = cfg.horizon_steps self.horizon_steps = cfg.horizon_steps
self.max_episode_steps = cfg.env.max_episode_steps self.max_episode_steps = cfg.env.max_episode_steps
self.reset_at_iteration = cfg.env.get("reset_at_iteration", True) self.reset_at_iteration = cfg.env.get("reset_at_iteration", True)
self.save_full_observations = cfg.env.get("save_full_observations", False)
self.furniture_sparse_reward = ( self.furniture_sparse_reward = (
cfg.env.specific.get("sparse_reward", False) cfg.env.specific.get("sparse_reward", False)
if "specific" in cfg.env if "specific" in cfg.env
@ -85,6 +86,10 @@ class EvalAgent:
assert not ( assert not (
self.n_render <= 0 and self.render_video self.n_render <= 0 and self.render_video
), "Need to set n_render > 0 if saving video" ), "Need to set n_render > 0 if saving video"
self.traj_plotter = (
hydra.utils.instantiate(cfg.plotter)
if "plotter" in cfg else None
)
def run(self): def run(self):
pass pass

View File

@ -37,6 +37,11 @@ class EvalDiffusionAgent(EvalAgent):
prev_obs_venv = self.reset_env_all(options_venv=options_venv) prev_obs_venv = self.reset_env_all(options_venv=options_venv)
firsts_trajs[0] = 1 firsts_trajs[0] = 1
reward_trajs = np.zeros((self.n_steps, self.n_envs)) reward_trajs = np.zeros((self.n_steps, self.n_envs))
if self.save_full_observations: # state-only
obs_full_trajs = np.empty((0, self.n_envs, self.obs_dim))
obs_full_trajs = np.vstack(
(obs_full_trajs, prev_obs_venv["state"][:, -1][None])
)
# Collect a set of trajectories from env # Collect a set of trajectories from env
for step in range(self.n_steps): for step in range(self.n_steps):
@ -62,6 +67,13 @@ class EvalDiffusionAgent(EvalAgent):
) )
reward_trajs[step] = reward_venv reward_trajs[step] = reward_venv
firsts_trajs[step + 1] = terminated_venv | truncated_venv firsts_trajs[step + 1] = terminated_venv | truncated_venv
if self.save_full_observations: # state-only
obs_full_venv = np.array(
[info["full_obs"]["state"] for info in info_venv]
) # n_envs x act_steps x obs_dim
obs_full_trajs = np.vstack(
(obs_full_trajs, obs_full_venv.transpose(1, 0, 2))
)
# update for next step # update for next step
prev_obs_venv = obs_venv prev_obs_venv = obs_venv
@ -108,6 +120,16 @@ class EvalDiffusionAgent(EvalAgent):
success_rate = 0 success_rate = 0
log.info("[WARNING] No episode completed within the iteration!") log.info("[WARNING] No episode completed within the iteration!")
# Plot state trajectories (only in D3IL)
if self.traj_plotter is not None:
self.traj_plotter(
obs_full_trajs=obs_full_trajs,
n_render=self.n_render,
max_episode_steps=self.max_episode_steps,
render_dir=self.render_dir,
itr=0,
)
# Log loss and save metrics # Log loss and save metrics
time = timer() time = timer()
log.info( log.info(

View File

@ -0,0 +1,68 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/d3il-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
normalization_path: ${oc.env:DPPO_DATA_DIR}/d3il/avoid_m1/normalization.npz
seed: 42
device: cuda:0
env_name: avoiding-m5
obs_dim: 4
action_dim: 2
denoising_steps: 20
cond_steps: 1
horizon_steps: 4
act_steps: 4
n_steps: 25
render_num: 40
plotter:
_target_: env.plot_traj.TrajPlotter
env_type: avoid
normalization_path: ${normalization_path}
env:
n_envs: 40
name: ${env_name}
max_episode_steps: 100
reset_at_iteration: True
save_video: False
best_reward_threshold_for_success: 2
save_full_observations: True
wrappers:
d3il_lowdim:
normalization_path: ${normalization_path}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
pass_full_observations: ${env.save_full_observations}
reset_within_step: False
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
#
network_path: ${base_policy_path}
network:
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
time_dim: 16
mlp_dims: [512, 512, 512]
activation_type: ReLU
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -25,12 +25,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 15000 n_epochs: 5000
batch_size: 16 batch_size: 16
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 15000 first_cycle_steps: 5000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -24,12 +24,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 10000 n_epochs: 5000
batch_size: 16 batch_size: 16
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 5000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -25,12 +25,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 10000 n_epochs: 5000
batch_size: 32 batch_size: 16
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 5000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -25,12 +25,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 15000 n_epochs: 5000
batch_size: 16 batch_size: 16
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 15000 first_cycle_steps: 5000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -24,12 +24,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 10000 n_epochs: 5000
batch_size: 16 batch_size: 16
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 5000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -25,12 +25,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 10000 n_epochs: 5000
batch_size: 32 batch_size: 16
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 5000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -25,12 +25,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 15000 n_epochs: 5000
batch_size: 16 batch_size: 16
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 15000 first_cycle_steps: 5000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -24,12 +24,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 10000 n_epochs: 5000
batch_size: 16 batch_size: 16
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 5000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -25,12 +25,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 10000 n_epochs: 5000
batch_size: 32 batch_size: 32
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 5000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -1,5 +1,7 @@
## Fine-tuning experiments ## Fine-tuning experiments
**Update, Nov 20 2024**: In v0.7 we updated the fine-tuning configs, as we found that sample efficiency can be improved with a higher actor learning rate and other hyperparameter changes. If you would like to replicate the original experimental results from the paper, please use the configs from v0.6. Otherwise, we recommend starting with the configs from v0.7 for your applications.
### Comparing diffusion-based RL algorithms (Sec. 5.1) ### Comparing diffusion-based RL algorithms (Sec. 5.1)
Gym configs are under `cfg/gym/finetune/<env_name>/`, and the naming follows `ft_<alg_name>_diffusion_mlp`, e.g., `ft_awr_diffusion_mlp`. `alg_name` is one of `rwr`, `awr`, `dipo`, `idql`, `dql`, `qsm`, `ppo` (DPPO), `ppo_exact` (exact likelihood). They share the same pre-trained checkpoint in each env. Gym configs are under `cfg/gym/finetune/<env_name>/`, and the naming follows `ft_<alg_name>_diffusion_mlp`, e.g., `ft_awr_diffusion_mlp`. `alg_name` is one of `rwr`, `awr`, `dipo`, `idql`, `dql`, `qsm`, `ppo` (DPPO), `ppo_exact` (exact likelihood). They share the same pre-trained checkpoint in each env.

View File

@ -0,0 +1,66 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
seed: 42
device: cuda:0
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
obs_dim: 44
action_dim: 10
denoising_steps: 100
cond_steps: 1
horizon_steps: 8
act_steps: 8
use_ddim: True
ddim_steps: 5
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
render_num: 0
env:
n_envs: 1000
name: ${env_name}
env_type: furniture
max_episode_steps: 1000
best_reward_threshold_for_success: 2
specific:
headless: true
furniture: lamp
randomness: low
normalization_path: ${normalization_path}
obs_steps: ${cond_steps}
act_steps: ${act_steps}
sparse_reward: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
time_dim: 32
mlp_dims: [1024, 1024, 1024, 1024, 1024, 1024, 1024]
cond_mlp_dims: [512, 64]
use_layernorm: True # needed for larger MLP
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,68 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
seed: 42
device: cuda:0
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
obs_dim: 44
action_dim: 10
denoising_steps: 100
cond_steps: 1
horizon_steps: 16
act_steps: 8
use_ddim: True
ddim_steps: 5
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
render_num: 0
env:
n_envs: 1000
name: ${env_name}
env_type: furniture
max_episode_steps: 1000
best_reward_threshold_for_success: 2
specific:
headless: true
furniture: lamp
randomness: low
normalization_path: ${normalization_path}
obs_steps: ${cond_steps}
act_steps: ${act_steps}
sparse_reward: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.Unet1D
diffusion_step_embed_dim: 16
dim: 64
dim_mults: [1, 2, 4]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
groupnorm_eps: 1e-4 # not important
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -7,7 +7,7 @@ _target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps} name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path: ${oc.env:DPPO_LOG_DIR}/furniture-pretrain/one_leg/one_leg_low_dim_pre_diffusion_mlp_ta8_td100/2024-07-22_20-01-16/checkpoint/state_8000.pt base_policy_path:
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
seed: 42 seed: 42

View File

@ -0,0 +1,68 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
seed: 42
device: cuda:0
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
obs_dim: 58
action_dim: 10
denoising_steps: 100
cond_steps: 1
horizon_steps: 16
act_steps: 8
use_ddim: True
ddim_steps: 5
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
render_num: 0
env:
n_envs: 1000
name: ${env_name}
env_type: furniture
max_episode_steps: 700
best_reward_threshold_for_success: 1
specific:
headless: true
furniture: one_leg
randomness: low
normalization_path: ${normalization_path}
obs_steps: ${cond_steps}
act_steps: ${act_steps}
sparse_reward: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.Unet1D
diffusion_step_embed_dim: 16
dim: 64
dim_mults: [1, 2, 4]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
groupnorm_eps: 1e-4 # not important
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,66 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
seed: 42
device: cuda:0
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
obs_dim: 44
action_dim: 10
denoising_steps: 100
cond_steps: 1
horizon_steps: 8
act_steps: 8
use_ddim: True
ddim_steps: 5
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
render_num: 0
env:
n_envs: 1000
name: ${env_name}
env_type: furniture
max_episode_steps: 1000
best_reward_threshold_for_success: 2
specific:
headless: true
furniture: round_table
randomness: low
normalization_path: ${normalization_path}
obs_steps: ${cond_steps}
act_steps: ${act_steps}
sparse_reward: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
time_dim: 32
mlp_dims: [1024, 1024, 1024, 1024, 1024, 1024, 1024]
cond_mlp_dims: [512, 64]
use_layernorm: True # needed for larger MLP
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,68 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
seed: 42
device: cuda:0
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
obs_dim: 44
action_dim: 10
denoising_steps: 100
cond_steps: 1
horizon_steps: 16
act_steps: 8
use_ddim: True
ddim_steps: 5
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
render_num: 0
env:
n_envs: 1000
name: ${env_name}
env_type: furniture
max_episode_steps: 1000
best_reward_threshold_for_success: 2
specific:
headless: true
furniture: round_table
randomness: low
normalization_path: ${normalization_path}
obs_steps: ${cond_steps}
act_steps: ${act_steps}
sparse_reward: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.Unet1D
diffusion_step_embed_dim: 16
dim: 64
dim_mults: [1, 2, 4]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
groupnorm_eps: 1e-4 # not important
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 8000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 8000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -30,7 +30,7 @@ train:
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 3000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 8000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 8000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -30,7 +30,7 @@ train:
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 3000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 8000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 8000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -30,7 +30,7 @@ train:
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 3000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 8000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 8000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -25,12 +25,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 10000 n_epochs: 3000
batch_size: 256 batch_size: 256
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 3000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 8000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 8000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -30,7 +30,7 @@ train:
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 3000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 8000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -31,7 +31,7 @@ train:
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 8000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -30,7 +30,7 @@ train:
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 3000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-5 min_lr: 1e-5
save_model_freq: 500 save_model_freq: 500

View File

@ -17,10 +17,10 @@ obs_dim: 17
action_dim: 6 action_dim: 6
denoising_steps: 20 denoising_steps: 20
cond_steps: 1 cond_steps: 1
horizon_steps: 1 horizon_steps: 4
act_steps: 1 act_steps: 4
n_steps: 1000 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation. n_steps: 250 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation.
render_num: 0 render_num: 0
env: env:

View File

@ -20,7 +20,7 @@ cond_steps: 1
horizon_steps: 4 horizon_steps: 4
act_steps: 4 act_steps: 4
n_steps: 500 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation. n_steps: 250 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation.
render_num: 0 render_num: 0
env: env:

View File

@ -0,0 +1,61 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/gym-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
normalization_path: ${oc.env:DPPO_DATA_DIR}/gym/${env_name}/normalization.npz
seed: 42
device: cuda:0
env_name: walker2d-medium-v2
obs_dim: 17
action_dim: 6
denoising_steps: 20
cond_steps: 1
horizon_steps: 4
act_steps: 4
n_steps: 250 # each episode can take at most max_episode_steps / act_steps (= 250 right now) steps, but may finish earlier in gym. We only count episodes that finish within n_steps for evaluation.
render_num: 0
env:
n_envs: 40
name: ${env_name}
max_episode_steps: 1000
reset_at_iteration: False
save_video: False
best_reward_threshold_for_success: 3 # success rate not relevant for gym tasks
wrappers:
mujoco_locomotion_lowdim:
normalization_path: ${normalization_path}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
#
network_path: ${base_policy_path}
network:
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
time_dim: 16
mlp_dims: [512, 512, 512]
activation_type: ReLU
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -24,12 +24,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 3000 n_epochs: 200
batch_size: 128 batch_size: 128
learning_rate: 1e-3 learning_rate: 1e-3
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 3000 first_cycle_steps: 200
warmup_steps: 1 warmup_steps: 1
min_lr: 1e-4 min_lr: 1e-4
save_model_freq: 100 save_model_freq: 100

View File

@ -23,15 +23,14 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 500 n_epochs: 200
batch_size: 128 batch_size: 128
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: 200
warmup_steps: 1 warmup_steps: 1
min_lr: 1e-4 min_lr: 1e-4
save_model_freq: 100 save_model_freq: 100
model: model:

View File

@ -24,12 +24,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 3000 n_epochs: 200
batch_size: 128 batch_size: 128
learning_rate: 1e-3 learning_rate: 1e-3
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 3000 first_cycle_steps: 200
warmup_steps: 1 warmup_steps: 1
min_lr: 1e-4 min_lr: 1e-4
save_model_freq: 100 save_model_freq: 100

View File

@ -23,12 +23,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 500 n_epochs: 200
batch_size: 128 batch_size: 128
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: 200
warmup_steps: 1 warmup_steps: 1
min_lr: 1e-4 min_lr: 1e-4
save_model_freq: 100 save_model_freq: 100

View File

@ -24,12 +24,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 8000 n_epochs: 3000
batch_size: 128 batch_size: 128
learning_rate: 1e-3 learning_rate: 1e-3
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 8000 first_cycle_steps: 3000
warmup_steps: 1 warmup_steps: 1
min_lr: 1e-4 min_lr: 1e-4
save_model_freq: 500 save_model_freq: 500

View File

@ -23,12 +23,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 5000 n_epochs: 3000
batch_size: 256 batch_size: 256
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 0 weight_decay: 0
lr_scheduler: lr_scheduler:
first_cycle_steps: 5000 first_cycle_steps: 3000
warmup_steps: 100 warmup_steps: 100
min_lr: 1e-4 min_lr: 1e-4
save_model_freq: 500 save_model_freq: 500

View File

@ -24,12 +24,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 8000 n_epochs: 3000
batch_size: 256 batch_size: 256
learning_rate: 1e-3 learning_rate: 1e-3
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 8000 first_cycle_steps: 3000
warmup_steps: 1 warmup_steps: 1
min_lr: 1e-4 min_lr: 1e-4
save_model_freq: 500 save_model_freq: 500

View File

@ -23,12 +23,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 5000 n_epochs: 3000
batch_size: 128 batch_size: 128
learning_rate: 1e-3 learning_rate: 1e-3
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 5000 first_cycle_steps: 3000
warmup_steps: 1 warmup_steps: 1
min_lr: 1e-4 min_lr: 1e-4
save_model_freq: 500 save_model_freq: 500

View File

@ -24,12 +24,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 8000 n_epochs: 3000
batch_size: 128 batch_size: 128
learning_rate: 1e-3 learning_rate: 1e-3
weight_decay: 1e-5 weight_decay: 1e-5
lr_scheduler: lr_scheduler:
first_cycle_steps: 8000 first_cycle_steps: 3000
warmup_steps: 1 warmup_steps: 1
min_lr: 1e-4 min_lr: 1e-4
save_model_freq: 500 save_model_freq: 500

View File

@ -23,12 +23,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 5000 n_epochs: 3000
batch_size: 128 batch_size: 128
learning_rate: 1e-3 learning_rate: 1e-3
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 5000 first_cycle_steps: 3000
warmup_steps: 1 warmup_steps: 1
min_lr: 1e-4 min_lr: 1e-4
save_model_freq: 500 save_model_freq: 500

View File

@ -24,12 +24,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 3000 n_epochs: 200
batch_size: 128 batch_size: 128
learning_rate: 1e-3 learning_rate: 1e-3
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 3000 first_cycle_steps: 200
warmup_steps: 1 warmup_steps: 1
min_lr: 1e-4 min_lr: 1e-4
save_model_freq: 100 save_model_freq: 100

View File

@ -23,12 +23,12 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_epochs: 3000 n_epochs: 200
batch_size: 128 batch_size: 128
learning_rate: 1e-4 learning_rate: 1e-4
weight_decay: 1e-6 weight_decay: 1e-6
lr_scheduler: lr_scheduler:
first_cycle_steps: 3000 first_cycle_steps: 200
warmup_steps: 1 warmup_steps: 1
min_lr: 1e-4 min_lr: 1e-4
save_model_freq: 100 save_model_freq: 100

View File

@ -1,7 +1,7 @@
defaults: defaults:
- _self_ - _self_
hydra: hydra:
run: run:
dir: ${logdir} dir: ${logdir}
_target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent _target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
@ -42,7 +42,7 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 1000 n_train_itr: 501
n_critic_warmup_itr: 0 n_critic_warmup_itr: 0
n_steps: 1000 n_steps: 1000
gamma: 0.99 gamma: 0.99
@ -55,7 +55,7 @@ train:
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 1000
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100
@ -67,7 +67,7 @@ train:
reward_scale_running: True reward_scale_running: True
reward_scale_const: 1.0 reward_scale_const: 1.0
gae_lambda: 0.95 gae_lambda: 0.95
batch_size: 10000 batch_size: 5000
update_epochs: 10 update_epochs: 10
vf_coef: 0.5 vf_coef: 0.5
target_kl: 1 target_kl: 1
@ -75,7 +75,7 @@ train:
model: model:
_target_: model.diffusion.diffusion_ppo.PPODiffusion _target_: model.diffusion.diffusion_ppo.PPODiffusion
# HP to tune # HP to tune
gamma_denoising: 0.99 gamma_denoising: 1
clip_ploss_coef: 0.1 clip_ploss_coef: 0.1
clip_ploss_coef_base: 0.1 clip_ploss_coef_base: 0.1
clip_ploss_coef_rate: 3 clip_ploss_coef_rate: 3
@ -94,10 +94,10 @@ model:
residual_style: True residual_style: True
critic: critic:
_target_: model.common.critic.CriticObs _target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256] mlp_dims: [256, 256, 256]
activation_type: Mish activation_type: Mish
residual_style: True residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
ft_denoising_steps: ${ft_denoising_steps} ft_denoising_steps: ${ft_denoising_steps}
horizon_steps: ${horizon_steps} horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim} obs_dim: ${obs_dim}

View File

@ -40,7 +40,7 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 1000 n_train_itr: 501
n_critic_warmup_itr: 0 n_critic_warmup_itr: 0
n_steps: 1000 n_steps: 1000
gamma: 0.99 gamma: 0.99
@ -65,7 +65,7 @@ train:
reward_scale_running: True reward_scale_running: True
reward_scale_const: 1.0 reward_scale_const: 1.0
gae_lambda: 0.95 gae_lambda: 0.95
batch_size: 1000 batch_size: 500
update_epochs: 10 update_epochs: 10
vf_coef: 0.5 vf_coef: 0.5
target_kl: 1 target_kl: 1

View File

@ -42,7 +42,7 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 1000 n_train_itr: 301
n_critic_warmup_itr: 0 n_critic_warmup_itr: 0
n_steps: 1000 n_steps: 1000
gamma: 0.99 gamma: 0.99
@ -67,7 +67,7 @@ train:
reward_scale_running: True reward_scale_running: True
reward_scale_const: 1.0 reward_scale_const: 1.0
gae_lambda: 0.95 gae_lambda: 0.95
batch_size: 10000 batch_size: 5000
update_epochs: 10 update_epochs: 10
vf_coef: 0.5 vf_coef: 0.5
target_kl: 1 target_kl: 1
@ -75,7 +75,7 @@ train:
model: model:
_target_: model.diffusion.diffusion_ppo.PPODiffusion _target_: model.diffusion.diffusion_ppo.PPODiffusion
# HP to tune # HP to tune
gamma_denoising: 0.99 gamma_denoising: 1
clip_ploss_coef: 0.1 clip_ploss_coef: 0.1
clip_ploss_coef_base: 0.1 clip_ploss_coef_base: 0.1
clip_ploss_coef_rate: 3 clip_ploss_coef_rate: 3

View File

@ -40,7 +40,7 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 1000 n_train_itr: 301
n_critic_warmup_itr: 0 n_critic_warmup_itr: 0
n_steps: 1000 n_steps: 1000
gamma: 0.99 gamma: 0.99
@ -65,7 +65,7 @@ train:
reward_scale_running: True reward_scale_running: True
reward_scale_const: 1.0 reward_scale_const: 1.0
gae_lambda: 0.95 gae_lambda: 0.95
batch_size: 1000 batch_size: 500
update_epochs: 10 update_epochs: 10
vf_coef: 0.5 vf_coef: 0.5
target_kl: 1 target_kl: 1

View File

@ -42,7 +42,7 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 1000 n_train_itr: 501
n_critic_warmup_itr: 0 n_critic_warmup_itr: 0
n_steps: 1000 n_steps: 1000
gamma: 0.99 gamma: 0.99
@ -55,7 +55,7 @@ train:
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 10000 first_cycle_steps: 1000
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100
@ -67,7 +67,7 @@ train:
reward_scale_running: True reward_scale_running: True
reward_scale_const: 1.0 reward_scale_const: 1.0
gae_lambda: 0.95 gae_lambda: 0.95
batch_size: 10000 batch_size: 5000
update_epochs: 10 update_epochs: 10
vf_coef: 0.5 vf_coef: 0.5
target_kl: 1 target_kl: 1
@ -75,7 +75,7 @@ train:
model: model:
_target_: model.diffusion.diffusion_ppo.PPODiffusion _target_: model.diffusion.diffusion_ppo.PPODiffusion
# HP to tune # HP to tune
gamma_denoising: 0.99 gamma_denoising: 1
clip_ploss_coef: 0.1 clip_ploss_coef: 0.1
clip_ploss_coef_base: 0.1 clip_ploss_coef_base: 0.1
clip_ploss_coef_rate: 3 clip_ploss_coef_rate: 3
@ -94,10 +94,10 @@ model:
residual_style: True residual_style: True
critic: critic:
_target_: model.common.critic.CriticObs _target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256] mlp_dims: [256, 256, 256]
activation_type: Mish activation_type: Mish
residual_style: True residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
ft_denoising_steps: ${ft_denoising_steps} ft_denoising_steps: ${ft_denoising_steps}
horizon_steps: ${horizon_steps} horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim} obs_dim: ${obs_dim}

View File

@ -40,7 +40,7 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 1000 n_train_itr: 301
n_critic_warmup_itr: 0 n_critic_warmup_itr: 0
n_steps: 1000 n_steps: 1000
gamma: 0.99 gamma: 0.99
@ -65,7 +65,7 @@ train:
reward_scale_running: True reward_scale_running: True
reward_scale_const: 1.0 reward_scale_const: 1.0
gae_lambda: 0.95 gae_lambda: 0.95
batch_size: 1000 batch_size: 500
update_epochs: 10 update_epochs: 10
vf_coef: 0.5 vf_coef: 0.5
target_kl: 1 target_kl: 1

View File

@ -1,6 +1,6 @@
## Pre-training experiments ## Pre-training experiments
**Update, Nov 6 2024**: we fixed the issue of EMA update being too infrequent causing slow pre-training. Now the number of epochs needed for pre-training can be much lower than those used in the configs. We recommend training with fewer epochs and testing the early checkpoints. **Update, Nov 20 2024**: We fixed the issue of EMA update being too infrequent causing slow pre-training ([commit](https://github.com/irom-princeton/dppo/commit/e1ef4ca1cfbff85e5ae6c49f5e57debd70174616)). Now the number of epochs needed for pre-training can be much lower than those previously used in the configs (e.g., 3000 for robomimic state and 1000 for robomimic pixel), and we have updated the pre-training configs in v0.7. If you would like to replicate the original experimental results from the paper, please use v0.6.
### Comparing diffusion-based RL algorithms (Sec. 5.1) ### Comparing diffusion-based RL algorithms (Sec. 5.1)
Gym configs are under `cfg/gym/pretrain/<env_name>/`, and the config name is `pre_diffusion_mlp`. Robomimic configs are under `cfg/robomimic/pretrain/<env_name>/`, and the name is also `pre_diffusion_mlp`. Gym configs are under `cfg/gym/pretrain/<env_name>/`, and the config name is `pre_diffusion_mlp`. Robomimic configs are under `cfg/robomimic/pretrain/<env_name>/`, and the name is also `pre_diffusion_mlp`.

View File

@ -7,7 +7,7 @@ _target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps} name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_img_ta4_td100/2024-07-30_22-23-55/checkpoint/state_5000.pt base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
@ -28,7 +28,7 @@ n_steps: 300 # each episode takes max_episode_steps / act_steps steps
render_num: 0 render_num: 0
env: env:
n_envs: 50 n_envs: 20 # reduce gpu usage
name: ${env_name} name: ${env_name}
best_reward_threshold_for_success: 1 best_reward_threshold_for_success: 1
max_episode_steps: 300 max_episode_steps: 300

View File

@ -0,0 +1,68 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
seed: 42
device: cuda:0
env_name: can
obs_dim: 23
action_dim: 7
denoising_steps: 20
cond_steps: 1
horizon_steps: 4
act_steps: 4
n_steps: 75 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 40
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos',
'object'] # same order of preprocessed observations
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.Unet1D
diffusion_step_embed_dim: 16
dim: 40
dim_mults: [1, 2]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
action_dim: ${action_dim}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,102 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
seed: 42
device: cuda:0
env_name: can
obs_dim: 9
action_dim: 7
denoising_steps: 100
cond_steps: 1
img_cond_steps: 1
horizon_steps: 4
act_steps: 4
use_ddim: True
ddim_steps: 5
n_steps: 300 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 20 # reduce gpu usage
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos']
image_keys: ['robot0_eye_in_hand_image']
shape_meta: ${shape_meta}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
shape_meta:
obs:
rgb:
shape: [3, 96, 96]
state:
shape: [9]
action:
shape: [7]
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.VisionUnet1D
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per image, concatenated over the img_cond_steps history frames
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
augment: False
spatial_emb: 128
diffusion_step_embed_dim: 32
dim: 40
dim_mults:
- 1
- 2
kernel_size: 5
n_groups: 8
smaller_encoder: false
cond_predict_scale: true
action_dim: ${action_dim}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -7,7 +7,7 @@ _target_: agent.eval.eval_gaussian_agent.EvalGaussianAgent
name: ${env_name}_eval_gaussian_mlp_ta${horizon_steps} name: ${env_name}_eval_gaussian_mlp_ta${horizon_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_gaussian_mlp_ta4/2024-06-28_13-31-00/checkpoint/state_5000.pt base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz

View File

@ -7,7 +7,7 @@ _target_: agent.eval.eval_gaussian_img_agent.EvalImgGaussianAgent
name: ${env_name}_eval_gaussian_mlp_img_ta${horizon_steps} name: ${env_name}_eval_gaussian_mlp_img_ta${horizon_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_gaussian_mlp_img_ta4/2024-07-28_21-54-40/checkpoint/state_1000.pt base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz

View File

@ -0,0 +1,65 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
seed: 42
device: cuda:0
env_name: lift
obs_dim: 19
action_dim: 7
denoising_steps: 20
cond_steps: 1
horizon_steps: 4
act_steps: 4
n_steps: 300 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 50
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos',
'object'] # same order of preprocessed observations
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
network_path: ${base_policy_path}
network:
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
time_dim: 16
mlp_dims: [512, 512, 512]
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,97 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
seed: 42
device: cuda:0
env_name: lift
obs_dim: 9
action_dim: 7
denoising_steps: 100
cond_steps: 1
img_cond_steps: 1
horizon_steps: 4
act_steps: 4
use_ddim: True
ddim_steps: 5
n_steps: 300 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 20 # reduce gpu usage
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos']
image_keys: ['robot0_eye_in_hand_image']
shape_meta: ${shape_meta}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
shape_meta:
obs:
rgb:
shape: [3, 96, 96]
state:
shape: [9]
action:
shape: [7]
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.mlp_diffusion.VisionDiffusionMLP
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per image, concatenated over the img_cond_steps history frames
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
augment: False
spatial_emb: 128
time_dim: 32
mlp_dims: [512, 512, 512]
residual_style: True
img_cond_steps: ${img_cond_steps}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,68 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
seed: 42
device: cuda:0
env_name: lift
obs_dim: 19
action_dim: 7
denoising_steps: 20
cond_steps: 1
horizon_steps: 4
act_steps: 4
n_steps: 75 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 40
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos',
'object'] # same order of preprocessed observations
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.Unet1D
diffusion_step_embed_dim: 16
dim: 40
dim_mults: [1, 2]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
action_dim: ${action_dim}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,100 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
seed: 42
device: cuda:0
env_name: lift
obs_dim: 9
action_dim: 7
denoising_steps: 100
cond_steps: 1
img_cond_steps: 1
horizon_steps: 4
act_steps: 4
use_ddim: True
ddim_steps: 5
n_steps: 300 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 20 # reduce gpu usage
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos']
image_keys: ['robot0_eye_in_hand_image']
shape_meta: ${shape_meta}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
shape_meta:
obs:
rgb:
shape: [3, 96, 96]
state:
shape: [9]
action:
shape: [7]
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.VisionUnet1D
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per image, concatenated over the img_cond_steps history frames
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
augment: False
spatial_emb: 128
diffusion_step_embed_dim: 32
dim: 40
dim_mults: [1, 2]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
action_dim: ${action_dim}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -18,8 +18,8 @@ obs_dim: 23
action_dim: 7 action_dim: 7
denoising_steps: 20 denoising_steps: 20
cond_steps: 1 cond_steps: 1
horizon_steps: 1 horizon_steps: 4
act_steps: 1 act_steps: 4
n_steps: 400 # each episode takes max_episode_steps / act_steps steps n_steps: 400 # each episode takes max_episode_steps / act_steps steps
render_num: 0 render_num: 0

View File

@ -0,0 +1,97 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
seed: 42
device: cuda:0
env_name: square
obs_dim: 9
action_dim: 7
denoising_steps: 100
cond_steps: 1
img_cond_steps: 1
horizon_steps: 4
act_steps: 4
use_ddim: True
ddim_steps: 5
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 20 # reduce gpu usage
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 400
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos']
image_keys: ['agentview_image']
shape_meta: ${shape_meta}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
shape_meta:
obs:
rgb:
shape: [3, 96, 96]
state:
shape: [9]
action:
shape: [7]
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.mlp_diffusion.VisionDiffusionMLP
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per image, concatenated over the img_cond_steps history frames
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
augment: False
spatial_emb: 128
time_dim: 32
mlp_dims: [768, 768, 768]
residual_style: True
img_cond_steps: ${img_cond_steps}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,68 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
seed: 42
device: cuda:0
env_name: square
obs_dim: 23
action_dim: 7
denoising_steps: 20
cond_steps: 1
horizon_steps: 4
act_steps: 4
n_steps: 100 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 50
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 400
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos',
'object'] # same order of preprocessed observations
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.Unet1D
diffusion_step_embed_dim: 16
dim: 64
dim_mults: [1, 2]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,102 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
seed: 42
device: cuda:0
env_name: square
obs_dim: 9
action_dim: 7
denoising_steps: 100
cond_steps: 1
img_cond_steps: 1
horizon_steps: 4
act_steps: 4
use_ddim: True
ddim_steps: 5
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 30 # reduce gpu usage
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 400
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos']
image_keys: ['agentview_image']
shape_meta: ${shape_meta}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
shape_meta:
obs:
rgb:
shape: [3, 96, 96]
state:
shape: [9]
action:
shape: [7]
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.VisionUnet1D
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per image, concatenated over the img_cond_steps history frames
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
augment: False
spatial_emb: 128
diffusion_step_embed_dim: 32
dim: 64
dim_mults:
- 1
- 2
kernel_size: 5
n_groups: 8
smaller_encoder: false
cond_predict_scale: true
action_dim: ${action_dim}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -3,9 +3,9 @@ defaults:
hydra: hydra:
run: run:
dir: ${logdir} dir: ${logdir}
_target_: agent.eval.eval_gaussian_agent.EvalGaussianAgent _target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_gaussian_mlp_ta${horizon_steps} name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path: base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
@ -13,12 +13,13 @@ normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.
seed: 42 seed: 42
device: cuda:0 device: cuda:0
env_name: square env_name: transport
obs_dim: 23 obs_dim: 59
action_dim: 7 action_dim: 14
denoising_steps: 20
cond_steps: 1 cond_steps: 1
horizon_steps: 1 horizon_steps: 8
act_steps: 1 act_steps: 8
n_steps: 400 # each episode takes max_episode_steps / act_steps steps n_steps: 400 # each episode takes max_episode_steps / act_steps steps
render_num: 0 render_num: 0
@ -27,7 +28,7 @@ env:
n_envs: 50 n_envs: 50
name: ${env_name} name: ${env_name}
best_reward_threshold_for_success: 1 best_reward_threshold_for_success: 1
max_episode_steps: 400 max_episode_steps: 800
save_video: False save_video: False
wrappers: wrappers:
robomimic_lowdim: robomimic_lowdim:
@ -35,6 +36,9 @@ env:
low_dim_keys: ['robot0_eef_pos', low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat', 'robot0_eef_quat',
'robot0_gripper_qpos', 'robot0_gripper_qpos',
"robot1_eef_pos",
"robot1_eef_quat",
"robot1_gripper_qpos",
'object'] # same order of preprocessed observations 'object'] # same order of preprocessed observations
multi_step: multi_step:
n_obs_steps: ${cond_steps} n_obs_steps: ${cond_steps}
@ -42,19 +46,24 @@ env:
max_episode_steps: ${env.max_episode_steps} max_episode_steps: ${env.max_episode_steps}
reset_within_step: True reset_within_step: True
model: model:
_target_: model.common.gaussian.GaussianModel _target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3 randn_clip_value: 3
# #
network_path: ${base_policy_path} network_path: ${base_policy_path}
network: network:
_target_: model.common.mlp_gaussian.Gaussian_MLP _target_: model.diffusion.mlp_diffusion.DiffusionMLP
time_dim: 32
mlp_dims: [1024, 1024, 1024] mlp_dims: [1024, 1024, 1024]
activation_type: ReLU residual_style: True
use_layernorm: true
fixed_std: 0.1
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'} cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps} horizon_steps: ${horizon_steps}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps} horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device} device: ${device}

View File

@ -0,0 +1,102 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
seed: 42
device: cuda:0
env_name: transport
obs_dim: 18
action_dim: 14
denoising_steps: 100
cond_steps: 1
img_cond_steps: 1
horizon_steps: 8
act_steps: 8
use_ddim: True
ddim_steps: 5
n_steps: 200 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 30 # reduce gpu usage
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 800
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos',
"robot1_eef_pos",
"robot1_eef_quat",
"robot1_gripper_qpos"]
image_keys: ['shouldercamera0_image',
'shouldercamera1_image']
shape_meta: ${shape_meta}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
shape_meta:
obs:
rgb:
shape: [6, 96, 96]
state:
shape: [18]
action:
shape: [14]
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.mlp_diffusion.VisionDiffusionMLP
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per frame x img_cond_steps: the image history is concatenated channel-wise
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
augment: False
num_img: 2
spatial_emb: 128
time_dim: 32
mlp_dims: [768, 768, 768]
residual_style: True
img_cond_steps: ${img_cond_steps}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,71 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
seed: 42
device: cuda:0
env_name: transport
obs_dim: 59
action_dim: 14
denoising_steps: 20
cond_steps: 1
horizon_steps: 16
act_steps: 8
n_steps: 100 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 50
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 800
save_video: False
wrappers:
robomimic_lowdim:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos',
"robot1_eef_pos",
"robot1_eef_quat",
"robot1_gripper_qpos",
'object'] # same order as in the preprocessed observations
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.Unet1D
diffusion_step_embed_dim: 16
dim: 64
dim_mults: [1, 2]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
action_dim: ${action_dim}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -0,0 +1,107 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path:
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
seed: 42
device: cuda:0
env_name: transport
obs_dim: 18
action_dim: 14
denoising_steps: 100
cond_steps: 1
img_cond_steps: 1
horizon_steps: 16
act_steps: 8
use_ddim: True
ddim_steps: 5
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
render_num: 0
env:
n_envs: 30 # reduce gpu usage
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 800
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos',
"robot1_eef_pos",
"robot1_eef_quat",
"robot1_gripper_qpos"]
image_keys: ['shouldercamera0_image',
'shouldercamera1_image']
shape_meta: ${shape_meta}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
shape_meta:
obs:
rgb:
shape: [6, 96, 96]
state:
shape: [18]
action:
shape: [14]
model:
_target_: model.diffusion.diffusion.DiffusionModel
predict_epsilon: True
denoised_clip_value: 1.0
randn_clip_value: 3
#
use_ddim: ${use_ddim}
ddim_steps: ${ddim_steps}
network_path: ${base_policy_path}
network:
_target_: model.diffusion.unet.VisionUnet1D
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per frame x img_cond_steps: the image history is concatenated channel-wise
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
augment: False
num_img: 2
spatial_emb: 128
diffusion_step_embed_dim: 32
dim: 64
dim_mults:
- 1
- 2
kernel_size: 5
n_groups: 8
smaller_encoder: false
cond_predict_scale: true
action_dim: ${action_dim}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -7,7 +7,8 @@ _target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
name: ${env_name}_ft_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps} name: ${env_name}_ft_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_5000.pt # use 8000 for comparing policy parameterizations base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_5000.pt # use 5000 for comparing diffusion rl algorithms
# base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_8000.pt # use 8000 for comparing policy parameterizations
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
@ -54,13 +55,13 @@ train:
actor_lr: 1e-4 actor_lr: 1e-4
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-4 min_lr: 1e-4
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100

View File

@ -66,16 +66,16 @@ train:
gamma: 0.999 gamma: 0.999
augment: True augment: True
grad_accumulate: 15 grad_accumulate: 15
actor_lr: 1e-4 actor_lr: 5e-5
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-4 min_lr: 5e-5
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100

View File

@ -27,7 +27,7 @@ env:
name: ${env_name} name: ${env_name}
best_reward_threshold_for_success: 1 best_reward_threshold_for_success: 1
max_episode_steps: 300 max_episode_steps: 300
save_video: false save_video: False
wrappers: wrappers:
robomimic_lowdim: robomimic_lowdim:
normalization_path: ${normalization_path} normalization_path: ${normalization_path}
@ -47,20 +47,20 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 300 n_train_itr: 151
n_critic_warmup_itr: 2 n_critic_warmup_itr: 2
n_steps: 300 n_steps: 300
gamma: 0.999 gamma: 0.999
actor_lr: 1e-5 actor_lr: 1e-4
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-5 min_lr: 1e-4
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100

View File

@ -0,0 +1,173 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent
name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-34-05_42/checkpoint/state_500.pt
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
seed: 42
device: cuda:0
env_name: can
obs_dim: 9
action_dim: 7
denoising_steps: 100
ft_denoising_steps: 5
cond_steps: 1
img_cond_steps: 1
horizon_steps: 4
act_steps: 4
use_ddim: True
env:
n_envs: 50
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos']
image_keys: ['robot0_eye_in_hand_image']
shape_meta: ${shape_meta}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
shape_meta:
obs:
rgb:
shape: [3, 96, 96]
state:
shape: [9]
action:
shape: [7]
wandb:
entity: ${oc.env:DPPO_WANDB_ENTITY}
project: robomimic-${env_name}-finetune
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 151
n_critic_warmup_itr: 2
n_steps: 300
gamma: 0.999
augment: True
grad_accumulate: 15
actor_lr: 5e-5
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 5e-5
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100
val_freq: 10
render:
freq: 1
num: 0
# PPO specific
reward_scale_running: True
reward_scale_const: 1.0
gae_lambda: 0.95
batch_size: 500
logprob_batch_size: 500
update_epochs: 10
vf_coef: 0.5
target_kl: 1
model:
_target_: model.diffusion.diffusion_ppo.PPODiffusion
# HP to tune
gamma_denoising: 0.99
clip_ploss_coef: 0.01
clip_ploss_coef_base: 0.001
clip_ploss_coef_rate: 3
randn_clip_value: 3
min_sampling_denoising_std: 0.1
min_logprob_denoising_std: 0.1
#
use_ddim: ${use_ddim}
ddim_steps: ${ft_denoising_steps}
learn_eta: False
eta:
base_eta: 1
input_dim: ${obs_dim}
mlp_dims: [256, 256]
action_dim: ${action_dim}
min_eta: 0.1
max_eta: 1.0
_target_: model.diffusion.eta.EtaFixed
network_path: ${base_policy_path}
actor:
_target_: model.diffusion.unet.VisionUnet1D
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per frame x img_cond_steps: the image history is concatenated channel-wise
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
augment: False
spatial_emb: 128
diffusion_step_embed_dim: 32
dim: 40
dim_mults: [1, 2]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
action_dim: ${action_dim}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
critic:
_target_: model.common.critic.ViTCritic
spatial_emb: 128
augment: False
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per frame x img_cond_steps: the image history is concatenated channel-wise
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
mlp_dims: [256, 256, 256]
activation_type: Mish
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
ft_denoising_steps: ${ft_denoising_steps}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -45,20 +45,20 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 300 n_train_itr: 151
n_critic_warmup_itr: 2 n_critic_warmup_itr: 2
n_steps: 300 n_steps: 300
gamma: 0.999 gamma: 0.999
actor_lr: 1e-5 actor_lr: 1e-4
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-5 min_lr: 1e-4
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100

View File

@ -1,7 +1,7 @@
defaults: defaults:
- _self_ - _self_
hydra: hydra:
run: run:
dir: ${logdir} dir: ${logdir}
_target_: agent.finetune.train_ppo_gaussian_img_agent.TrainPPOImgGaussianAgent _target_: agent.finetune.train_ppo_gaussian_img_agent.TrainPPOImgGaussianAgent
@ -57,22 +57,22 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 200 n_train_itr: 151
n_critic_warmup_itr: 2 n_critic_warmup_itr: 2
n_steps: 300 n_steps: 300
gamma: 0.999 gamma: 0.999
augment: True augment: True
grad_accumulate: 5 grad_accumulate: 5
actor_lr: 1e-5 actor_lr: 1e-4
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 200 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-5 min_lr: 1e-4
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 200 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100
@ -140,9 +140,9 @@ model:
embed_style: embed2 embed_style: embed2
embed_norm: 0 embed_norm: 0
img_cond_steps: ${img_cond_steps} img_cond_steps: ${img_cond_steps}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256] mlp_dims: [256, 256, 256]
activation_type: Mish activation_type: Mish
residual_style: True residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps} horizon_steps: ${horizon_steps}
device: ${device} device: ${device}

View File

@ -45,20 +45,20 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 300 n_train_itr: 151
n_critic_warmup_itr: 2 n_critic_warmup_itr: 2
n_steps: 300 n_steps: 300
gamma: 0.999 gamma: 0.999
actor_lr: 1e-5 actor_lr: 1e-4
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-5 min_lr: 1e-4
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100

View File

@ -46,20 +46,20 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 300 n_train_itr: 151
n_critic_warmup_itr: 2 n_critic_warmup_itr: 2
n_steps: 300 n_steps: 300
gamma: 0.999 gamma: 0.999
actor_lr: 1e-5 actor_lr: 1e-4
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-5 min_lr: 1e-4
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100

View File

@ -1,13 +1,14 @@
defaults: defaults:
- _self_ - _self_
hydra: hydra:
run: run:
dir: ${logdir} dir: ${logdir}
_target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent _target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
name: ${env_name}_ft_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps} name: ${env_name}_ft_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed} logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_5000.pt # use 8000 for comparing policy parameterizations base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_5000.pt # use 5000 for comparing diffusion rl algorithms
# base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_8000.pt # use 8000 for comparing policy parameterizations
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
@ -54,13 +55,13 @@ train:
actor_lr: 1e-4 actor_lr: 1e-4
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-4 min_lr: 1e-4
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100

View File

@ -60,22 +60,22 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 151 n_train_itr: 81
n_critic_warmup_itr: 2 n_critic_warmup_itr: 2
n_steps: 300 n_steps: 300
gamma: 0.999 gamma: 0.999
augment: True augment: True
grad_accumulate: 15 grad_accumulate: 15
actor_lr: 1e-4 actor_lr: 5e-5
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-4 min_lr: 5e-5
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100

View File

@ -27,7 +27,7 @@ env:
name: ${env_name} name: ${env_name}
best_reward_threshold_for_success: 1 best_reward_threshold_for_success: 1
max_episode_steps: 300 max_episode_steps: 300
save_video: false save_video: False
wrappers: wrappers:
robomimic_lowdim: robomimic_lowdim:
normalization_path: ${normalization_path} normalization_path: ${normalization_path}
@ -47,20 +47,20 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 300 n_train_itr: 81
n_critic_warmup_itr: 2 n_critic_warmup_itr: 2
n_steps: 300 n_steps: 300
gamma: 0.999 gamma: 0.999
actor_lr: 1e-5 actor_lr: 1e-4
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-5 min_lr: 1e-4
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100
@ -102,10 +102,10 @@ model:
action_dim: ${action_dim} action_dim: ${action_dim}
critic: critic:
_target_: model.common.critic.CriticObs _target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256] mlp_dims: [256, 256, 256]
activation_type: Mish activation_type: Mish
residual_style: True residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
ft_denoising_steps: ${ft_denoising_steps} ft_denoising_steps: ${ft_denoising_steps}
horizon_steps: ${horizon_steps} horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim} obs_dim: ${obs_dim}

View File

@ -0,0 +1,173 @@
defaults:
- _self_
hydra:
run:
dir: ${logdir}
_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent
name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-35-19_42/checkpoint/state_500.pt
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
seed: 42
device: cuda:0
env_name: lift
obs_dim: 9
action_dim: 7
denoising_steps: 100
ft_denoising_steps: 5
cond_steps: 1
img_cond_steps: 1
horizon_steps: 4
act_steps: 4
use_ddim: True
env:
n_envs: 50
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 300
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos']
image_keys: ['robot0_eye_in_hand_image']
shape_meta: ${shape_meta}
multi_step:
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True
shape_meta:
obs:
rgb:
shape: [3, 96, 96]
state:
shape: [9]
action:
shape: [7]
wandb:
entity: ${oc.env:DPPO_WANDB_ENTITY}
project: robomimic-${env_name}-finetune
run: ${now:%H-%M-%S}_${name}
train:
n_train_itr: 81
n_critic_warmup_itr: 2
n_steps: 300
gamma: 0.999
augment: True
grad_accumulate: 15
actor_lr: 5e-5
actor_weight_decay: 0
actor_lr_scheduler:
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 5e-5
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
min_lr: 1e-3
save_model_freq: 100
val_freq: 10
render:
freq: 1
num: 0
# PPO specific
reward_scale_running: True
reward_scale_const: 1.0
gae_lambda: 0.95
batch_size: 500
logprob_batch_size: 500
update_epochs: 10
vf_coef: 0.5
target_kl: 1
model:
_target_: model.diffusion.diffusion_ppo.PPODiffusion
# HP to tune
gamma_denoising: 0.99
clip_ploss_coef: 0.01
clip_ploss_coef_base: 0.001
clip_ploss_coef_rate: 3
randn_clip_value: 3
min_sampling_denoising_std: 0.1
min_logprob_denoising_std: 0.1
#
use_ddim: ${use_ddim}
ddim_steps: ${ft_denoising_steps}
learn_eta: False
eta:
base_eta: 1
input_dim: ${obs_dim}
mlp_dims: [256, 256]
action_dim: ${action_dim}
min_eta: 0.1
max_eta: 1.0
_target_: model.diffusion.eta.EtaFixed
network_path: ${base_policy_path}
actor:
_target_: model.diffusion.unet.VisionUnet1D
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per frame x img_cond_steps: the image history is concatenated channel-wise
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
augment: False
spatial_emb: 128
diffusion_step_embed_dim: 32
dim: 40
dim_mults: [1, 2]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
action_dim: ${action_dim}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
critic:
_target_: model.common.critic.ViTCritic
spatial_emb: 128
augment: False
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per frame x img_cond_steps: the image history is concatenated channel-wise
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
mlp_dims: [256, 256, 256]
activation_type: Mish
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
ft_denoising_steps: ${ft_denoising_steps}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -25,7 +25,7 @@ env:
name: ${env_name} name: ${env_name}
best_reward_threshold_for_success: 1 best_reward_threshold_for_success: 1
max_episode_steps: 300 max_episode_steps: 300
save_video: false save_video: False
wrappers: wrappers:
robomimic_lowdim: robomimic_lowdim:
normalization_path: ${normalization_path} normalization_path: ${normalization_path}
@ -45,20 +45,20 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 300 n_train_itr: 81
n_critic_warmup_itr: 2 n_critic_warmup_itr: 2
n_steps: 300 n_steps: 300
gamma: 0.999 gamma: 0.999
actor_lr: 1e-5 actor_lr: 1e-4
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-5 min_lr: 1e-4
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100
@ -93,9 +93,9 @@ model:
action_dim: ${action_dim} action_dim: ${action_dim}
critic: critic:
_target_: model.common.critic.CriticObs _target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256] mlp_dims: [256, 256, 256]
activation_type: Mish activation_type: Mish
residual_style: True residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps} horizon_steps: ${horizon_steps}
device: ${device} device: ${device}

View File

@ -1,7 +1,7 @@
defaults: defaults:
- _self_ - _self_
hydra: hydra:
run: run:
dir: ${logdir} dir: ${logdir}
_target_: agent.finetune.train_ppo_gaussian_img_agent.TrainPPOImgGaussianAgent _target_: agent.finetune.train_ppo_gaussian_img_agent.TrainPPOImgGaussianAgent
@ -57,22 +57,22 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 200 n_train_itr: 81
n_critic_warmup_itr: 2 n_critic_warmup_itr: 2
n_steps: 300 n_steps: 300
gamma: 0.999 gamma: 0.999
augment: True augment: True
grad_accumulate: 5 grad_accumulate: 5
actor_lr: 1e-5 actor_lr: 1e-4
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 200 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-5 min_lr: 1e-4
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 200 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100
@ -140,9 +140,9 @@ model:
embed_style: embed2 embed_style: embed2
embed_norm: 0 embed_norm: 0
img_cond_steps: ${img_cond_steps} img_cond_steps: ${img_cond_steps}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256] mlp_dims: [256, 256, 256]
activation_type: Mish activation_type: Mish
residual_style: True residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps} horizon_steps: ${horizon_steps}
device: ${device} device: ${device}

View File

@ -25,7 +25,7 @@ env:
name: ${env_name} name: ${env_name}
best_reward_threshold_for_success: 1 best_reward_threshold_for_success: 1
max_episode_steps: 300 max_episode_steps: 300
save_video: false save_video: False
wrappers: wrappers:
robomimic_lowdim: robomimic_lowdim:
normalization_path: ${normalization_path} normalization_path: ${normalization_path}
@ -45,20 +45,20 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 300 n_train_itr: 81
n_critic_warmup_itr: 2 n_critic_warmup_itr: 2
n_steps: 300 n_steps: 300
gamma: 0.999 gamma: 0.999
actor_lr: 1e-5 actor_lr: 1e-4
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-5 min_lr: 1e-4
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100
@ -94,9 +94,9 @@ model:
action_dim: ${action_dim} action_dim: ${action_dim}
critic: critic:
_target_: model.common.critic.CriticObs _target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256] mlp_dims: [256, 256, 256]
activation_type: Mish activation_type: Mish
residual_style: True residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps} horizon_steps: ${horizon_steps}
device: ${device} device: ${device}

View File

@ -26,7 +26,7 @@ env:
name: ${env_name} name: ${env_name}
best_reward_threshold_for_success: 1 best_reward_threshold_for_success: 1
max_episode_steps: 300 max_episode_steps: 300
save_video: false save_video: False
wrappers: wrappers:
robomimic_lowdim: robomimic_lowdim:
normalization_path: ${normalization_path} normalization_path: ${normalization_path}
@ -46,20 +46,20 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 300 n_train_itr: 81
n_critic_warmup_itr: 2 n_critic_warmup_itr: 2
n_steps: 300 n_steps: 300
gamma: 0.999 gamma: 0.999
actor_lr: 1e-5 actor_lr: 1e-4
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-5 min_lr: 1e-4
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100
@ -94,9 +94,9 @@ model:
action_dim: ${action_dim} action_dim: ${action_dim}
critic: critic:
_target_: model.common.critic.CriticObs _target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256] mlp_dims: [256, 256, 256]
activation_type: Mish activation_type: Mish
residual_style: True residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps} horizon_steps: ${horizon_steps}
device: ${device} device: ${device}

View File

@ -26,7 +26,7 @@ env:
name: ${env_name} name: ${env_name}
best_reward_threshold_for_success: 1 best_reward_threshold_for_success: 1
max_episode_steps: 300 max_episode_steps: 300
save_video: false save_video: False
wrappers: wrappers:
robomimic_lowdim: robomimic_lowdim:
normalization_path: ${normalization_path} normalization_path: ${normalization_path}
@ -46,20 +46,20 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 300 n_train_itr: 81
n_critic_warmup_itr: 2 n_critic_warmup_itr: 2
n_steps: 300 n_steps: 300
gamma: 0.999 gamma: 0.999
actor_lr: 1e-5 actor_lr: 1e-4
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-5 min_lr: 1e-4
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100
@ -95,9 +95,9 @@ model:
action_dim: ${action_dim} action_dim: ${action_dim}
critic: critic:
_target_: model.common.critic.CriticObs _target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256] mlp_dims: [256, 256, 256]
activation_type: Mish activation_type: Mish
residual_style: True residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps} horizon_steps: ${horizon_steps}
device: ${device} device: ${device}

View File

@ -1,7 +1,7 @@
defaults: defaults:
- _self_ - _self_
hydra: hydra:
run: run:
dir: ${logdir} dir: ${logdir}
_target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent _target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
@ -27,7 +27,7 @@ env:
name: ${env_name} name: ${env_name}
best_reward_threshold_for_success: 1 best_reward_threshold_for_success: 1
max_episode_steps: 400 max_episode_steps: 400
save_video: false save_video: False
wrappers: wrappers:
robomimic_lowdim: robomimic_lowdim:
normalization_path: ${normalization_path} normalization_path: ${normalization_path}
@ -54,14 +54,14 @@ train:
actor_lr: 1e-4 actor_lr: 1e-4
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 0
min_lr: 1e-4 min_lr: 1e-4
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 0
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100
val_freq: 10 val_freq: 10

View File

@ -69,13 +69,13 @@ train:
actor_lr: 1e-5 actor_lr: 1e-5
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-5 min_lr: 1e-5
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100

View File

@ -1,7 +1,7 @@
defaults: defaults:
- _self_ - _self_
hydra: hydra:
run: run:
dir: ${logdir} dir: ${logdir}
_target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent _target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
@ -27,7 +27,7 @@ env:
name: ${env_name} name: ${env_name}
best_reward_threshold_for_success: 1 best_reward_threshold_for_success: 1
max_episode_steps: 400 max_episode_steps: 400
save_video: false save_video: False
wrappers: wrappers:
robomimic_lowdim: robomimic_lowdim:
normalization_path: ${normalization_path} normalization_path: ${normalization_path}
@ -47,21 +47,21 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 1000 n_train_itr: 201
n_critic_warmup_itr: 2 n_critic_warmup_itr: 2
n_steps: 400 n_steps: 400
gamma: 0.999 gamma: 0.999
actor_lr: 1e-5 actor_lr: 2e-5
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 0
min_lr: 1e-5 min_lr: 1e-4
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 0
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100
val_freq: 10 val_freq: 10
@ -102,10 +102,10 @@ model:
action_dim: ${action_dim} action_dim: ${action_dim}
critic: critic:
_target_: model.common.critic.CriticObs _target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256] mlp_dims: [256, 256, 256]
activation_type: Mish activation_type: Mish
residual_style: True residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
ft_denoising_steps: ${ft_denoising_steps} ft_denoising_steps: ${ft_denoising_steps}
horizon_steps: ${horizon_steps} horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim} obs_dim: ${obs_dim}

View File

@ -0,0 +1,173 @@
# Hydra config: PPO fine-tuning of an image-conditioned diffusion policy
# (ViT encoder + 1D UNet actor, ViT critic) on the robomimic `square` task.
# NOTE(review): nesting is shown flattened in this view; the key hierarchy
# (e.g. hydra.run.dir, env.wrappers.robomimic_image.*) presumably follows the
# indented layout of the repository file — confirm there before editing.
defaults:
- _self_
hydra:
run:
dir: ${logdir}
# Training agent class for this run.
_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent
# Run name encodes env, action horizon (ta), denoising steps (td) and
# fine-tuning denoising steps (tdf).
name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
# Output directory: $DPPO_LOG_DIR/robomimic-finetune/<name>/<date>_<time>_<seed>.
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
# Checkpoint passed to the model as `network_path` (see model section).
# NOTE(review): this path hard-codes `square`, `ta4_td100`, a specific run
# timestamp and `state_500.pt` instead of interpolating ${env_name},
# ${horizon_steps} or ${denoising_steps}; it must be updated by hand to match
# the pre-training run actually available.
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/square/square_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-36-37_42/checkpoint/state_500.pt
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz

seed: 42
device: cuda:0
env_name: square
# obs_dim / action_dim equal shape_meta.obs.state.shape ([9]) and
# shape_meta.action.shape ([7]) below; keep them in sync.
obs_dim: 9
action_dim: 7
# denoising_steps is forwarded to the model as-is; ft_denoising_steps is also
# used as model.ddim_steps.
# presumably: total steps of the base policy vs. steps fine-tuned — confirm.
denoising_steps: 100
ft_denoising_steps: 5
cond_steps: 1
img_cond_steps: 1
horizon_steps: 4
act_steps: 4
use_ddim: True

env:
n_envs: 50
name: ${env_name}
best_reward_threshold_for_success: 1
max_episode_steps: 400
save_video: False
use_image_obs: True
wrappers:
robomimic_image:
normalization_path: ${normalization_path}
low_dim_keys: ['robot0_eef_pos',
'robot0_eef_quat',
'robot0_gripper_qpos']
image_keys: ['agentview_image']
shape_meta: ${shape_meta}
multi_step:
# Wrapper step counts are tied to the top-level cond_steps / act_steps.
n_obs_steps: ${cond_steps}
n_action_steps: ${act_steps}
max_episode_steps: ${env.max_episode_steps}
reset_within_step: True

# Observation / action shapes shared by the env wrapper and the ViT backbones.
shape_meta:
obs:
rgb:
shape: [3, 96, 96]
state:
shape: [9]
action:
shape: [7]

wandb:
entity: ${oc.env:DPPO_WANDB_ENTITY}
project: robomimic-${env_name}-finetune
run: ${now:%H-%M-%S}_${name}

train:
n_train_itr: 301
n_critic_warmup_itr: 2
# Same value as env.max_episode_steps (400).
n_steps: 400
gamma: 0.999
augment: True
grad_accumulate: 20
actor_lr: 1e-5
actor_weight_decay: 0
actor_lr_scheduler:
# Scheduler cycle length follows n_train_itr instead of a hard-coded number.
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
# min_lr equals actor_lr here.
# presumably the LR stays flat after warmup — confirm against the scheduler implementation.
min_lr: 1e-5
critic_lr: 1e-3
critic_weight_decay: 0
critic_lr_scheduler:
first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10
# min_lr equals critic_lr here as well.
min_lr: 1e-3
save_model_freq: 100
val_freq: 10
render:
freq: 1
num: 0
# PPO specific
reward_scale_running: True
reward_scale_const: 1.0
gae_lambda: 0.95
batch_size: 500
logprob_batch_size: 1000
update_epochs: 10
vf_coef: 0.5
target_kl: 1

model:
_target_: model.diffusion.diffusion_ppo.PPODiffusion
# Hyperparameters to tune
gamma_denoising: 0.99
clip_ploss_coef: 0.01
clip_ploss_coef_base: 0.001
clip_ploss_coef_rate: 3
randn_clip_value: 3
min_sampling_denoising_std: 0.1
min_logprob_denoising_std: 0.1
#
use_ddim: ${use_ddim}
ddim_steps: ${ft_denoising_steps}
# Eta is not learned; the eta module below is instantiated as EtaFixed.
learn_eta: False
eta:
base_eta: 1
input_dim: ${obs_dim}
mlp_dims: [256, 256]
action_dim: ${action_dim}
min_eta: 0.1
max_eta: 1.0
_target_: model.diffusion.eta.EtaFixed
network_path: ${base_policy_path}
actor:
_target_: model.diffusion.unet.VisionUnet1D
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
# NOTE(review): augment is False here and in the critic while train.augment is
# True — presumably augmentation is applied by the training agent rather than
# inside the networks; confirm.
augment: False
spatial_emb: 128
diffusion_step_embed_dim: 32
dim: 64
dim_mults: [1, 2]
kernel_size: 5
n_groups: 8
smaller_encoder: False
cond_predict_scale: True
action_dim: ${action_dim}
# Low-dim conditioning size: obs_dim * cond_steps.
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
critic:
_target_: model.common.critic.ViTCritic
spatial_emb: 128
augment: False
# Critic backbone uses the same VitEncoder settings as the actor backbone.
backbone:
_target_: model.common.vit.VitEncoder
obs_shape: ${shape_meta.obs.rgb.shape}
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
img_h: ${shape_meta.obs.rgb.shape[1]}
img_w: ${shape_meta.obs.rgb.shape[2]}
cfg:
patch_size: 8
depth: 1
embed_dim: 128
num_heads: 4
embed_style: embed2
embed_norm: 0
img_cond_steps: ${img_cond_steps}
mlp_dims: [256, 256, 256]
activation_type: Mish
residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
ft_denoising_steps: ${ft_denoising_steps}
horizon_steps: ${horizon_steps}
obs_dim: ${obs_dim}
action_dim: ${action_dim}
denoising_steps: ${denoising_steps}
device: ${device}

View File

@ -25,7 +25,7 @@ env:
name: ${env_name} name: ${env_name}
best_reward_threshold_for_success: 1 best_reward_threshold_for_success: 1
max_episode_steps: 400 max_episode_steps: 400
save_video: false save_video: False
wrappers: wrappers:
robomimic_lowdim: robomimic_lowdim:
normalization_path: ${normalization_path} normalization_path: ${normalization_path}
@ -45,21 +45,21 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 1000 n_train_itr: 201
n_critic_warmup_itr: 2 n_critic_warmup_itr: 2
n_steps: 400 n_steps: 400
gamma: 0.999 gamma: 0.999
actor_lr: 1e-5 actor_lr: 1e-4
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 0
min_lr: 1e-5 min_lr: 1e-4
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 0
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100
val_freq: 10 val_freq: 10
@ -93,9 +93,9 @@ model:
action_dim: ${action_dim} action_dim: ${action_dim}
critic: critic:
_target_: model.common.critic.CriticObs _target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256] mlp_dims: [256, 256, 256]
activation_type: Mish activation_type: Mish
residual_style: True residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps} horizon_steps: ${horizon_steps}
device: ${device} device: ${device}

View File

@ -57,7 +57,7 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 500 n_train_itr: 301
n_critic_warmup_itr: 2 n_critic_warmup_itr: 2
n_steps: 400 n_steps: 400
gamma: 0.999 gamma: 0.999
@ -66,13 +66,13 @@ train:
actor_lr: 1e-5 actor_lr: 1e-5
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 500 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-5 min_lr: 1e-5
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 500 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 10
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100
@ -140,9 +140,9 @@ model:
embed_style: embed2 embed_style: embed2
embed_norm: 0 embed_norm: 0
img_cond_steps: ${img_cond_steps} img_cond_steps: ${img_cond_steps}
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256] mlp_dims: [256, 256, 256]
activation_type: Mish activation_type: Mish
residual_style: True residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps} horizon_steps: ${horizon_steps}
device: ${device} device: ${device}

View File

@ -25,7 +25,7 @@ env:
name: ${env_name} name: ${env_name}
best_reward_threshold_for_success: 1 best_reward_threshold_for_success: 1
max_episode_steps: 400 max_episode_steps: 400
save_video: false save_video: False
wrappers: wrappers:
robomimic_lowdim: robomimic_lowdim:
normalization_path: ${normalization_path} normalization_path: ${normalization_path}
@ -45,21 +45,21 @@ wandb:
run: ${now:%H-%M-%S}_${name} run: ${now:%H-%M-%S}_${name}
train: train:
n_train_itr: 1000 n_train_itr: 201
n_critic_warmup_itr: 2 n_critic_warmup_itr: 2
n_steps: 400 n_steps: 400
gamma: 0.999 gamma: 0.999
actor_lr: 1e-5 actor_lr: 1e-4
actor_weight_decay: 0 actor_weight_decay: 0
actor_lr_scheduler: actor_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 0
min_lr: 1e-5 min_lr: 1e-4
critic_lr: 1e-3 critic_lr: 1e-3
critic_weight_decay: 0 critic_weight_decay: 0
critic_lr_scheduler: critic_lr_scheduler:
first_cycle_steps: 1000 first_cycle_steps: ${train.n_train_itr}
warmup_steps: 10 warmup_steps: 0
min_lr: 1e-3 min_lr: 1e-3
save_model_freq: 100 save_model_freq: 100
val_freq: 10 val_freq: 10
@ -94,9 +94,9 @@ model:
action_dim: ${action_dim} action_dim: ${action_dim}
critic: critic:
_target_: model.common.critic.CriticObs _target_: model.common.critic.CriticObs
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
mlp_dims: [256, 256, 256] mlp_dims: [256, 256, 256]
activation_type: Mish activation_type: Mish
residual_style: True residual_style: True
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
horizon_steps: ${horizon_steps} horizon_steps: ${horizon_steps}
device: ${device} device: ${device}

Some files were not shown because too many files have changed in this diff Show More