v0.7 (#26)
* update from scratch configs * update gym pretraining configs - use fewer epochs * update robomimic pretraining configs - use fewer epochs * allow trajectory plotting in eval agent * add simple vit unet * update avoid pretraining configs - use fewer epochs * update furniture pretraining configs - use same amount of epochs as before * add robomimic diffusion unet pretraining configs * update robomimic finetuning configs - higher lr * add vit unet checkpoint urls * update pretraining and finetuning instructions as configs are updated
This commit is contained in:
parent
d2929f65e1
commit
1d04211666
@ -57,6 +57,7 @@ class EvalAgent:
|
|||||||
self.horizon_steps = cfg.horizon_steps
|
self.horizon_steps = cfg.horizon_steps
|
||||||
self.max_episode_steps = cfg.env.max_episode_steps
|
self.max_episode_steps = cfg.env.max_episode_steps
|
||||||
self.reset_at_iteration = cfg.env.get("reset_at_iteration", True)
|
self.reset_at_iteration = cfg.env.get("reset_at_iteration", True)
|
||||||
|
self.save_full_observations = cfg.env.get("save_full_observations", False)
|
||||||
self.furniture_sparse_reward = (
|
self.furniture_sparse_reward = (
|
||||||
cfg.env.specific.get("sparse_reward", False)
|
cfg.env.specific.get("sparse_reward", False)
|
||||||
if "specific" in cfg.env
|
if "specific" in cfg.env
|
||||||
@ -85,6 +86,10 @@ class EvalAgent:
|
|||||||
assert not (
|
assert not (
|
||||||
self.n_render <= 0 and self.render_video
|
self.n_render <= 0 and self.render_video
|
||||||
), "Need to set n_render > 0 if saving video"
|
), "Need to set n_render > 0 if saving video"
|
||||||
|
self.traj_plotter = (
|
||||||
|
hydra.utils.instantiate(cfg.plotter)
|
||||||
|
if "plotter" in cfg else None
|
||||||
|
)
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
pass
|
pass
|
||||||
|
@ -37,6 +37,11 @@ class EvalDiffusionAgent(EvalAgent):
|
|||||||
prev_obs_venv = self.reset_env_all(options_venv=options_venv)
|
prev_obs_venv = self.reset_env_all(options_venv=options_venv)
|
||||||
firsts_trajs[0] = 1
|
firsts_trajs[0] = 1
|
||||||
reward_trajs = np.zeros((self.n_steps, self.n_envs))
|
reward_trajs = np.zeros((self.n_steps, self.n_envs))
|
||||||
|
if self.save_full_observations: # state-only
|
||||||
|
obs_full_trajs = np.empty((0, self.n_envs, self.obs_dim))
|
||||||
|
obs_full_trajs = np.vstack(
|
||||||
|
(obs_full_trajs, prev_obs_venv["state"][:, -1][None])
|
||||||
|
)
|
||||||
|
|
||||||
# Collect a set of trajectories from env
|
# Collect a set of trajectories from env
|
||||||
for step in range(self.n_steps):
|
for step in range(self.n_steps):
|
||||||
@ -62,6 +67,13 @@ class EvalDiffusionAgent(EvalAgent):
|
|||||||
)
|
)
|
||||||
reward_trajs[step] = reward_venv
|
reward_trajs[step] = reward_venv
|
||||||
firsts_trajs[step + 1] = terminated_venv | truncated_venv
|
firsts_trajs[step + 1] = terminated_venv | truncated_venv
|
||||||
|
if self.save_full_observations: # state-only
|
||||||
|
obs_full_venv = np.array(
|
||||||
|
[info["full_obs"]["state"] for info in info_venv]
|
||||||
|
) # n_envs x act_steps x obs_dim
|
||||||
|
obs_full_trajs = np.vstack(
|
||||||
|
(obs_full_trajs, obs_full_venv.transpose(1, 0, 2))
|
||||||
|
)
|
||||||
|
|
||||||
# update for next step
|
# update for next step
|
||||||
prev_obs_venv = obs_venv
|
prev_obs_venv = obs_venv
|
||||||
@ -108,6 +120,16 @@ class EvalDiffusionAgent(EvalAgent):
|
|||||||
success_rate = 0
|
success_rate = 0
|
||||||
log.info("[WARNING] No episode completed within the iteration!")
|
log.info("[WARNING] No episode completed within the iteration!")
|
||||||
|
|
||||||
|
# Plot state trajectories (only in D3IL)
|
||||||
|
if self.traj_plotter is not None:
|
||||||
|
self.traj_plotter(
|
||||||
|
obs_full_trajs=obs_full_trajs,
|
||||||
|
n_render=self.n_render,
|
||||||
|
max_episode_steps=self.max_episode_steps,
|
||||||
|
render_dir=self.render_dir,
|
||||||
|
itr=0,
|
||||||
|
)
|
||||||
|
|
||||||
# Log loss and save metrics
|
# Log loss and save metrics
|
||||||
time = timer()
|
time = timer()
|
||||||
log.info(
|
log.info(
|
||||||
|
68
cfg/d3il/eval/avoid_m1/eval_diffusion_mlp.yaml
Normal file
68
cfg/d3il/eval/avoid_m1/eval_diffusion_mlp.yaml
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/d3il-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path:
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/d3il/avoid_m1/normalization.npz
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: avoiding-m5
|
||||||
|
obs_dim: 4
|
||||||
|
action_dim: 2
|
||||||
|
denoising_steps: 20
|
||||||
|
cond_steps: 1
|
||||||
|
horizon_steps: 4
|
||||||
|
act_steps: 4
|
||||||
|
|
||||||
|
n_steps: 25
|
||||||
|
render_num: 40
|
||||||
|
|
||||||
|
plotter:
|
||||||
|
_target_: env.plot_traj.TrajPlotter
|
||||||
|
env_type: avoid
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 40
|
||||||
|
name: ${env_name}
|
||||||
|
max_episode_steps: 100
|
||||||
|
reset_at_iteration: True
|
||||||
|
save_video: False
|
||||||
|
best_reward_threshold_for_success: 2
|
||||||
|
save_full_observations: True
|
||||||
|
wrappers:
|
||||||
|
d3il_lowdim:
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
multi_step:
|
||||||
|
n_obs_steps: ${cond_steps}
|
||||||
|
n_action_steps: ${act_steps}
|
||||||
|
max_episode_steps: ${env.max_episode_steps}
|
||||||
|
pass_full_observations: ${env.save_full_observations}
|
||||||
|
reset_within_step: False
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
|
#
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
network:
|
||||||
|
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
|
||||||
|
time_dim: 16
|
||||||
|
mlp_dims: [512, 512, 512]
|
||||||
|
activation_type: ReLU
|
||||||
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
@ -25,12 +25,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 15000
|
n_epochs: 5000
|
||||||
batch_size: 16
|
batch_size: 16
|
||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 15000
|
first_cycle_steps: 5000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -24,12 +24,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 10000
|
n_epochs: 5000
|
||||||
batch_size: 16
|
batch_size: 16
|
||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 5000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -25,12 +25,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 10000
|
n_epochs: 5000
|
||||||
batch_size: 32
|
batch_size: 16
|
||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 5000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -25,12 +25,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 15000
|
n_epochs: 5000
|
||||||
batch_size: 16
|
batch_size: 16
|
||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 15000
|
first_cycle_steps: 5000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -24,12 +24,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 10000
|
n_epochs: 5000
|
||||||
batch_size: 16
|
batch_size: 16
|
||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 5000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -25,12 +25,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 10000
|
n_epochs: 5000
|
||||||
batch_size: 32
|
batch_size: 16
|
||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 5000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -25,12 +25,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 15000
|
n_epochs: 5000
|
||||||
batch_size: 16
|
batch_size: 16
|
||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 15000
|
first_cycle_steps: 5000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -24,12 +24,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 10000
|
n_epochs: 5000
|
||||||
batch_size: 16
|
batch_size: 16
|
||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 5000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -25,12 +25,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 10000
|
n_epochs: 5000
|
||||||
batch_size: 32
|
batch_size: 32
|
||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 5000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
## Fine-tuning experiments
|
## Fine-tuning experiments
|
||||||
|
|
||||||
|
**Update, Nov 20 2024**: In v0.7 we updated the fine-tuning configs because we found that sample efficiency improves with a higher actor learning rate and other hyperparameter changes. If you would like to replicate the original experimental results from the paper, please use the configs from v0.6. Otherwise, we recommend starting with the configs from v0.7 for your applications.
|
||||||
|
|
||||||
### Comparing diffusion-based RL algorithms (Sec. 5.1)
|
### Comparing diffusion-based RL algorithms (Sec. 5.1)
|
||||||
Gym configs are under `cfg/gym/finetune/<env_name>/`, and the naming follows `ft_<alg_name>_diffusion_mlp`, e.g., `ft_awr_diffusion_mlp`. `alg_name` is one of `rwr`, `awr`, `dipo`, `idql`, `dql`, `qsm`, `ppo` (DPPO), `ppo_exact` (exact likelihood). They share the same pre-trained checkpoint in each env.
|
Gym configs are under `cfg/gym/finetune/<env_name>/`, and the naming follows `ft_<alg_name>_diffusion_mlp`, e.g., `ft_awr_diffusion_mlp`. `alg_name` is one of `rwr`, `awr`, `dipo`, `idql`, `dql`, `qsm`, `ppo` (DPPO), `ppo_exact` (exact likelihood). They share the same pre-trained checkpoint in each env.
|
||||||
|
|
||||||
|
66
cfg/furniture/eval/lamp_low/eval_diffusion_mlp.yaml
Normal file
66
cfg/furniture/eval/lamp_low/eval_diffusion_mlp.yaml
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path:
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
|
||||||
|
obs_dim: 44
|
||||||
|
action_dim: 10
|
||||||
|
denoising_steps: 100
|
||||||
|
cond_steps: 1
|
||||||
|
horizon_steps: 8
|
||||||
|
act_steps: 8
|
||||||
|
use_ddim: True
|
||||||
|
ddim_steps: 5
|
||||||
|
|
||||||
|
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
|
||||||
|
render_num: 0
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 1000
|
||||||
|
name: ${env_name}
|
||||||
|
env_type: furniture
|
||||||
|
max_episode_steps: 1000
|
||||||
|
best_reward_threshold_for_success: 2
|
||||||
|
specific:
|
||||||
|
headless: true
|
||||||
|
furniture: lamp
|
||||||
|
randomness: low
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
obs_steps: ${cond_steps}
|
||||||
|
act_steps: ${act_steps}
|
||||||
|
sparse_reward: True
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
|
randn_clip_value: 3
|
||||||
|
#
|
||||||
|
use_ddim: ${use_ddim}
|
||||||
|
ddim_steps: ${ddim_steps}
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
network:
|
||||||
|
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
|
||||||
|
time_dim: 32
|
||||||
|
mlp_dims: [1024, 1024, 1024, 1024, 1024, 1024, 1024]
|
||||||
|
cond_mlp_dims: [512, 64]
|
||||||
|
use_layernorm: True # needed for larger MLP
|
||||||
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
68
cfg/furniture/eval/lamp_low/eval_diffusion_unet.yaml
Normal file
68
cfg/furniture/eval/lamp_low/eval_diffusion_unet.yaml
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path:
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
|
||||||
|
obs_dim: 44
|
||||||
|
action_dim: 10
|
||||||
|
denoising_steps: 100
|
||||||
|
cond_steps: 1
|
||||||
|
horizon_steps: 16
|
||||||
|
act_steps: 8
|
||||||
|
use_ddim: True
|
||||||
|
ddim_steps: 5
|
||||||
|
|
||||||
|
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
|
||||||
|
render_num: 0
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 1000
|
||||||
|
name: ${env_name}
|
||||||
|
env_type: furniture
|
||||||
|
max_episode_steps: 1000
|
||||||
|
best_reward_threshold_for_success: 2
|
||||||
|
specific:
|
||||||
|
headless: true
|
||||||
|
furniture: lamp
|
||||||
|
randomness: low
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
obs_steps: ${cond_steps}
|
||||||
|
act_steps: ${act_steps}
|
||||||
|
sparse_reward: True
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
|
randn_clip_value: 3
|
||||||
|
#
|
||||||
|
use_ddim: ${use_ddim}
|
||||||
|
ddim_steps: ${ddim_steps}
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
network:
|
||||||
|
_target_: model.diffusion.unet.Unet1D
|
||||||
|
diffusion_step_embed_dim: 16
|
||||||
|
dim: 64
|
||||||
|
dim_mults: [1, 2, 4]
|
||||||
|
kernel_size: 5
|
||||||
|
n_groups: 8
|
||||||
|
smaller_encoder: False
|
||||||
|
cond_predict_scale: True
|
||||||
|
groupnorm_eps: 1e-4 # not important
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
@ -7,7 +7,7 @@ _target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
|||||||
|
|
||||||
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||||
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
base_policy_path: ${oc.env:DPPO_LOG_DIR}/furniture-pretrain/one_leg/one_leg_low_dim_pre_diffusion_mlp_ta8_td100/2024-07-22_20-01-16/checkpoint/state_8000.pt
|
base_policy_path:
|
||||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
|
||||||
|
|
||||||
seed: 42
|
seed: 42
|
||||||
|
68
cfg/furniture/eval/one_leg_low/eval_diffusion_unet.yaml
Normal file
68
cfg/furniture/eval/one_leg_low/eval_diffusion_unet.yaml
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path:
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
|
||||||
|
obs_dim: 58
|
||||||
|
action_dim: 10
|
||||||
|
denoising_steps: 100
|
||||||
|
cond_steps: 1
|
||||||
|
horizon_steps: 16
|
||||||
|
act_steps: 8
|
||||||
|
use_ddim: True
|
||||||
|
ddim_steps: 5
|
||||||
|
|
||||||
|
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
|
||||||
|
render_num: 0
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 1000
|
||||||
|
name: ${env_name}
|
||||||
|
env_type: furniture
|
||||||
|
max_episode_steps: 700
|
||||||
|
best_reward_threshold_for_success: 1
|
||||||
|
specific:
|
||||||
|
headless: true
|
||||||
|
furniture: one_leg
|
||||||
|
randomness: low
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
obs_steps: ${cond_steps}
|
||||||
|
act_steps: ${act_steps}
|
||||||
|
sparse_reward: True
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
|
randn_clip_value: 3
|
||||||
|
#
|
||||||
|
use_ddim: ${use_ddim}
|
||||||
|
ddim_steps: ${ddim_steps}
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
network:
|
||||||
|
_target_: model.diffusion.unet.Unet1D
|
||||||
|
diffusion_step_embed_dim: 16
|
||||||
|
dim: 64
|
||||||
|
dim_mults: [1, 2, 4]
|
||||||
|
kernel_size: 5
|
||||||
|
n_groups: 8
|
||||||
|
smaller_encoder: False
|
||||||
|
cond_predict_scale: True
|
||||||
|
groupnorm_eps: 1e-4 # not important
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
66
cfg/furniture/eval/round_table_low/eval_diffusion_mlp.yaml
Normal file
66
cfg/furniture/eval/round_table_low/eval_diffusion_mlp.yaml
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path:
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
|
||||||
|
obs_dim: 44
|
||||||
|
action_dim: 10
|
||||||
|
denoising_steps: 100
|
||||||
|
cond_steps: 1
|
||||||
|
horizon_steps: 8
|
||||||
|
act_steps: 8
|
||||||
|
use_ddim: True
|
||||||
|
ddim_steps: 5
|
||||||
|
|
||||||
|
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
|
||||||
|
render_num: 0
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 1000
|
||||||
|
name: ${env_name}
|
||||||
|
env_type: furniture
|
||||||
|
max_episode_steps: 1000
|
||||||
|
best_reward_threshold_for_success: 2
|
||||||
|
specific:
|
||||||
|
headless: true
|
||||||
|
furniture: round_table
|
||||||
|
randomness: low
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
obs_steps: ${cond_steps}
|
||||||
|
act_steps: ${act_steps}
|
||||||
|
sparse_reward: True
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
|
randn_clip_value: 3
|
||||||
|
#
|
||||||
|
use_ddim: ${use_ddim}
|
||||||
|
ddim_steps: ${ddim_steps}
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
network:
|
||||||
|
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
|
||||||
|
time_dim: 32
|
||||||
|
mlp_dims: [1024, 1024, 1024, 1024, 1024, 1024, 1024]
|
||||||
|
cond_mlp_dims: [512, 64]
|
||||||
|
use_layernorm: True # needed for larger MLP
|
||||||
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
68
cfg/furniture/eval/round_table_low/eval_diffusion_unet.yaml
Normal file
68
cfg/furniture/eval/round_table_low/eval_diffusion_unet.yaml
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/furniture-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path:
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/furniture/${env.specific.furniture}_${env.specific.randomness}/normalization.pth
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: ${env.specific.furniture}_${env.specific.randomness}_dim
|
||||||
|
obs_dim: 44
|
||||||
|
action_dim: 10
|
||||||
|
denoising_steps: 100
|
||||||
|
cond_steps: 1
|
||||||
|
horizon_steps: 16
|
||||||
|
act_steps: 8
|
||||||
|
use_ddim: True
|
||||||
|
ddim_steps: 5
|
||||||
|
|
||||||
|
n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'}
|
||||||
|
render_num: 0
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 1000
|
||||||
|
name: ${env_name}
|
||||||
|
env_type: furniture
|
||||||
|
max_episode_steps: 1000
|
||||||
|
best_reward_threshold_for_success: 2
|
||||||
|
specific:
|
||||||
|
headless: true
|
||||||
|
furniture: round_table
|
||||||
|
randomness: low
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
obs_steps: ${cond_steps}
|
||||||
|
act_steps: ${act_steps}
|
||||||
|
sparse_reward: True
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
|
randn_clip_value: 3
|
||||||
|
#
|
||||||
|
use_ddim: ${use_ddim}
|
||||||
|
ddim_steps: ${ddim_steps}
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
network:
|
||||||
|
_target_: model.diffusion.unet.Unet1D
|
||||||
|
diffusion_step_embed_dim: 16
|
||||||
|
dim: 64
|
||||||
|
dim_mults: [1, 2, 4]
|
||||||
|
kernel_size: 5
|
||||||
|
n_groups: 8
|
||||||
|
smaller_encoder: False
|
||||||
|
cond_predict_scale: True
|
||||||
|
groupnorm_eps: 1e-4 # not important
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
@ -31,7 +31,7 @@ train:
|
|||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 8000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -31,7 +31,7 @@ train:
|
|||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 8000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -30,7 +30,7 @@ train:
|
|||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 3000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -31,7 +31,7 @@ train:
|
|||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 8000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -31,7 +31,7 @@ train:
|
|||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 8000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -30,7 +30,7 @@ train:
|
|||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 3000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -31,7 +31,7 @@ train:
|
|||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 8000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -31,7 +31,7 @@ train:
|
|||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 8000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -30,7 +30,7 @@ train:
|
|||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 3000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -31,7 +31,7 @@ train:
|
|||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 8000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -31,7 +31,7 @@ train:
|
|||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 8000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -25,12 +25,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 10000
|
n_epochs: 3000
|
||||||
batch_size: 256
|
batch_size: 256
|
||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 3000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -31,7 +31,7 @@ train:
|
|||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 8000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -31,7 +31,7 @@ train:
|
|||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 8000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -30,7 +30,7 @@ train:
|
|||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 3000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -31,7 +31,7 @@ train:
|
|||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 8000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -31,7 +31,7 @@ train:
|
|||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 8000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -30,7 +30,7 @@ train:
|
|||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 3000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -17,10 +17,10 @@ obs_dim: 17
|
|||||||
action_dim: 6
|
action_dim: 6
|
||||||
denoising_steps: 20
|
denoising_steps: 20
|
||||||
cond_steps: 1
|
cond_steps: 1
|
||||||
horizon_steps: 1
|
horizon_steps: 4
|
||||||
act_steps: 1
|
act_steps: 4
|
||||||
|
|
||||||
n_steps: 1000 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation.
|
n_steps: 250 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation.
|
||||||
render_num: 0
|
render_num: 0
|
||||||
|
|
||||||
env:
|
env:
|
||||||
|
@ -20,7 +20,7 @@ cond_steps: 1
|
|||||||
horizon_steps: 4
|
horizon_steps: 4
|
||||||
act_steps: 4
|
act_steps: 4
|
||||||
|
|
||||||
n_steps: 500 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation.
|
n_steps: 250 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation.
|
||||||
render_num: 0
|
render_num: 0
|
||||||
|
|
||||||
env:
|
env:
|
||||||
|
61
cfg/gym/eval/walker2d-v2/eval_diffusion_mlp.yaml
Normal file
61
cfg/gym/eval/walker2d-v2/eval_diffusion_mlp.yaml
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/gym-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path:
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/gym/${env_name}/normalization.npz
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: walker2d-medium-v2
|
||||||
|
obs_dim: 17
|
||||||
|
action_dim: 6
|
||||||
|
denoising_steps: 20
|
||||||
|
cond_steps: 1
|
||||||
|
horizon_steps: 4
|
||||||
|
act_steps: 4
|
||||||
|
|
||||||
|
n_steps: 250 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation.
|
||||||
|
render_num: 0
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 40
|
||||||
|
name: ${env_name}
|
||||||
|
max_episode_steps: 1000
|
||||||
|
reset_at_iteration: False
|
||||||
|
save_video: False
|
||||||
|
best_reward_threshold_for_success: 3 # success rate not relevant for gym tasks
|
||||||
|
wrappers:
|
||||||
|
mujoco_locomotion_lowdim:
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
multi_step:
|
||||||
|
n_obs_steps: ${cond_steps}
|
||||||
|
n_action_steps: ${act_steps}
|
||||||
|
max_episode_steps: ${env.max_episode_steps}
|
||||||
|
reset_within_step: True
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
|
#
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
network:
|
||||||
|
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
|
||||||
|
time_dim: 16
|
||||||
|
mlp_dims: [512, 512, 512]
|
||||||
|
activation_type: ReLU
|
||||||
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
@ -24,12 +24,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 3000
|
n_epochs: 200
|
||||||
batch_size: 128
|
batch_size: 128
|
||||||
learning_rate: 1e-3
|
learning_rate: 1e-3
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 3000
|
first_cycle_steps: 200
|
||||||
warmup_steps: 1
|
warmup_steps: 1
|
||||||
min_lr: 1e-4
|
min_lr: 1e-4
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
|
@ -23,15 +23,14 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 500
|
n_epochs: 200
|
||||||
batch_size: 128
|
batch_size: 128
|
||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: 200
|
||||||
warmup_steps: 1
|
warmup_steps: 1
|
||||||
min_lr: 1e-4
|
min_lr: 1e-4
|
||||||
|
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
|
|
||||||
model:
|
model:
|
||||||
|
@ -24,12 +24,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 3000
|
n_epochs: 200
|
||||||
batch_size: 128
|
batch_size: 128
|
||||||
learning_rate: 1e-3
|
learning_rate: 1e-3
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 3000
|
first_cycle_steps: 200
|
||||||
warmup_steps: 1
|
warmup_steps: 1
|
||||||
min_lr: 1e-4
|
min_lr: 1e-4
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
|
@ -23,12 +23,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 500
|
n_epochs: 200
|
||||||
batch_size: 128
|
batch_size: 128
|
||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: 200
|
||||||
warmup_steps: 1
|
warmup_steps: 1
|
||||||
min_lr: 1e-4
|
min_lr: 1e-4
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
|
@ -24,12 +24,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 8000
|
n_epochs: 3000
|
||||||
batch_size: 128
|
batch_size: 128
|
||||||
learning_rate: 1e-3
|
learning_rate: 1e-3
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 8000
|
first_cycle_steps: 3000
|
||||||
warmup_steps: 1
|
warmup_steps: 1
|
||||||
min_lr: 1e-4
|
min_lr: 1e-4
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -23,12 +23,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 5000
|
n_epochs: 3000
|
||||||
batch_size: 256
|
batch_size: 256
|
||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 0
|
weight_decay: 0
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 5000
|
first_cycle_steps: 3000
|
||||||
warmup_steps: 100
|
warmup_steps: 100
|
||||||
min_lr: 1e-4
|
min_lr: 1e-4
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -24,12 +24,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 8000
|
n_epochs: 3000
|
||||||
batch_size: 256
|
batch_size: 256
|
||||||
learning_rate: 1e-3
|
learning_rate: 1e-3
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 8000
|
first_cycle_steps: 3000
|
||||||
warmup_steps: 1
|
warmup_steps: 1
|
||||||
min_lr: 1e-4
|
min_lr: 1e-4
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -23,12 +23,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 5000
|
n_epochs: 3000
|
||||||
batch_size: 128
|
batch_size: 128
|
||||||
learning_rate: 1e-3
|
learning_rate: 1e-3
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 5000
|
first_cycle_steps: 3000
|
||||||
warmup_steps: 1
|
warmup_steps: 1
|
||||||
min_lr: 1e-4
|
min_lr: 1e-4
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -24,12 +24,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 8000
|
n_epochs: 3000
|
||||||
batch_size: 128
|
batch_size: 128
|
||||||
learning_rate: 1e-3
|
learning_rate: 1e-3
|
||||||
weight_decay: 1e-5
|
weight_decay: 1e-5
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 8000
|
first_cycle_steps: 3000
|
||||||
warmup_steps: 1
|
warmup_steps: 1
|
||||||
min_lr: 1e-4
|
min_lr: 1e-4
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -23,12 +23,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 5000
|
n_epochs: 3000
|
||||||
batch_size: 128
|
batch_size: 128
|
||||||
learning_rate: 1e-3
|
learning_rate: 1e-3
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 5000
|
first_cycle_steps: 3000
|
||||||
warmup_steps: 1
|
warmup_steps: 1
|
||||||
min_lr: 1e-4
|
min_lr: 1e-4
|
||||||
save_model_freq: 500
|
save_model_freq: 500
|
||||||
|
@ -24,12 +24,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 3000
|
n_epochs: 200
|
||||||
batch_size: 128
|
batch_size: 128
|
||||||
learning_rate: 1e-3
|
learning_rate: 1e-3
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 3000
|
first_cycle_steps: 200
|
||||||
warmup_steps: 1
|
warmup_steps: 1
|
||||||
min_lr: 1e-4
|
min_lr: 1e-4
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
|
@ -23,12 +23,12 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_epochs: 3000
|
n_epochs: 200
|
||||||
batch_size: 128
|
batch_size: 128
|
||||||
learning_rate: 1e-4
|
learning_rate: 1e-4
|
||||||
weight_decay: 1e-6
|
weight_decay: 1e-6
|
||||||
lr_scheduler:
|
lr_scheduler:
|
||||||
first_cycle_steps: 3000
|
first_cycle_steps: 200
|
||||||
warmup_steps: 1
|
warmup_steps: 1
|
||||||
min_lr: 1e-4
|
min_lr: 1e-4
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
defaults:
|
defaults:
|
||||||
- _self_
|
- _self_
|
||||||
hydra:
|
hydra:
|
||||||
run:
|
run:
|
||||||
dir: ${logdir}
|
dir: ${logdir}
|
||||||
_target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
|
_target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
|
||||||
|
|
||||||
@ -42,7 +42,7 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 1000
|
n_train_itr: 501
|
||||||
n_critic_warmup_itr: 0
|
n_critic_warmup_itr: 0
|
||||||
n_steps: 1000
|
n_steps: 1000
|
||||||
gamma: 0.99
|
gamma: 0.99
|
||||||
@ -55,7 +55,7 @@ train:
|
|||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 1000
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
@ -67,7 +67,7 @@ train:
|
|||||||
reward_scale_running: True
|
reward_scale_running: True
|
||||||
reward_scale_const: 1.0
|
reward_scale_const: 1.0
|
||||||
gae_lambda: 0.95
|
gae_lambda: 0.95
|
||||||
batch_size: 10000
|
batch_size: 5000
|
||||||
update_epochs: 10
|
update_epochs: 10
|
||||||
vf_coef: 0.5
|
vf_coef: 0.5
|
||||||
target_kl: 1
|
target_kl: 1
|
||||||
@ -75,7 +75,7 @@ train:
|
|||||||
model:
|
model:
|
||||||
_target_: model.diffusion.diffusion_ppo.PPODiffusion
|
_target_: model.diffusion.diffusion_ppo.PPODiffusion
|
||||||
# HP to tune
|
# HP to tune
|
||||||
gamma_denoising: 0.99
|
gamma_denoising: 1
|
||||||
clip_ploss_coef: 0.1
|
clip_ploss_coef: 0.1
|
||||||
clip_ploss_coef_base: 0.1
|
clip_ploss_coef_base: 0.1
|
||||||
clip_ploss_coef_rate: 3
|
clip_ploss_coef_rate: 3
|
||||||
@ -94,10 +94,10 @@ model:
|
|||||||
residual_style: True
|
residual_style: True
|
||||||
critic:
|
critic:
|
||||||
_target_: model.common.critic.CriticObs
|
_target_: model.common.critic.CriticObs
|
||||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
|
||||||
mlp_dims: [256, 256, 256]
|
mlp_dims: [256, 256, 256]
|
||||||
activation_type: Mish
|
activation_type: Mish
|
||||||
residual_style: True
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
ft_denoising_steps: ${ft_denoising_steps}
|
ft_denoising_steps: ${ft_denoising_steps}
|
||||||
horizon_steps: ${horizon_steps}
|
horizon_steps: ${horizon_steps}
|
||||||
obs_dim: ${obs_dim}
|
obs_dim: ${obs_dim}
|
||||||
|
@ -40,7 +40,7 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 1000
|
n_train_itr: 501
|
||||||
n_critic_warmup_itr: 0
|
n_critic_warmup_itr: 0
|
||||||
n_steps: 1000
|
n_steps: 1000
|
||||||
gamma: 0.99
|
gamma: 0.99
|
||||||
@ -65,7 +65,7 @@ train:
|
|||||||
reward_scale_running: True
|
reward_scale_running: True
|
||||||
reward_scale_const: 1.0
|
reward_scale_const: 1.0
|
||||||
gae_lambda: 0.95
|
gae_lambda: 0.95
|
||||||
batch_size: 1000
|
batch_size: 500
|
||||||
update_epochs: 10
|
update_epochs: 10
|
||||||
vf_coef: 0.5
|
vf_coef: 0.5
|
||||||
target_kl: 1
|
target_kl: 1
|
||||||
|
@ -42,7 +42,7 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 1000
|
n_train_itr: 301
|
||||||
n_critic_warmup_itr: 0
|
n_critic_warmup_itr: 0
|
||||||
n_steps: 1000
|
n_steps: 1000
|
||||||
gamma: 0.99
|
gamma: 0.99
|
||||||
@ -67,7 +67,7 @@ train:
|
|||||||
reward_scale_running: True
|
reward_scale_running: True
|
||||||
reward_scale_const: 1.0
|
reward_scale_const: 1.0
|
||||||
gae_lambda: 0.95
|
gae_lambda: 0.95
|
||||||
batch_size: 10000
|
batch_size: 5000
|
||||||
update_epochs: 10
|
update_epochs: 10
|
||||||
vf_coef: 0.5
|
vf_coef: 0.5
|
||||||
target_kl: 1
|
target_kl: 1
|
||||||
@ -75,7 +75,7 @@ train:
|
|||||||
model:
|
model:
|
||||||
_target_: model.diffusion.diffusion_ppo.PPODiffusion
|
_target_: model.diffusion.diffusion_ppo.PPODiffusion
|
||||||
# HP to tune
|
# HP to tune
|
||||||
gamma_denoising: 0.99
|
gamma_denoising: 1
|
||||||
clip_ploss_coef: 0.1
|
clip_ploss_coef: 0.1
|
||||||
clip_ploss_coef_base: 0.1
|
clip_ploss_coef_base: 0.1
|
||||||
clip_ploss_coef_rate: 3
|
clip_ploss_coef_rate: 3
|
||||||
|
@ -40,7 +40,7 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 1000
|
n_train_itr: 301
|
||||||
n_critic_warmup_itr: 0
|
n_critic_warmup_itr: 0
|
||||||
n_steps: 1000
|
n_steps: 1000
|
||||||
gamma: 0.99
|
gamma: 0.99
|
||||||
@ -65,7 +65,7 @@ train:
|
|||||||
reward_scale_running: True
|
reward_scale_running: True
|
||||||
reward_scale_const: 1.0
|
reward_scale_const: 1.0
|
||||||
gae_lambda: 0.95
|
gae_lambda: 0.95
|
||||||
batch_size: 1000
|
batch_size: 500
|
||||||
update_epochs: 10
|
update_epochs: 10
|
||||||
vf_coef: 0.5
|
vf_coef: 0.5
|
||||||
target_kl: 1
|
target_kl: 1
|
||||||
|
@ -42,7 +42,7 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 1000
|
n_train_itr: 501
|
||||||
n_critic_warmup_itr: 0
|
n_critic_warmup_itr: 0
|
||||||
n_steps: 1000
|
n_steps: 1000
|
||||||
gamma: 0.99
|
gamma: 0.99
|
||||||
@ -55,7 +55,7 @@ train:
|
|||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 10000
|
first_cycle_steps: 1000
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
@ -67,7 +67,7 @@ train:
|
|||||||
reward_scale_running: True
|
reward_scale_running: True
|
||||||
reward_scale_const: 1.0
|
reward_scale_const: 1.0
|
||||||
gae_lambda: 0.95
|
gae_lambda: 0.95
|
||||||
batch_size: 10000
|
batch_size: 5000
|
||||||
update_epochs: 10
|
update_epochs: 10
|
||||||
vf_coef: 0.5
|
vf_coef: 0.5
|
||||||
target_kl: 1
|
target_kl: 1
|
||||||
@ -75,7 +75,7 @@ train:
|
|||||||
model:
|
model:
|
||||||
_target_: model.diffusion.diffusion_ppo.PPODiffusion
|
_target_: model.diffusion.diffusion_ppo.PPODiffusion
|
||||||
# HP to tune
|
# HP to tune
|
||||||
gamma_denoising: 0.99
|
gamma_denoising: 1
|
||||||
clip_ploss_coef: 0.1
|
clip_ploss_coef: 0.1
|
||||||
clip_ploss_coef_base: 0.1
|
clip_ploss_coef_base: 0.1
|
||||||
clip_ploss_coef_rate: 3
|
clip_ploss_coef_rate: 3
|
||||||
@ -94,10 +94,10 @@ model:
|
|||||||
residual_style: True
|
residual_style: True
|
||||||
critic:
|
critic:
|
||||||
_target_: model.common.critic.CriticObs
|
_target_: model.common.critic.CriticObs
|
||||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
|
||||||
mlp_dims: [256, 256, 256]
|
mlp_dims: [256, 256, 256]
|
||||||
activation_type: Mish
|
activation_type: Mish
|
||||||
residual_style: True
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
ft_denoising_steps: ${ft_denoising_steps}
|
ft_denoising_steps: ${ft_denoising_steps}
|
||||||
horizon_steps: ${horizon_steps}
|
horizon_steps: ${horizon_steps}
|
||||||
obs_dim: ${obs_dim}
|
obs_dim: ${obs_dim}
|
||||||
|
@ -40,7 +40,7 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 1000
|
n_train_itr: 301
|
||||||
n_critic_warmup_itr: 0
|
n_critic_warmup_itr: 0
|
||||||
n_steps: 1000
|
n_steps: 1000
|
||||||
gamma: 0.99
|
gamma: 0.99
|
||||||
@ -65,7 +65,7 @@ train:
|
|||||||
reward_scale_running: True
|
reward_scale_running: True
|
||||||
reward_scale_const: 1.0
|
reward_scale_const: 1.0
|
||||||
gae_lambda: 0.95
|
gae_lambda: 0.95
|
||||||
batch_size: 1000
|
batch_size: 500
|
||||||
update_epochs: 10
|
update_epochs: 10
|
||||||
vf_coef: 0.5
|
vf_coef: 0.5
|
||||||
target_kl: 1
|
target_kl: 1
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
## Pre-training experiments
|
## Pre-training experiments
|
||||||
|
|
||||||
**Update, Nov 6 2024**: we fixed the issue of EMA update being too infrequent causing slow pre-training. Now the number of epochs needed for pre-training can be much slower than those used in the configs. We recommend training with fewer epochs and testing the early checkpoints.
|
**Update, Nov 20 2024**: We fixed an issue where the EMA update was too infrequent, which caused slow pre-training ([commit](https://github.com/irom-princeton/dppo/commit/e1ef4ca1cfbff85e5ae6c49f5e57debd70174616)). The number of epochs needed for pre-training can now be much lower than those previously used in the configs (e.g., 3000 for robomimic state and 1000 for robomimic pixel), and we have updated the pre-training configs accordingly in v0.7. If you would like to replicate the original experimental results from the paper, please use v0.6.
|
||||||
|
|
||||||
### Comparing diffusion-based RL algorithms (Sec. 5.1)
|
### Comparing diffusion-based RL algorithms (Sec. 5.1)
|
||||||
Gym configs are under `cfg/gym/pretrain/<env_name>/`, and the config name is `pre_diffusion_mlp`. Robomimic configs are under `cfg/robomimic/pretrain/<env_name>/`, and the name is also `pre_diffusion_mlp`.
|
Gym configs are under `cfg/gym/pretrain/<env_name>/`, and the config name is `pre_diffusion_mlp`. Robomimic configs are under `cfg/robomimic/pretrain/<env_name>/`, and the name is also `pre_diffusion_mlp`.
|
||||||
|
@ -7,7 +7,7 @@ _target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
|
|||||||
|
|
||||||
name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
|
name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
|
||||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_img_ta4_td100/2024-07-30_22-23-55/checkpoint/state_5000.pt
|
base_policy_path:
|
||||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||||
|
|
||||||
@ -28,7 +28,7 @@ n_steps: 300 # each episode takes max_episode_steps / act_steps steps
|
|||||||
render_num: 0
|
render_num: 0
|
||||||
|
|
||||||
env:
|
env:
|
||||||
n_envs: 50
|
n_envs: 20 # reduce gpu usage
|
||||||
name: ${env_name}
|
name: ${env_name}
|
||||||
best_reward_threshold_for_success: 1
|
best_reward_threshold_for_success: 1
|
||||||
max_episode_steps: 300
|
max_episode_steps: 300
|
||||||
|
68
cfg/robomimic/eval/can/eval_diffusion_unet.yaml
Normal file
68
cfg/robomimic/eval/can/eval_diffusion_unet.yaml
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path:
|
||||||
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: can
|
||||||
|
obs_dim: 23
|
||||||
|
action_dim: 7
|
||||||
|
denoising_steps: 20
|
||||||
|
cond_steps: 1
|
||||||
|
horizon_steps: 4
|
||||||
|
act_steps: 4
|
||||||
|
|
||||||
|
n_steps: 75 # each episode takes max_episode_steps / act_steps steps
|
||||||
|
render_num: 0
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 40
|
||||||
|
name: ${env_name}
|
||||||
|
best_reward_threshold_for_success: 1
|
||||||
|
max_episode_steps: 300
|
||||||
|
save_video: False
|
||||||
|
wrappers:
|
||||||
|
robomimic_lowdim:
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
low_dim_keys: ['robot0_eef_pos',
|
||||||
|
'robot0_eef_quat',
|
||||||
|
'robot0_gripper_qpos',
|
||||||
|
'object'] # same order of preprocessed observations
|
||||||
|
multi_step:
|
||||||
|
n_obs_steps: ${cond_steps}
|
||||||
|
n_action_steps: ${act_steps}
|
||||||
|
max_episode_steps: ${env.max_episode_steps}
|
||||||
|
reset_within_step: True
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
|
randn_clip_value: 3
|
||||||
|
#
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
network:
|
||||||
|
_target_: model.diffusion.unet.Unet1D
|
||||||
|
diffusion_step_embed_dim: 16
|
||||||
|
dim: 40
|
||||||
|
dim_mults: [1, 2]
|
||||||
|
kernel_size: 5
|
||||||
|
n_groups: 8
|
||||||
|
smaller_encoder: False
|
||||||
|
cond_predict_scale: True
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
102
cfg/robomimic/eval/can/eval_diffusion_unet_img.yaml
Normal file
102
cfg/robomimic/eval/can/eval_diffusion_unet_img.yaml
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path:
|
||||||
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: can
|
||||||
|
obs_dim: 9
|
||||||
|
action_dim: 7
|
||||||
|
denoising_steps: 100
|
||||||
|
cond_steps: 1
|
||||||
|
img_cond_steps: 1
|
||||||
|
horizon_steps: 4
|
||||||
|
act_steps: 4
|
||||||
|
use_ddim: True
|
||||||
|
ddim_steps: 5
|
||||||
|
|
||||||
|
n_steps: 300 # each episode takes max_episode_steps / act_steps steps
|
||||||
|
render_num: 0
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 20 # reduce gpu usage
|
||||||
|
name: ${env_name}
|
||||||
|
best_reward_threshold_for_success: 1
|
||||||
|
max_episode_steps: 300
|
||||||
|
save_video: False
|
||||||
|
use_image_obs: True
|
||||||
|
wrappers:
|
||||||
|
robomimic_image:
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
low_dim_keys: ['robot0_eef_pos',
|
||||||
|
'robot0_eef_quat',
|
||||||
|
'robot0_gripper_qpos']
|
||||||
|
image_keys: ['robot0_eye_in_hand_image']
|
||||||
|
shape_meta: ${shape_meta}
|
||||||
|
multi_step:
|
||||||
|
n_obs_steps: ${cond_steps}
|
||||||
|
n_action_steps: ${act_steps}
|
||||||
|
max_episode_steps: ${env.max_episode_steps}
|
||||||
|
reset_within_step: True
|
||||||
|
|
||||||
|
shape_meta:
|
||||||
|
obs:
|
||||||
|
rgb:
|
||||||
|
shape: [3, 96, 96]
|
||||||
|
state:
|
||||||
|
shape: [9]
|
||||||
|
action:
|
||||||
|
shape: [7]
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
|
randn_clip_value: 3
|
||||||
|
#
|
||||||
|
use_ddim: ${use_ddim}
|
||||||
|
ddim_steps: ${ddim_steps}
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
network:
|
||||||
|
_target_: model.diffusion.unet.VisionUnet1D
|
||||||
|
backbone:
|
||||||
|
_target_: model.common.vit.VitEncoder
|
||||||
|
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||||
|
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
|
||||||
|
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||||
|
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||||
|
cfg:
|
||||||
|
patch_size: 8
|
||||||
|
depth: 1
|
||||||
|
embed_dim: 128
|
||||||
|
num_heads: 4
|
||||||
|
embed_style: embed2
|
||||||
|
embed_norm: 0
|
||||||
|
img_cond_steps: ${img_cond_steps}
|
||||||
|
augment: False
|
||||||
|
spatial_emb: 128
|
||||||
|
diffusion_step_embed_dim: 32
|
||||||
|
dim: 40
|
||||||
|
dim_mults:
|
||||||
|
- 1
|
||||||
|
- 2
|
||||||
|
kernel_size: 5
|
||||||
|
n_groups: 8
|
||||||
|
smaller_encoder: false
|
||||||
|
cond_predict_scale: true
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
@ -7,7 +7,7 @@ _target_: agent.eval.eval_gaussian_agent.EvalGaussianAgent
|
|||||||
|
|
||||||
name: ${env_name}_eval_gaussian_mlp_ta${horizon_steps}
|
name: ${env_name}_eval_gaussian_mlp_ta${horizon_steps}
|
||||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_gaussian_mlp_ta4/2024-06-28_13-31-00/checkpoint/state_5000.pt
|
base_policy_path:
|
||||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
||||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@ _target_: agent.eval.eval_gaussian_img_agent.EvalImgGaussianAgent
|
|||||||
|
|
||||||
name: ${env_name}_eval_gaussian_mlp_img_ta${horizon_steps}
|
name: ${env_name}_eval_gaussian_mlp_img_ta${horizon_steps}
|
||||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_gaussian_mlp_img_ta4/2024-07-28_21-54-40/checkpoint/state_1000.pt
|
base_policy_path:
|
||||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||||
|
|
||||||
|
65
cfg/robomimic/eval/lift/eval_diffusion_mlp.yaml
Normal file
65
cfg/robomimic/eval/lift/eval_diffusion_mlp.yaml
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path:
|
||||||
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: lift
|
||||||
|
obs_dim: 19
|
||||||
|
action_dim: 7
|
||||||
|
denoising_steps: 20
|
||||||
|
cond_steps: 1
|
||||||
|
horizon_steps: 4
|
||||||
|
act_steps: 4
|
||||||
|
|
||||||
|
n_steps: 300 # each episode takes max_episode_steps / act_steps steps
|
||||||
|
render_num: 0
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 50
|
||||||
|
name: ${env_name}
|
||||||
|
best_reward_threshold_for_success: 1
|
||||||
|
max_episode_steps: 300
|
||||||
|
save_video: False
|
||||||
|
wrappers:
|
||||||
|
robomimic_lowdim:
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
low_dim_keys: ['robot0_eef_pos',
|
||||||
|
'robot0_eef_quat',
|
||||||
|
'robot0_gripper_qpos',
|
||||||
|
'object'] # same order of preprocessed observations
|
||||||
|
multi_step:
|
||||||
|
n_obs_steps: ${cond_steps}
|
||||||
|
n_action_steps: ${act_steps}
|
||||||
|
max_episode_steps: ${env.max_episode_steps}
|
||||||
|
reset_within_step: True
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
|
randn_clip_value: 3
|
||||||
|
#
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
network:
|
||||||
|
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
|
||||||
|
time_dim: 16
|
||||||
|
mlp_dims: [512, 512, 512]
|
||||||
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
97
cfg/robomimic/eval/lift/eval_diffusion_mlp_img.yaml
Normal file
97
cfg/robomimic/eval/lift/eval_diffusion_mlp_img.yaml
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path:
|
||||||
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: lift
|
||||||
|
obs_dim: 9
|
||||||
|
action_dim: 7
|
||||||
|
denoising_steps: 100
|
||||||
|
cond_steps: 1
|
||||||
|
img_cond_steps: 1
|
||||||
|
horizon_steps: 4
|
||||||
|
act_steps: 4
|
||||||
|
use_ddim: True
|
||||||
|
ddim_steps: 5
|
||||||
|
|
||||||
|
n_steps: 300 # each episode takes max_episode_steps / act_steps steps
|
||||||
|
render_num: 0
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 20 # reduce gpu usage
|
||||||
|
name: ${env_name}
|
||||||
|
best_reward_threshold_for_success: 1
|
||||||
|
max_episode_steps: 300
|
||||||
|
save_video: False
|
||||||
|
use_image_obs: True
|
||||||
|
wrappers:
|
||||||
|
robomimic_image:
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
low_dim_keys: ['robot0_eef_pos',
|
||||||
|
'robot0_eef_quat',
|
||||||
|
'robot0_gripper_qpos']
|
||||||
|
image_keys: ['robot0_eye_in_hand_image']
|
||||||
|
shape_meta: ${shape_meta}
|
||||||
|
multi_step:
|
||||||
|
n_obs_steps: ${cond_steps}
|
||||||
|
n_action_steps: ${act_steps}
|
||||||
|
max_episode_steps: ${env.max_episode_steps}
|
||||||
|
reset_within_step: True
|
||||||
|
|
||||||
|
shape_meta:
|
||||||
|
obs:
|
||||||
|
rgb:
|
||||||
|
shape: [3, 96, 96]
|
||||||
|
state:
|
||||||
|
shape: [9]
|
||||||
|
action:
|
||||||
|
shape: [7]
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
|
randn_clip_value: 3
|
||||||
|
#
|
||||||
|
use_ddim: ${use_ddim}
|
||||||
|
ddim_steps: ${ddim_steps}
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
network:
|
||||||
|
_target_: model.diffusion.mlp_diffusion.VisionDiffusionMLP
|
||||||
|
backbone:
|
||||||
|
_target_: model.common.vit.VitEncoder
|
||||||
|
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||||
|
num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per image; the img_cond_steps history frames are concatenated along the channel axis
|
||||||
|
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||||
|
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||||
|
cfg:
|
||||||
|
patch_size: 8
|
||||||
|
depth: 1
|
||||||
|
embed_dim: 128
|
||||||
|
num_heads: 4
|
||||||
|
embed_style: embed2
|
||||||
|
embed_norm: 0
|
||||||
|
augment: False
|
||||||
|
spatial_emb: 128
|
||||||
|
time_dim: 32
|
||||||
|
mlp_dims: [512, 512, 512]
|
||||||
|
residual_style: True
|
||||||
|
img_cond_steps: ${img_cond_steps}
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
68
cfg/robomimic/eval/lift/eval_diffusion_unet.yaml
Normal file
68
cfg/robomimic/eval/lift/eval_diffusion_unet.yaml
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path:
|
||||||
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: lift
|
||||||
|
obs_dim: 19
|
||||||
|
action_dim: 7
|
||||||
|
denoising_steps: 20
|
||||||
|
cond_steps: 1
|
||||||
|
horizon_steps: 4
|
||||||
|
act_steps: 4
|
||||||
|
|
||||||
|
n_steps: 75 # each episode takes max_episode_steps / act_steps steps
|
||||||
|
render_num: 0
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 40
|
||||||
|
name: ${env_name}
|
||||||
|
best_reward_threshold_for_success: 1
|
||||||
|
max_episode_steps: 300
|
||||||
|
save_video: False
|
||||||
|
wrappers:
|
||||||
|
robomimic_lowdim:
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
low_dim_keys: ['robot0_eef_pos',
|
||||||
|
'robot0_eef_quat',
|
||||||
|
'robot0_gripper_qpos',
|
||||||
|
'object'] # same order of preprocessed observations
|
||||||
|
multi_step:
|
||||||
|
n_obs_steps: ${cond_steps}
|
||||||
|
n_action_steps: ${act_steps}
|
||||||
|
max_episode_steps: ${env.max_episode_steps}
|
||||||
|
reset_within_step: True
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
|
randn_clip_value: 3
|
||||||
|
#
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
network:
|
||||||
|
_target_: model.diffusion.unet.Unet1D
|
||||||
|
diffusion_step_embed_dim: 16
|
||||||
|
dim: 40
|
||||||
|
dim_mults: [1, 2]
|
||||||
|
kernel_size: 5
|
||||||
|
n_groups: 8
|
||||||
|
smaller_encoder: False
|
||||||
|
cond_predict_scale: True
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
100
cfg/robomimic/eval/lift/eval_diffusion_unet_img.yaml
Normal file
100
cfg/robomimic/eval/lift/eval_diffusion_unet_img.yaml
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path:
|
||||||
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: lift
|
||||||
|
obs_dim: 9
|
||||||
|
action_dim: 7
|
||||||
|
denoising_steps: 100
|
||||||
|
cond_steps: 1
|
||||||
|
img_cond_steps: 1
|
||||||
|
horizon_steps: 4
|
||||||
|
act_steps: 4
|
||||||
|
use_ddim: True
|
||||||
|
ddim_steps: 5
|
||||||
|
|
||||||
|
n_steps: 300 # each episode takes max_episode_steps / act_steps steps
|
||||||
|
render_num: 0
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 20 # reduce gpu usage
|
||||||
|
name: ${env_name}
|
||||||
|
best_reward_threshold_for_success: 1
|
||||||
|
max_episode_steps: 300
|
||||||
|
save_video: False
|
||||||
|
use_image_obs: True
|
||||||
|
wrappers:
|
||||||
|
robomimic_image:
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
low_dim_keys: ['robot0_eef_pos',
|
||||||
|
'robot0_eef_quat',
|
||||||
|
'robot0_gripper_qpos']
|
||||||
|
image_keys: ['robot0_eye_in_hand_image']
|
||||||
|
shape_meta: ${shape_meta}
|
||||||
|
multi_step:
|
||||||
|
n_obs_steps: ${cond_steps}
|
||||||
|
n_action_steps: ${act_steps}
|
||||||
|
max_episode_steps: ${env.max_episode_steps}
|
||||||
|
reset_within_step: True
|
||||||
|
|
||||||
|
shape_meta:
|
||||||
|
obs:
|
||||||
|
rgb:
|
||||||
|
shape: [3, 96, 96]
|
||||||
|
state:
|
||||||
|
shape: [9]
|
||||||
|
action:
|
||||||
|
shape: [7]
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
|
randn_clip_value: 3
|
||||||
|
#
|
||||||
|
use_ddim: ${use_ddim}
|
||||||
|
ddim_steps: ${ddim_steps}
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
network:
|
||||||
|
_target_: model.diffusion.unet.VisionUnet1D
|
||||||
|
backbone:
|
||||||
|
_target_: model.common.vit.VitEncoder
|
||||||
|
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||||
|
num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per image; the img_cond_steps history frames are concatenated along the channel axis
|
||||||
|
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||||
|
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||||
|
cfg:
|
||||||
|
patch_size: 8
|
||||||
|
depth: 1
|
||||||
|
embed_dim: 128
|
||||||
|
num_heads: 4
|
||||||
|
embed_style: embed2
|
||||||
|
embed_norm: 0
|
||||||
|
img_cond_steps: ${img_cond_steps}
|
||||||
|
augment: False
|
||||||
|
spatial_emb: 128
|
||||||
|
diffusion_step_embed_dim: 32
|
||||||
|
dim: 40
|
||||||
|
dim_mults: [1, 2]
|
||||||
|
kernel_size: 5
|
||||||
|
n_groups: 8
|
||||||
|
smaller_encoder: False
|
||||||
|
cond_predict_scale: True
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
@ -18,8 +18,8 @@ obs_dim: 23
|
|||||||
action_dim: 7
|
action_dim: 7
|
||||||
denoising_steps: 20
|
denoising_steps: 20
|
||||||
cond_steps: 1
|
cond_steps: 1
|
||||||
horizon_steps: 1
|
horizon_steps: 4
|
||||||
act_steps: 1
|
act_steps: 4
|
||||||
|
|
||||||
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
|
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
|
||||||
render_num: 0
|
render_num: 0
|
||||||
|
97
cfg/robomimic/eval/square/eval_diffusion_mlp_img.yaml
Normal file
97
cfg/robomimic/eval/square/eval_diffusion_mlp_img.yaml
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path:
|
||||||
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: square
|
||||||
|
obs_dim: 9
|
||||||
|
action_dim: 7
|
||||||
|
denoising_steps: 100
|
||||||
|
cond_steps: 1
|
||||||
|
img_cond_steps: 1
|
||||||
|
horizon_steps: 4
|
||||||
|
act_steps: 4
|
||||||
|
use_ddim: True
|
||||||
|
ddim_steps: 5
|
||||||
|
|
||||||
|
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
|
||||||
|
render_num: 0
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 20 # reduce gpu usage
|
||||||
|
name: ${env_name}
|
||||||
|
best_reward_threshold_for_success: 1
|
||||||
|
max_episode_steps: 400
|
||||||
|
save_video: False
|
||||||
|
use_image_obs: True
|
||||||
|
wrappers:
|
||||||
|
robomimic_image:
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
low_dim_keys: ['robot0_eef_pos',
|
||||||
|
'robot0_eef_quat',
|
||||||
|
'robot0_gripper_qpos']
|
||||||
|
image_keys: ['agentview_image']
|
||||||
|
shape_meta: ${shape_meta}
|
||||||
|
multi_step:
|
||||||
|
n_obs_steps: ${cond_steps}
|
||||||
|
n_action_steps: ${act_steps}
|
||||||
|
max_episode_steps: ${env.max_episode_steps}
|
||||||
|
reset_within_step: True
|
||||||
|
|
||||||
|
shape_meta:
|
||||||
|
obs:
|
||||||
|
rgb:
|
||||||
|
shape: [3, 96, 96]
|
||||||
|
state:
|
||||||
|
shape: [9]
|
||||||
|
action:
|
||||||
|
shape: [7]
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
|
randn_clip_value: 3
|
||||||
|
#
|
||||||
|
use_ddim: ${use_ddim}
|
||||||
|
ddim_steps: ${ddim_steps}
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
network:
|
||||||
|
_target_: model.diffusion.mlp_diffusion.VisionDiffusionMLP
|
||||||
|
backbone:
|
||||||
|
_target_: model.common.vit.VitEncoder
|
||||||
|
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||||
|
num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per image; the img_cond_steps history frames are concatenated along the channel axis
|
||||||
|
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||||
|
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||||
|
cfg:
|
||||||
|
patch_size: 8
|
||||||
|
depth: 1
|
||||||
|
embed_dim: 128
|
||||||
|
num_heads: 4
|
||||||
|
embed_style: embed2
|
||||||
|
embed_norm: 0
|
||||||
|
augment: False
|
||||||
|
spatial_emb: 128
|
||||||
|
time_dim: 32
|
||||||
|
mlp_dims: [768, 768, 768]
|
||||||
|
residual_style: True
|
||||||
|
img_cond_steps: ${img_cond_steps}
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
68
cfg/robomimic/eval/square/eval_diffusion_unet.yaml
Normal file
68
cfg/robomimic/eval/square/eval_diffusion_unet.yaml
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path:
|
||||||
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: square
|
||||||
|
obs_dim: 23
|
||||||
|
action_dim: 7
|
||||||
|
denoising_steps: 20
|
||||||
|
cond_steps: 1
|
||||||
|
horizon_steps: 4
|
||||||
|
act_steps: 4
|
||||||
|
|
||||||
|
n_steps: 100 # each episode takes max_episode_steps / act_steps steps
|
||||||
|
render_num: 0
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 50
|
||||||
|
name: ${env_name}
|
||||||
|
best_reward_threshold_for_success: 1
|
||||||
|
max_episode_steps: 400
|
||||||
|
save_video: False
|
||||||
|
wrappers:
|
||||||
|
robomimic_lowdim:
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
low_dim_keys: ['robot0_eef_pos',
|
||||||
|
'robot0_eef_quat',
|
||||||
|
'robot0_gripper_qpos',
|
||||||
|
'object'] # same order of preprocessed observations
|
||||||
|
multi_step:
|
||||||
|
n_obs_steps: ${cond_steps}
|
||||||
|
n_action_steps: ${act_steps}
|
||||||
|
max_episode_steps: ${env.max_episode_steps}
|
||||||
|
reset_within_step: True
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
|
randn_clip_value: 3
|
||||||
|
#
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
network:
|
||||||
|
_target_: model.diffusion.unet.Unet1D
|
||||||
|
diffusion_step_embed_dim: 16
|
||||||
|
dim: 64
|
||||||
|
dim_mults: [1, 2]
|
||||||
|
kernel_size: 5
|
||||||
|
n_groups: 8
|
||||||
|
smaller_encoder: False
|
||||||
|
cond_predict_scale: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
102
cfg/robomimic/eval/square/eval_diffusion_unet_img.yaml
Normal file
102
cfg/robomimic/eval/square/eval_diffusion_unet_img.yaml
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path:
|
||||||
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: square
|
||||||
|
obs_dim: 9
|
||||||
|
action_dim: 7
|
||||||
|
denoising_steps: 100
|
||||||
|
cond_steps: 1
|
||||||
|
img_cond_steps: 1
|
||||||
|
horizon_steps: 4
|
||||||
|
act_steps: 4
|
||||||
|
use_ddim: True
|
||||||
|
ddim_steps: 5
|
||||||
|
|
||||||
|
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
|
||||||
|
render_num: 0
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 30 # reduce gpu usage
|
||||||
|
name: ${env_name}
|
||||||
|
best_reward_threshold_for_success: 1
|
||||||
|
max_episode_steps: 400
|
||||||
|
save_video: False
|
||||||
|
use_image_obs: True
|
||||||
|
wrappers:
|
||||||
|
robomimic_image:
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
low_dim_keys: ['robot0_eef_pos',
|
||||||
|
'robot0_eef_quat',
|
||||||
|
'robot0_gripper_qpos']
|
||||||
|
image_keys: ['agentview_image']
|
||||||
|
shape_meta: ${shape_meta}
|
||||||
|
multi_step:
|
||||||
|
n_obs_steps: ${cond_steps}
|
||||||
|
n_action_steps: ${act_steps}
|
||||||
|
max_episode_steps: ${env.max_episode_steps}
|
||||||
|
reset_within_step: True
|
||||||
|
|
||||||
|
shape_meta:
|
||||||
|
obs:
|
||||||
|
rgb:
|
||||||
|
shape: [3, 96, 96]
|
||||||
|
state:
|
||||||
|
shape: [9]
|
||||||
|
action:
|
||||||
|
shape: [7]
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
|
randn_clip_value: 3
|
||||||
|
#
|
||||||
|
use_ddim: ${use_ddim}
|
||||||
|
ddim_steps: ${ddim_steps}
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
network:
|
||||||
|
_target_: model.diffusion.unet.VisionUnet1D
|
||||||
|
backbone:
|
||||||
|
_target_: model.common.vit.VitEncoder
|
||||||
|
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||||
|
num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per image; the img_cond_steps history frames are concatenated along the channel axis
|
||||||
|
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||||
|
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||||
|
cfg:
|
||||||
|
patch_size: 8
|
||||||
|
depth: 1
|
||||||
|
embed_dim: 128
|
||||||
|
num_heads: 4
|
||||||
|
embed_style: embed2
|
||||||
|
embed_norm: 0
|
||||||
|
img_cond_steps: ${img_cond_steps}
|
||||||
|
augment: False
|
||||||
|
spatial_emb: 128
|
||||||
|
diffusion_step_embed_dim: 32
|
||||||
|
dim: 64
|
||||||
|
dim_mults:
|
||||||
|
- 1
|
||||||
|
- 2
|
||||||
|
kernel_size: 5
|
||||||
|
n_groups: 8
|
||||||
|
smaller_encoder: false
|
||||||
|
cond_predict_scale: true
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
@ -3,9 +3,9 @@ defaults:
|
|||||||
hydra:
|
hydra:
|
||||||
run:
|
run:
|
||||||
dir: ${logdir}
|
dir: ${logdir}
|
||||||
_target_: agent.eval.eval_gaussian_agent.EvalGaussianAgent
|
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||||
|
|
||||||
name: ${env_name}_eval_gaussian_mlp_ta${horizon_steps}
|
name: ${env_name}_eval_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}
|
||||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
base_policy_path:
|
base_policy_path:
|
||||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
||||||
@ -13,12 +13,13 @@ normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.
|
|||||||
|
|
||||||
seed: 42
|
seed: 42
|
||||||
device: cuda:0
|
device: cuda:0
|
||||||
env_name: square
|
env_name: transport
|
||||||
obs_dim: 23
|
obs_dim: 59
|
||||||
action_dim: 7
|
action_dim: 14
|
||||||
|
denoising_steps: 20
|
||||||
cond_steps: 1
|
cond_steps: 1
|
||||||
horizon_steps: 1
|
horizon_steps: 8
|
||||||
act_steps: 1
|
act_steps: 8
|
||||||
|
|
||||||
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
|
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
|
||||||
render_num: 0
|
render_num: 0
|
||||||
@ -27,7 +28,7 @@ env:
|
|||||||
n_envs: 50
|
n_envs: 50
|
||||||
name: ${env_name}
|
name: ${env_name}
|
||||||
best_reward_threshold_for_success: 1
|
best_reward_threshold_for_success: 1
|
||||||
max_episode_steps: 400
|
max_episode_steps: 800
|
||||||
save_video: False
|
save_video: False
|
||||||
wrappers:
|
wrappers:
|
||||||
robomimic_lowdim:
|
robomimic_lowdim:
|
||||||
@ -35,6 +36,9 @@ env:
|
|||||||
low_dim_keys: ['robot0_eef_pos',
|
low_dim_keys: ['robot0_eef_pos',
|
||||||
'robot0_eef_quat',
|
'robot0_eef_quat',
|
||||||
'robot0_gripper_qpos',
|
'robot0_gripper_qpos',
|
||||||
|
"robot1_eef_pos",
|
||||||
|
"robot1_eef_quat",
|
||||||
|
"robot1_gripper_qpos",
|
||||||
'object'] # same order of preprocessed observations
|
'object'] # same order of preprocessed observations
|
||||||
multi_step:
|
multi_step:
|
||||||
n_obs_steps: ${cond_steps}
|
n_obs_steps: ${cond_steps}
|
||||||
@ -42,19 +46,24 @@ env:
|
|||||||
max_episode_steps: ${env.max_episode_steps}
|
max_episode_steps: ${env.max_episode_steps}
|
||||||
reset_within_step: True
|
reset_within_step: True
|
||||||
|
|
||||||
|
|
||||||
model:
|
model:
|
||||||
_target_: model.common.gaussian.GaussianModel
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
randn_clip_value: 3
|
randn_clip_value: 3
|
||||||
#
|
#
|
||||||
network_path: ${base_policy_path}
|
network_path: ${base_policy_path}
|
||||||
network:
|
network:
|
||||||
_target_: model.common.mlp_gaussian.Gaussian_MLP
|
_target_: model.diffusion.mlp_diffusion.DiffusionMLP
|
||||||
|
time_dim: 32
|
||||||
mlp_dims: [1024, 1024, 1024]
|
mlp_dims: [1024, 1024, 1024]
|
||||||
activation_type: ReLU
|
residual_style: True
|
||||||
use_layernorm: true
|
|
||||||
fixed_std: 0.1
|
|
||||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
horizon_steps: ${horizon_steps}
|
horizon_steps: ${horizon_steps}
|
||||||
|
action_dim: ${action_dim}
|
||||||
horizon_steps: ${horizon_steps}
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
device: ${device}
|
device: ${device}
|
102
cfg/robomimic/eval/transport/eval_diffusion_mlp_img.yaml
Normal file
102
cfg/robomimic/eval/transport/eval_diffusion_mlp_img.yaml
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_eval_diffusion_mlp_img_ta${horizon_steps}_td${denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path:
|
||||||
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: transport
|
||||||
|
obs_dim: 18
|
||||||
|
action_dim: 14
|
||||||
|
denoising_steps: 100
|
||||||
|
cond_steps: 1
|
||||||
|
img_cond_steps: 1
|
||||||
|
horizon_steps: 8
|
||||||
|
act_steps: 8
|
||||||
|
use_ddim: True
|
||||||
|
ddim_steps: 5
|
||||||
|
|
||||||
|
n_steps: 200 # each episode takes max_episode_steps / act_steps steps
|
||||||
|
render_num: 0
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 30 # reduce gpu usage
|
||||||
|
name: ${env_name}
|
||||||
|
best_reward_threshold_for_success: 1
|
||||||
|
max_episode_steps: 800
|
||||||
|
save_video: False
|
||||||
|
use_image_obs: True
|
||||||
|
wrappers:
|
||||||
|
robomimic_image:
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
low_dim_keys: ['robot0_eef_pos',
|
||||||
|
'robot0_eef_quat',
|
||||||
|
'robot0_gripper_qpos',
|
||||||
|
"robot1_eef_pos",
|
||||||
|
"robot1_eef_quat",
|
||||||
|
"robot1_gripper_qpos"]
|
||||||
|
image_keys: ['shouldercamera0_image',
|
||||||
|
'shouldercamera1_image']
|
||||||
|
shape_meta: ${shape_meta}
|
||||||
|
multi_step:
|
||||||
|
n_obs_steps: ${cond_steps}
|
||||||
|
n_action_steps: ${act_steps}
|
||||||
|
max_episode_steps: ${env.max_episode_steps}
|
||||||
|
reset_within_step: True
|
||||||
|
|
||||||
|
shape_meta:
|
||||||
|
obs:
|
||||||
|
rgb:
|
||||||
|
shape: [6, 96, 96]
|
||||||
|
state:
|
||||||
|
shape: [18]
|
||||||
|
action:
|
||||||
|
shape: [14]
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
|
randn_clip_value: 3
|
||||||
|
#
|
||||||
|
use_ddim: ${use_ddim}
|
||||||
|
ddim_steps: ${ddim_steps}
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
network:
|
||||||
|
_target_: model.diffusion.mlp_diffusion.VisionDiffusionMLP
|
||||||
|
backbone:
|
||||||
|
_target_: model.common.vit.VitEncoder
|
||||||
|
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||||
|
num_channel: ${eval:'3 * ${img_cond_steps}'} # 3 channels per image; the img_cond_steps history frames are concatenated along the channel axis
|
||||||
|
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||||
|
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||||
|
cfg:
|
||||||
|
patch_size: 8
|
||||||
|
depth: 1
|
||||||
|
embed_dim: 128
|
||||||
|
num_heads: 4
|
||||||
|
embed_style: embed2
|
||||||
|
embed_norm: 0
|
||||||
|
augment: False
|
||||||
|
num_img: 2
|
||||||
|
spatial_emb: 128
|
||||||
|
time_dim: 32
|
||||||
|
mlp_dims: [768, 768, 768]
|
||||||
|
residual_style: True
|
||||||
|
img_cond_steps: ${img_cond_steps}
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
71
cfg/robomimic/eval/transport/eval_diffusion_unet.yaml
Normal file
71
cfg/robomimic/eval/transport/eval_diffusion_unet.yaml
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.eval.eval_diffusion_agent.EvalDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_eval_diffusion_unet_ta${horizon_steps}_td${denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path:
|
||||||
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: transport
|
||||||
|
obs_dim: 59
|
||||||
|
action_dim: 14
|
||||||
|
denoising_steps: 20
|
||||||
|
cond_steps: 1
|
||||||
|
horizon_steps: 16
|
||||||
|
act_steps: 8
|
||||||
|
|
||||||
|
n_steps: 100 # each episode takes max_episode_steps / act_steps steps
|
||||||
|
render_num: 0
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 50
|
||||||
|
name: ${env_name}
|
||||||
|
best_reward_threshold_for_success: 1
|
||||||
|
max_episode_steps: 800
|
||||||
|
save_video: False
|
||||||
|
wrappers:
|
||||||
|
robomimic_lowdim:
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
low_dim_keys: ['robot0_eef_pos',
|
||||||
|
'robot0_eef_quat',
|
||||||
|
'robot0_gripper_qpos',
|
||||||
|
"robot1_eef_pos",
|
||||||
|
"robot1_eef_quat",
|
||||||
|
"robot1_gripper_qpos",
|
||||||
|
'object'] # same order of preprocessed observations
|
||||||
|
multi_step:
|
||||||
|
n_obs_steps: ${cond_steps}
|
||||||
|
n_action_steps: ${act_steps}
|
||||||
|
max_episode_steps: ${env.max_episode_steps}
|
||||||
|
reset_within_step: True
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
|
randn_clip_value: 3
|
||||||
|
#
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
network:
|
||||||
|
_target_: model.diffusion.unet.Unet1D
|
||||||
|
diffusion_step_embed_dim: 16
|
||||||
|
dim: 64
|
||||||
|
dim_mults: [1, 2]
|
||||||
|
kernel_size: 5
|
||||||
|
n_groups: 8
|
||||||
|
smaller_encoder: False
|
||||||
|
cond_predict_scale: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
107
cfg/robomimic/eval/transport/eval_diffusion_unet_img.yaml
Normal file
107
cfg/robomimic/eval/transport/eval_diffusion_unet_img.yaml
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.eval.eval_diffusion_img_agent.EvalImgDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_eval_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path:
|
||||||
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: transport
|
||||||
|
obs_dim: 18
|
||||||
|
action_dim: 14
|
||||||
|
denoising_steps: 100
|
||||||
|
cond_steps: 1
|
||||||
|
img_cond_steps: 1
|
||||||
|
horizon_steps: 16
|
||||||
|
act_steps: 8
|
||||||
|
use_ddim: True
|
||||||
|
ddim_steps: 5
|
||||||
|
|
||||||
|
n_steps: 400 # each episode takes max_episode_steps / act_steps steps
|
||||||
|
render_num: 0
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 30 # reduce gpu usage
|
||||||
|
name: ${env_name}
|
||||||
|
best_reward_threshold_for_success: 1
|
||||||
|
max_episode_steps: 800
|
||||||
|
save_video: False
|
||||||
|
use_image_obs: True
|
||||||
|
wrappers:
|
||||||
|
robomimic_image:
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
low_dim_keys: ['robot0_eef_pos',
|
||||||
|
'robot0_eef_quat',
|
||||||
|
'robot0_gripper_qpos',
|
||||||
|
"robot1_eef_pos",
|
||||||
|
"robot1_eef_quat",
|
||||||
|
"robot1_gripper_qpos"]
|
||||||
|
image_keys: ['shouldercamera0_image',
|
||||||
|
'shouldercamera1_image']
|
||||||
|
shape_meta: ${shape_meta}
|
||||||
|
multi_step:
|
||||||
|
n_obs_steps: ${cond_steps}
|
||||||
|
n_action_steps: ${act_steps}
|
||||||
|
max_episode_steps: ${env.max_episode_steps}
|
||||||
|
reset_within_step: True
|
||||||
|
|
||||||
|
shape_meta:
|
||||||
|
obs:
|
||||||
|
rgb:
|
||||||
|
shape: [6, 96, 96]
|
||||||
|
state:
|
||||||
|
shape: [18]
|
||||||
|
action:
|
||||||
|
shape: [14]
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion.DiffusionModel
|
||||||
|
predict_epsilon: True
|
||||||
|
denoised_clip_value: 1.0
|
||||||
|
randn_clip_value: 3
|
||||||
|
#
|
||||||
|
use_ddim: ${use_ddim}
|
||||||
|
ddim_steps: ${ddim_steps}
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
network:
|
||||||
|
_target_: model.diffusion.unet.VisionUnet1D
|
||||||
|
backbone:
|
||||||
|
_target_: model.common.vit.VitEncoder
|
||||||
|
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||||
|
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch concatenates the history frames (3 channels per frame)
|
||||||
|
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||||
|
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||||
|
cfg:
|
||||||
|
patch_size: 8
|
||||||
|
depth: 1
|
||||||
|
embed_dim: 128
|
||||||
|
num_heads: 4
|
||||||
|
embed_style: embed2
|
||||||
|
embed_norm: 0
|
||||||
|
img_cond_steps: ${img_cond_steps}
|
||||||
|
augment: False
|
||||||
|
num_img: 2
|
||||||
|
spatial_emb: 128
|
||||||
|
diffusion_step_embed_dim: 32
|
||||||
|
dim: 64
|
||||||
|
dim_mults:
|
||||||
|
- 1
|
||||||
|
- 2
|
||||||
|
kernel_size: 5
|
||||||
|
n_groups: 8
|
||||||
|
smaller_encoder: false
|
||||||
|
cond_predict_scale: true
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
@ -7,7 +7,8 @@ _target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
|
|||||||
|
|
||||||
name: ${env_name}_ft_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
|
name: ${env_name}_ft_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
|
||||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_5000.pt # use 8000 for comparing policy parameterizations
|
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_5000.pt # use 5000 for comparing diffusion RL algorithms
|
||||||
|
# base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_mlp_ta4_td20/2024-06-28_13-29-54/checkpoint/state_8000.pt # use 8000 for comparing policy parameterizations
|
||||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
||||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
|
||||||
|
|
||||||
@ -54,13 +55,13 @@ train:
|
|||||||
actor_lr: 1e-4
|
actor_lr: 1e-4
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-4
|
min_lr: 1e-4
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
|
@ -66,16 +66,16 @@ train:
|
|||||||
gamma: 0.999
|
gamma: 0.999
|
||||||
augment: True
|
augment: True
|
||||||
grad_accumulate: 15
|
grad_accumulate: 15
|
||||||
actor_lr: 1e-4
|
actor_lr: 5e-5
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-4
|
min_lr: 5e-5
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
|
@ -27,7 +27,7 @@ env:
|
|||||||
name: ${env_name}
|
name: ${env_name}
|
||||||
best_reward_threshold_for_success: 1
|
best_reward_threshold_for_success: 1
|
||||||
max_episode_steps: 300
|
max_episode_steps: 300
|
||||||
save_video: false
|
save_video: False
|
||||||
wrappers:
|
wrappers:
|
||||||
robomimic_lowdim:
|
robomimic_lowdim:
|
||||||
normalization_path: ${normalization_path}
|
normalization_path: ${normalization_path}
|
||||||
@ -47,20 +47,20 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 300
|
n_train_itr: 151
|
||||||
n_critic_warmup_itr: 2
|
n_critic_warmup_itr: 2
|
||||||
n_steps: 300
|
n_steps: 300
|
||||||
gamma: 0.999
|
gamma: 0.999
|
||||||
actor_lr: 1e-5
|
actor_lr: 1e-4
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-5
|
min_lr: 1e-4
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
|
173
cfg/robomimic/finetune/can/ft_ppo_diffusion_unet_img.yaml
Normal file
173
cfg/robomimic/finetune/can/ft_ppo_diffusion_unet_img.yaml
Normal file
@ -0,0 +1,173 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/can/can_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-34-05_42/checkpoint/state_500.pt
|
||||||
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: can
|
||||||
|
obs_dim: 9
|
||||||
|
action_dim: 7
|
||||||
|
denoising_steps: 100
|
||||||
|
ft_denoising_steps: 5
|
||||||
|
cond_steps: 1
|
||||||
|
img_cond_steps: 1
|
||||||
|
horizon_steps: 4
|
||||||
|
act_steps: 4
|
||||||
|
use_ddim: True
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 50
|
||||||
|
name: ${env_name}
|
||||||
|
best_reward_threshold_for_success: 1
|
||||||
|
max_episode_steps: 300
|
||||||
|
save_video: False
|
||||||
|
use_image_obs: True
|
||||||
|
wrappers:
|
||||||
|
robomimic_image:
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
low_dim_keys: ['robot0_eef_pos',
|
||||||
|
'robot0_eef_quat',
|
||||||
|
'robot0_gripper_qpos']
|
||||||
|
image_keys: ['robot0_eye_in_hand_image']
|
||||||
|
shape_meta: ${shape_meta}
|
||||||
|
multi_step:
|
||||||
|
n_obs_steps: ${cond_steps}
|
||||||
|
n_action_steps: ${act_steps}
|
||||||
|
max_episode_steps: ${env.max_episode_steps}
|
||||||
|
reset_within_step: True
|
||||||
|
|
||||||
|
shape_meta:
|
||||||
|
obs:
|
||||||
|
rgb:
|
||||||
|
shape: [3, 96, 96]
|
||||||
|
state:
|
||||||
|
shape: [9]
|
||||||
|
action:
|
||||||
|
shape: [7]
|
||||||
|
|
||||||
|
wandb:
|
||||||
|
entity: ${oc.env:DPPO_WANDB_ENTITY}
|
||||||
|
project: robomimic-${env_name}-finetune
|
||||||
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
|
train:
|
||||||
|
n_train_itr: 151
|
||||||
|
n_critic_warmup_itr: 2
|
||||||
|
n_steps: 300
|
||||||
|
gamma: 0.999
|
||||||
|
augment: True
|
||||||
|
grad_accumulate: 15
|
||||||
|
actor_lr: 5e-5
|
||||||
|
actor_weight_decay: 0
|
||||||
|
actor_lr_scheduler:
|
||||||
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
|
warmup_steps: 10
|
||||||
|
min_lr: 5e-5
|
||||||
|
critic_lr: 1e-3
|
||||||
|
critic_weight_decay: 0
|
||||||
|
critic_lr_scheduler:
|
||||||
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
|
warmup_steps: 10
|
||||||
|
min_lr: 1e-3
|
||||||
|
save_model_freq: 100
|
||||||
|
val_freq: 10
|
||||||
|
render:
|
||||||
|
freq: 1
|
||||||
|
num: 0
|
||||||
|
# PPO specific
|
||||||
|
reward_scale_running: True
|
||||||
|
reward_scale_const: 1.0
|
||||||
|
gae_lambda: 0.95
|
||||||
|
batch_size: 500
|
||||||
|
logprob_batch_size: 500
|
||||||
|
update_epochs: 10
|
||||||
|
vf_coef: 0.5
|
||||||
|
target_kl: 1
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion_ppo.PPODiffusion
|
||||||
|
# HP to tune
|
||||||
|
gamma_denoising: 0.99
|
||||||
|
clip_ploss_coef: 0.01
|
||||||
|
clip_ploss_coef_base: 0.001
|
||||||
|
clip_ploss_coef_rate: 3
|
||||||
|
randn_clip_value: 3
|
||||||
|
min_sampling_denoising_std: 0.1
|
||||||
|
min_logprob_denoising_std: 0.1
|
||||||
|
#
|
||||||
|
use_ddim: ${use_ddim}
|
||||||
|
ddim_steps: ${ft_denoising_steps}
|
||||||
|
learn_eta: False
|
||||||
|
eta:
|
||||||
|
base_eta: 1
|
||||||
|
input_dim: ${obs_dim}
|
||||||
|
mlp_dims: [256, 256]
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
min_eta: 0.1
|
||||||
|
max_eta: 1.0
|
||||||
|
_target_: model.diffusion.eta.EtaFixed
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
actor:
|
||||||
|
_target_: model.diffusion.unet.VisionUnet1D
|
||||||
|
backbone:
|
||||||
|
_target_: model.common.vit.VitEncoder
|
||||||
|
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||||
|
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch concatenates the history frames (3 channels per frame)
|
||||||
|
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||||
|
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||||
|
cfg:
|
||||||
|
patch_size: 8
|
||||||
|
depth: 1
|
||||||
|
embed_dim: 128
|
||||||
|
num_heads: 4
|
||||||
|
embed_style: embed2
|
||||||
|
embed_norm: 0
|
||||||
|
img_cond_steps: ${img_cond_steps}
|
||||||
|
augment: False
|
||||||
|
spatial_emb: 128
|
||||||
|
diffusion_step_embed_dim: 32
|
||||||
|
dim: 40
|
||||||
|
dim_mults: [1, 2]
|
||||||
|
kernel_size: 5
|
||||||
|
n_groups: 8
|
||||||
|
smaller_encoder: False
|
||||||
|
cond_predict_scale: True
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
critic:
|
||||||
|
_target_: model.common.critic.ViTCritic
|
||||||
|
spatial_emb: 128
|
||||||
|
augment: False
|
||||||
|
backbone:
|
||||||
|
_target_: model.common.vit.VitEncoder
|
||||||
|
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||||
|
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch concatenates the history frames (3 channels per frame)
|
||||||
|
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||||
|
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||||
|
cfg:
|
||||||
|
patch_size: 8
|
||||||
|
depth: 1
|
||||||
|
embed_dim: 128
|
||||||
|
num_heads: 4
|
||||||
|
embed_style: embed2
|
||||||
|
embed_norm: 0
|
||||||
|
img_cond_steps: ${img_cond_steps}
|
||||||
|
mlp_dims: [256, 256, 256]
|
||||||
|
activation_type: Mish
|
||||||
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
ft_denoising_steps: ${ft_denoising_steps}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
@ -45,20 +45,20 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 300
|
n_train_itr: 151
|
||||||
n_critic_warmup_itr: 2
|
n_critic_warmup_itr: 2
|
||||||
n_steps: 300
|
n_steps: 300
|
||||||
gamma: 0.999
|
gamma: 0.999
|
||||||
actor_lr: 1e-5
|
actor_lr: 1e-4
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-5
|
min_lr: 1e-4
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
defaults:
|
defaults:
|
||||||
- _self_
|
- _self_
|
||||||
hydra:
|
hydra:
|
||||||
run:
|
run:
|
||||||
dir: ${logdir}
|
dir: ${logdir}
|
||||||
_target_: agent.finetune.train_ppo_gaussian_img_agent.TrainPPOImgGaussianAgent
|
_target_: agent.finetune.train_ppo_gaussian_img_agent.TrainPPOImgGaussianAgent
|
||||||
|
|
||||||
@ -57,22 +57,22 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 200
|
n_train_itr: 151
|
||||||
n_critic_warmup_itr: 2
|
n_critic_warmup_itr: 2
|
||||||
n_steps: 300
|
n_steps: 300
|
||||||
gamma: 0.999
|
gamma: 0.999
|
||||||
augment: True
|
augment: True
|
||||||
grad_accumulate: 5
|
grad_accumulate: 5
|
||||||
actor_lr: 1e-5
|
actor_lr: 1e-4
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 200
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-5
|
min_lr: 1e-4
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 200
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
@ -140,9 +140,9 @@ model:
|
|||||||
embed_style: embed2
|
embed_style: embed2
|
||||||
embed_norm: 0
|
embed_norm: 0
|
||||||
img_cond_steps: ${img_cond_steps}
|
img_cond_steps: ${img_cond_steps}
|
||||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
|
||||||
mlp_dims: [256, 256, 256]
|
mlp_dims: [256, 256, 256]
|
||||||
activation_type: Mish
|
activation_type: Mish
|
||||||
residual_style: True
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
horizon_steps: ${horizon_steps}
|
horizon_steps: ${horizon_steps}
|
||||||
device: ${device}
|
device: ${device}
|
@ -45,20 +45,20 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 300
|
n_train_itr: 151
|
||||||
n_critic_warmup_itr: 2
|
n_critic_warmup_itr: 2
|
||||||
n_steps: 300
|
n_steps: 300
|
||||||
gamma: 0.999
|
gamma: 0.999
|
||||||
actor_lr: 1e-5
|
actor_lr: 1e-4
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-5
|
min_lr: 1e-4
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
|
@ -46,20 +46,20 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 300
|
n_train_itr: 151
|
||||||
n_critic_warmup_itr: 2
|
n_critic_warmup_itr: 2
|
||||||
n_steps: 300
|
n_steps: 300
|
||||||
gamma: 0.999
|
gamma: 0.999
|
||||||
actor_lr: 1e-5
|
actor_lr: 1e-4
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-5
|
min_lr: 1e-4
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
|
@ -1,13 +1,14 @@
|
|||||||
defaults:
|
defaults:
|
||||||
- _self_
|
- _self_
|
||||||
hydra:
|
hydra:
|
||||||
run:
|
run:
|
||||||
dir: ${logdir}
|
dir: ${logdir}
|
||||||
_target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
|
_target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
|
||||||
|
|
||||||
name: ${env_name}_ft_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
|
name: ${env_name}_ft_diffusion_mlp_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
|
||||||
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_5000.pt # use 8000 for comparing policy parameterizations
|
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_5000.pt # use 5000 for comparing diffusion RL algorithms
|
||||||
|
# base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_mlp_ta4_td20/2024-06-28_14-47-58/checkpoint/state_8000.pt # use 8000 for comparing policy parameterizations
|
||||||
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}.json
|
||||||
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}/normalization.npz
|
||||||
|
|
||||||
@ -54,13 +55,13 @@ train:
|
|||||||
actor_lr: 1e-4
|
actor_lr: 1e-4
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-4
|
min_lr: 1e-4
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
|
@ -60,22 +60,22 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 151
|
n_train_itr: 81
|
||||||
n_critic_warmup_itr: 2
|
n_critic_warmup_itr: 2
|
||||||
n_steps: 300
|
n_steps: 300
|
||||||
gamma: 0.999
|
gamma: 0.999
|
||||||
augment: True
|
augment: True
|
||||||
grad_accumulate: 15
|
grad_accumulate: 15
|
||||||
actor_lr: 1e-4
|
actor_lr: 5e-5
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-4
|
min_lr: 5e-5
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
|
@ -27,7 +27,7 @@ env:
|
|||||||
name: ${env_name}
|
name: ${env_name}
|
||||||
best_reward_threshold_for_success: 1
|
best_reward_threshold_for_success: 1
|
||||||
max_episode_steps: 300
|
max_episode_steps: 300
|
||||||
save_video: false
|
save_video: False
|
||||||
wrappers:
|
wrappers:
|
||||||
robomimic_lowdim:
|
robomimic_lowdim:
|
||||||
normalization_path: ${normalization_path}
|
normalization_path: ${normalization_path}
|
||||||
@ -47,20 +47,20 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 300
|
n_train_itr: 81
|
||||||
n_critic_warmup_itr: 2
|
n_critic_warmup_itr: 2
|
||||||
n_steps: 300
|
n_steps: 300
|
||||||
gamma: 0.999
|
gamma: 0.999
|
||||||
actor_lr: 1e-5
|
actor_lr: 1e-4
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-5
|
min_lr: 1e-4
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
@ -102,10 +102,10 @@ model:
|
|||||||
action_dim: ${action_dim}
|
action_dim: ${action_dim}
|
||||||
critic:
|
critic:
|
||||||
_target_: model.common.critic.CriticObs
|
_target_: model.common.critic.CriticObs
|
||||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
|
||||||
mlp_dims: [256, 256, 256]
|
mlp_dims: [256, 256, 256]
|
||||||
activation_type: Mish
|
activation_type: Mish
|
||||||
residual_style: True
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
ft_denoising_steps: ${ft_denoising_steps}
|
ft_denoising_steps: ${ft_denoising_steps}
|
||||||
horizon_steps: ${horizon_steps}
|
horizon_steps: ${horizon_steps}
|
||||||
obs_dim: ${obs_dim}
|
obs_dim: ${obs_dim}
|
||||||
|
173
cfg/robomimic/finetune/lift/ft_ppo_diffusion_unet_img.yaml
Normal file
173
cfg/robomimic/finetune/lift/ft_ppo_diffusion_unet_img.yaml
Normal file
@ -0,0 +1,173 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/lift/lift_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-35-19_42/checkpoint/state_500.pt
|
||||||
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: lift
|
||||||
|
obs_dim: 9
|
||||||
|
action_dim: 7
|
||||||
|
denoising_steps: 100
|
||||||
|
ft_denoising_steps: 5
|
||||||
|
cond_steps: 1
|
||||||
|
img_cond_steps: 1
|
||||||
|
horizon_steps: 4
|
||||||
|
act_steps: 4
|
||||||
|
use_ddim: True
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 50
|
||||||
|
name: ${env_name}
|
||||||
|
best_reward_threshold_for_success: 1
|
||||||
|
max_episode_steps: 300
|
||||||
|
save_video: False
|
||||||
|
use_image_obs: True
|
||||||
|
wrappers:
|
||||||
|
robomimic_image:
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
low_dim_keys: ['robot0_eef_pos',
|
||||||
|
'robot0_eef_quat',
|
||||||
|
'robot0_gripper_qpos']
|
||||||
|
image_keys: ['robot0_eye_in_hand_image']
|
||||||
|
shape_meta: ${shape_meta}
|
||||||
|
multi_step:
|
||||||
|
n_obs_steps: ${cond_steps}
|
||||||
|
n_action_steps: ${act_steps}
|
||||||
|
max_episode_steps: ${env.max_episode_steps}
|
||||||
|
reset_within_step: True
|
||||||
|
|
||||||
|
shape_meta:
|
||||||
|
obs:
|
||||||
|
rgb:
|
||||||
|
shape: [3, 96, 96]
|
||||||
|
state:
|
||||||
|
shape: [9]
|
||||||
|
action:
|
||||||
|
shape: [7]
|
||||||
|
|
||||||
|
wandb:
|
||||||
|
entity: ${oc.env:DPPO_WANDB_ENTITY}
|
||||||
|
project: robomimic-${env_name}-finetune
|
||||||
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
|
train:
|
||||||
|
n_train_itr: 81
|
||||||
|
n_critic_warmup_itr: 2
|
||||||
|
n_steps: 300
|
||||||
|
gamma: 0.999
|
||||||
|
augment: True
|
||||||
|
grad_accumulate: 15
|
||||||
|
actor_lr: 5e-5
|
||||||
|
actor_weight_decay: 0
|
||||||
|
actor_lr_scheduler:
|
||||||
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
|
warmup_steps: 10
|
||||||
|
min_lr: 5e-5
|
||||||
|
critic_lr: 1e-3
|
||||||
|
critic_weight_decay: 0
|
||||||
|
critic_lr_scheduler:
|
||||||
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
|
warmup_steps: 10
|
||||||
|
min_lr: 1e-3
|
||||||
|
save_model_freq: 100
|
||||||
|
val_freq: 10
|
||||||
|
render:
|
||||||
|
freq: 1
|
||||||
|
num: 0
|
||||||
|
# PPO specific
|
||||||
|
reward_scale_running: True
|
||||||
|
reward_scale_const: 1.0
|
||||||
|
gae_lambda: 0.95
|
||||||
|
batch_size: 500
|
||||||
|
logprob_batch_size: 500
|
||||||
|
update_epochs: 10
|
||||||
|
vf_coef: 0.5
|
||||||
|
target_kl: 1
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion_ppo.PPODiffusion
|
||||||
|
# HP to tune
|
||||||
|
gamma_denoising: 0.99
|
||||||
|
clip_ploss_coef: 0.01
|
||||||
|
clip_ploss_coef_base: 0.001
|
||||||
|
clip_ploss_coef_rate: 3
|
||||||
|
randn_clip_value: 3
|
||||||
|
min_sampling_denoising_std: 0.1
|
||||||
|
min_logprob_denoising_std: 0.1
|
||||||
|
#
|
||||||
|
use_ddim: ${use_ddim}
|
||||||
|
ddim_steps: ${ft_denoising_steps}
|
||||||
|
learn_eta: False
|
||||||
|
eta:
|
||||||
|
base_eta: 1
|
||||||
|
input_dim: ${obs_dim}
|
||||||
|
mlp_dims: [256, 256]
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
min_eta: 0.1
|
||||||
|
max_eta: 1.0
|
||||||
|
_target_: model.diffusion.eta.EtaFixed
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
actor:
|
||||||
|
_target_: model.diffusion.unet.VisionUnet1D
|
||||||
|
backbone:
|
||||||
|
_target_: model.common.vit.VitEncoder
|
||||||
|
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||||
|
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch concatenates the history frames (3 channels per frame)
|
||||||
|
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||||
|
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||||
|
cfg:
|
||||||
|
patch_size: 8
|
||||||
|
depth: 1
|
||||||
|
embed_dim: 128
|
||||||
|
num_heads: 4
|
||||||
|
embed_style: embed2
|
||||||
|
embed_norm: 0
|
||||||
|
img_cond_steps: ${img_cond_steps}
|
||||||
|
augment: False
|
||||||
|
spatial_emb: 128
|
||||||
|
diffusion_step_embed_dim: 32
|
||||||
|
dim: 40
|
||||||
|
dim_mults: [1, 2]
|
||||||
|
kernel_size: 5
|
||||||
|
n_groups: 8
|
||||||
|
smaller_encoder: False
|
||||||
|
cond_predict_scale: True
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
critic:
|
||||||
|
_target_: model.common.critic.ViTCritic
|
||||||
|
spatial_emb: 128
|
||||||
|
augment: False
|
||||||
|
backbone:
|
||||||
|
_target_: model.common.vit.VitEncoder
|
||||||
|
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||||
|
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch concatenates the history frames (3 channels per frame)
|
||||||
|
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||||
|
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||||
|
cfg:
|
||||||
|
patch_size: 8
|
||||||
|
depth: 1
|
||||||
|
embed_dim: 128
|
||||||
|
num_heads: 4
|
||||||
|
embed_style: embed2
|
||||||
|
embed_norm: 0
|
||||||
|
img_cond_steps: ${img_cond_steps}
|
||||||
|
mlp_dims: [256, 256, 256]
|
||||||
|
activation_type: Mish
|
||||||
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
ft_denoising_steps: ${ft_denoising_steps}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
@ -25,7 +25,7 @@ env:
|
|||||||
name: ${env_name}
|
name: ${env_name}
|
||||||
best_reward_threshold_for_success: 1
|
best_reward_threshold_for_success: 1
|
||||||
max_episode_steps: 300
|
max_episode_steps: 300
|
||||||
save_video: false
|
save_video: False
|
||||||
wrappers:
|
wrappers:
|
||||||
robomimic_lowdim:
|
robomimic_lowdim:
|
||||||
normalization_path: ${normalization_path}
|
normalization_path: ${normalization_path}
|
||||||
@ -45,20 +45,20 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 300
|
n_train_itr: 81
|
||||||
n_critic_warmup_itr: 2
|
n_critic_warmup_itr: 2
|
||||||
n_steps: 300
|
n_steps: 300
|
||||||
gamma: 0.999
|
gamma: 0.999
|
||||||
actor_lr: 1e-5
|
actor_lr: 1e-4
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-5
|
min_lr: 1e-4
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
@ -93,9 +93,9 @@ model:
|
|||||||
action_dim: ${action_dim}
|
action_dim: ${action_dim}
|
||||||
critic:
|
critic:
|
||||||
_target_: model.common.critic.CriticObs
|
_target_: model.common.critic.CriticObs
|
||||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
|
||||||
mlp_dims: [256, 256, 256]
|
mlp_dims: [256, 256, 256]
|
||||||
activation_type: Mish
|
activation_type: Mish
|
||||||
residual_style: True
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
horizon_steps: ${horizon_steps}
|
horizon_steps: ${horizon_steps}
|
||||||
device: ${device}
|
device: ${device}
|
@ -1,7 +1,7 @@
|
|||||||
defaults:
|
defaults:
|
||||||
- _self_
|
- _self_
|
||||||
hydra:
|
hydra:
|
||||||
run:
|
run:
|
||||||
dir: ${logdir}
|
dir: ${logdir}
|
||||||
_target_: agent.finetune.train_ppo_gaussian_img_agent.TrainPPOImgGaussianAgent
|
_target_: agent.finetune.train_ppo_gaussian_img_agent.TrainPPOImgGaussianAgent
|
||||||
|
|
||||||
@ -57,22 +57,22 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 200
|
n_train_itr: 81
|
||||||
n_critic_warmup_itr: 2
|
n_critic_warmup_itr: 2
|
||||||
n_steps: 300
|
n_steps: 300
|
||||||
gamma: 0.999
|
gamma: 0.999
|
||||||
augment: True
|
augment: True
|
||||||
grad_accumulate: 5
|
grad_accumulate: 5
|
||||||
actor_lr: 1e-5
|
actor_lr: 1e-4
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 200
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-5
|
min_lr: 1e-4
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 200
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
@ -140,9 +140,9 @@ model:
|
|||||||
embed_style: embed2
|
embed_style: embed2
|
||||||
embed_norm: 0
|
embed_norm: 0
|
||||||
img_cond_steps: ${img_cond_steps}
|
img_cond_steps: ${img_cond_steps}
|
||||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
|
||||||
mlp_dims: [256, 256, 256]
|
mlp_dims: [256, 256, 256]
|
||||||
activation_type: Mish
|
activation_type: Mish
|
||||||
residual_style: True
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
horizon_steps: ${horizon_steps}
|
horizon_steps: ${horizon_steps}
|
||||||
device: ${device}
|
device: ${device}
|
@ -25,7 +25,7 @@ env:
|
|||||||
name: ${env_name}
|
name: ${env_name}
|
||||||
best_reward_threshold_for_success: 1
|
best_reward_threshold_for_success: 1
|
||||||
max_episode_steps: 300
|
max_episode_steps: 300
|
||||||
save_video: false
|
save_video: False
|
||||||
wrappers:
|
wrappers:
|
||||||
robomimic_lowdim:
|
robomimic_lowdim:
|
||||||
normalization_path: ${normalization_path}
|
normalization_path: ${normalization_path}
|
||||||
@ -45,20 +45,20 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 300
|
n_train_itr: 81
|
||||||
n_critic_warmup_itr: 2
|
n_critic_warmup_itr: 2
|
||||||
n_steps: 300
|
n_steps: 300
|
||||||
gamma: 0.999
|
gamma: 0.999
|
||||||
actor_lr: 1e-5
|
actor_lr: 1e-4
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-5
|
min_lr: 1e-4
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
@ -94,9 +94,9 @@ model:
|
|||||||
action_dim: ${action_dim}
|
action_dim: ${action_dim}
|
||||||
critic:
|
critic:
|
||||||
_target_: model.common.critic.CriticObs
|
_target_: model.common.critic.CriticObs
|
||||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
|
||||||
mlp_dims: [256, 256, 256]
|
mlp_dims: [256, 256, 256]
|
||||||
activation_type: Mish
|
activation_type: Mish
|
||||||
residual_style: True
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
horizon_steps: ${horizon_steps}
|
horizon_steps: ${horizon_steps}
|
||||||
device: ${device}
|
device: ${device}
|
@ -26,7 +26,7 @@ env:
|
|||||||
name: ${env_name}
|
name: ${env_name}
|
||||||
best_reward_threshold_for_success: 1
|
best_reward_threshold_for_success: 1
|
||||||
max_episode_steps: 300
|
max_episode_steps: 300
|
||||||
save_video: false
|
save_video: False
|
||||||
wrappers:
|
wrappers:
|
||||||
robomimic_lowdim:
|
robomimic_lowdim:
|
||||||
normalization_path: ${normalization_path}
|
normalization_path: ${normalization_path}
|
||||||
@ -46,20 +46,20 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 300
|
n_train_itr: 81
|
||||||
n_critic_warmup_itr: 2
|
n_critic_warmup_itr: 2
|
||||||
n_steps: 300
|
n_steps: 300
|
||||||
gamma: 0.999
|
gamma: 0.999
|
||||||
actor_lr: 1e-5
|
actor_lr: 1e-4
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-5
|
min_lr: 1e-4
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
@ -94,9 +94,9 @@ model:
|
|||||||
action_dim: ${action_dim}
|
action_dim: ${action_dim}
|
||||||
critic:
|
critic:
|
||||||
_target_: model.common.critic.CriticObs
|
_target_: model.common.critic.CriticObs
|
||||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
|
||||||
mlp_dims: [256, 256, 256]
|
mlp_dims: [256, 256, 256]
|
||||||
activation_type: Mish
|
activation_type: Mish
|
||||||
residual_style: True
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
horizon_steps: ${horizon_steps}
|
horizon_steps: ${horizon_steps}
|
||||||
device: ${device}
|
device: ${device}
|
@ -26,7 +26,7 @@ env:
|
|||||||
name: ${env_name}
|
name: ${env_name}
|
||||||
best_reward_threshold_for_success: 1
|
best_reward_threshold_for_success: 1
|
||||||
max_episode_steps: 300
|
max_episode_steps: 300
|
||||||
save_video: false
|
save_video: False
|
||||||
wrappers:
|
wrappers:
|
||||||
robomimic_lowdim:
|
robomimic_lowdim:
|
||||||
normalization_path: ${normalization_path}
|
normalization_path: ${normalization_path}
|
||||||
@ -46,20 +46,20 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 300
|
n_train_itr: 81
|
||||||
n_critic_warmup_itr: 2
|
n_critic_warmup_itr: 2
|
||||||
n_steps: 300
|
n_steps: 300
|
||||||
gamma: 0.999
|
gamma: 0.999
|
||||||
actor_lr: 1e-5
|
actor_lr: 1e-4
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-5
|
min_lr: 1e-4
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
@ -95,9 +95,9 @@ model:
|
|||||||
action_dim: ${action_dim}
|
action_dim: ${action_dim}
|
||||||
critic:
|
critic:
|
||||||
_target_: model.common.critic.CriticObs
|
_target_: model.common.critic.CriticObs
|
||||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
|
||||||
mlp_dims: [256, 256, 256]
|
mlp_dims: [256, 256, 256]
|
||||||
activation_type: Mish
|
activation_type: Mish
|
||||||
residual_style: True
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
horizon_steps: ${horizon_steps}
|
horizon_steps: ${horizon_steps}
|
||||||
device: ${device}
|
device: ${device}
|
@ -1,7 +1,7 @@
|
|||||||
defaults:
|
defaults:
|
||||||
- _self_
|
- _self_
|
||||||
hydra:
|
hydra:
|
||||||
run:
|
run:
|
||||||
dir: ${logdir}
|
dir: ${logdir}
|
||||||
_target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
|
_target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
|
||||||
|
|
||||||
@ -27,7 +27,7 @@ env:
|
|||||||
name: ${env_name}
|
name: ${env_name}
|
||||||
best_reward_threshold_for_success: 1
|
best_reward_threshold_for_success: 1
|
||||||
max_episode_steps: 400
|
max_episode_steps: 400
|
||||||
save_video: false
|
save_video: False
|
||||||
wrappers:
|
wrappers:
|
||||||
robomimic_lowdim:
|
robomimic_lowdim:
|
||||||
normalization_path: ${normalization_path}
|
normalization_path: ${normalization_path}
|
||||||
@ -54,14 +54,14 @@ train:
|
|||||||
actor_lr: 1e-4
|
actor_lr: 1e-4
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 0
|
||||||
min_lr: 1e-4
|
min_lr: 1e-4
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 0
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
val_freq: 10
|
val_freq: 10
|
||||||
|
@ -69,13 +69,13 @@ train:
|
|||||||
actor_lr: 1e-5
|
actor_lr: 1e-5
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
defaults:
|
defaults:
|
||||||
- _self_
|
- _self_
|
||||||
hydra:
|
hydra:
|
||||||
run:
|
run:
|
||||||
dir: ${logdir}
|
dir: ${logdir}
|
||||||
_target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
|
_target_: agent.finetune.train_ppo_diffusion_agent.TrainPPODiffusionAgent
|
||||||
|
|
||||||
@ -27,7 +27,7 @@ env:
|
|||||||
name: ${env_name}
|
name: ${env_name}
|
||||||
best_reward_threshold_for_success: 1
|
best_reward_threshold_for_success: 1
|
||||||
max_episode_steps: 400
|
max_episode_steps: 400
|
||||||
save_video: false
|
save_video: False
|
||||||
wrappers:
|
wrappers:
|
||||||
robomimic_lowdim:
|
robomimic_lowdim:
|
||||||
normalization_path: ${normalization_path}
|
normalization_path: ${normalization_path}
|
||||||
@ -47,21 +47,21 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 1000
|
n_train_itr: 201
|
||||||
n_critic_warmup_itr: 2
|
n_critic_warmup_itr: 2
|
||||||
n_steps: 400
|
n_steps: 400
|
||||||
gamma: 0.999
|
gamma: 0.999
|
||||||
actor_lr: 1e-5
|
actor_lr: 2e-5
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 0
|
||||||
min_lr: 1e-5
|
min_lr: 1e-4
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 0
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
val_freq: 10
|
val_freq: 10
|
||||||
@ -102,10 +102,10 @@ model:
|
|||||||
action_dim: ${action_dim}
|
action_dim: ${action_dim}
|
||||||
critic:
|
critic:
|
||||||
_target_: model.common.critic.CriticObs
|
_target_: model.common.critic.CriticObs
|
||||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
|
||||||
mlp_dims: [256, 256, 256]
|
mlp_dims: [256, 256, 256]
|
||||||
activation_type: Mish
|
activation_type: Mish
|
||||||
residual_style: True
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
ft_denoising_steps: ${ft_denoising_steps}
|
ft_denoising_steps: ${ft_denoising_steps}
|
||||||
horizon_steps: ${horizon_steps}
|
horizon_steps: ${horizon_steps}
|
||||||
obs_dim: ${obs_dim}
|
obs_dim: ${obs_dim}
|
||||||
|
173
cfg/robomimic/finetune/square/ft_ppo_diffusion_unet_img.yaml
Normal file
173
cfg/robomimic/finetune/square/ft_ppo_diffusion_unet_img.yaml
Normal file
@ -0,0 +1,173 @@
|
|||||||
|
defaults:
|
||||||
|
- _self_
|
||||||
|
hydra:
|
||||||
|
run:
|
||||||
|
dir: ${logdir}
|
||||||
|
_target_: agent.finetune.train_ppo_diffusion_img_agent.TrainPPOImgDiffusionAgent
|
||||||
|
|
||||||
|
name: ${env_name}_ft_diffusion_unet_img_ta${horizon_steps}_td${denoising_steps}_tdf${ft_denoising_steps}
|
||||||
|
logdir: ${oc.env:DPPO_LOG_DIR}/robomimic-finetune/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}
|
||||||
|
base_policy_path: ${oc.env:DPPO_LOG_DIR}/robomimic-pretrain/square/square_pre_diffusion_unet_img_ta4_td100/2024-11-15_17-36-37_42/checkpoint/state_500.pt
|
||||||
|
robomimic_env_cfg_path: cfg/robomimic/env_meta/${env_name}-img.json
|
||||||
|
normalization_path: ${oc.env:DPPO_DATA_DIR}/robomimic/${env_name}-img/normalization.npz
|
||||||
|
|
||||||
|
seed: 42
|
||||||
|
device: cuda:0
|
||||||
|
env_name: square
|
||||||
|
obs_dim: 9
|
||||||
|
action_dim: 7
|
||||||
|
denoising_steps: 100
|
||||||
|
ft_denoising_steps: 5
|
||||||
|
cond_steps: 1
|
||||||
|
img_cond_steps: 1
|
||||||
|
horizon_steps: 4
|
||||||
|
act_steps: 4
|
||||||
|
use_ddim: True
|
||||||
|
|
||||||
|
env:
|
||||||
|
n_envs: 50
|
||||||
|
name: ${env_name}
|
||||||
|
best_reward_threshold_for_success: 1
|
||||||
|
max_episode_steps: 400
|
||||||
|
save_video: False
|
||||||
|
use_image_obs: True
|
||||||
|
wrappers:
|
||||||
|
robomimic_image:
|
||||||
|
normalization_path: ${normalization_path}
|
||||||
|
low_dim_keys: ['robot0_eef_pos',
|
||||||
|
'robot0_eef_quat',
|
||||||
|
'robot0_gripper_qpos']
|
||||||
|
image_keys: ['agentview_image']
|
||||||
|
shape_meta: ${shape_meta}
|
||||||
|
multi_step:
|
||||||
|
n_obs_steps: ${cond_steps}
|
||||||
|
n_action_steps: ${act_steps}
|
||||||
|
max_episode_steps: ${env.max_episode_steps}
|
||||||
|
reset_within_step: True
|
||||||
|
|
||||||
|
shape_meta:
|
||||||
|
obs:
|
||||||
|
rgb:
|
||||||
|
shape: [3, 96, 96]
|
||||||
|
state:
|
||||||
|
shape: [9]
|
||||||
|
action:
|
||||||
|
shape: [7]
|
||||||
|
|
||||||
|
wandb:
|
||||||
|
entity: ${oc.env:DPPO_WANDB_ENTITY}
|
||||||
|
project: robomimic-${env_name}-finetune
|
||||||
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
|
train:
|
||||||
|
n_train_itr: 301
|
||||||
|
n_critic_warmup_itr: 2
|
||||||
|
n_steps: 400
|
||||||
|
gamma: 0.999
|
||||||
|
augment: True
|
||||||
|
grad_accumulate: 20
|
||||||
|
actor_lr: 1e-5
|
||||||
|
actor_weight_decay: 0
|
||||||
|
actor_lr_scheduler:
|
||||||
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
|
warmup_steps: 10
|
||||||
|
min_lr: 1e-5
|
||||||
|
critic_lr: 1e-3
|
||||||
|
critic_weight_decay: 0
|
||||||
|
critic_lr_scheduler:
|
||||||
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
|
warmup_steps: 10
|
||||||
|
min_lr: 1e-3
|
||||||
|
save_model_freq: 100
|
||||||
|
val_freq: 10
|
||||||
|
render:
|
||||||
|
freq: 1
|
||||||
|
num: 0
|
||||||
|
# PPO specific
|
||||||
|
reward_scale_running: True
|
||||||
|
reward_scale_const: 1.0
|
||||||
|
gae_lambda: 0.95
|
||||||
|
batch_size: 500
|
||||||
|
logprob_batch_size: 1000
|
||||||
|
update_epochs: 10
|
||||||
|
vf_coef: 0.5
|
||||||
|
target_kl: 1
|
||||||
|
|
||||||
|
model:
|
||||||
|
_target_: model.diffusion.diffusion_ppo.PPODiffusion
|
||||||
|
# HP to tune
|
||||||
|
gamma_denoising: 0.99
|
||||||
|
clip_ploss_coef: 0.01
|
||||||
|
clip_ploss_coef_base: 0.001
|
||||||
|
clip_ploss_coef_rate: 3
|
||||||
|
randn_clip_value: 3
|
||||||
|
min_sampling_denoising_std: 0.1
|
||||||
|
min_logprob_denoising_std: 0.1
|
||||||
|
#
|
||||||
|
use_ddim: ${use_ddim}
|
||||||
|
ddim_steps: ${ft_denoising_steps}
|
||||||
|
learn_eta: False
|
||||||
|
eta:
|
||||||
|
base_eta: 1
|
||||||
|
input_dim: ${obs_dim}
|
||||||
|
mlp_dims: [256, 256]
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
min_eta: 0.1
|
||||||
|
max_eta: 1.0
|
||||||
|
_target_: model.diffusion.eta.EtaFixed
|
||||||
|
network_path: ${base_policy_path}
|
||||||
|
actor:
|
||||||
|
_target_: model.diffusion.unet.VisionUnet1D
|
||||||
|
backbone:
|
||||||
|
_target_: model.common.vit.VitEncoder
|
||||||
|
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||||
|
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
|
||||||
|
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||||
|
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||||
|
cfg:
|
||||||
|
patch_size: 8
|
||||||
|
depth: 1
|
||||||
|
embed_dim: 128
|
||||||
|
num_heads: 4
|
||||||
|
embed_style: embed2
|
||||||
|
embed_norm: 0
|
||||||
|
img_cond_steps: ${img_cond_steps}
|
||||||
|
augment: False
|
||||||
|
spatial_emb: 128
|
||||||
|
diffusion_step_embed_dim: 32
|
||||||
|
dim: 64
|
||||||
|
dim_mults: [1, 2]
|
||||||
|
kernel_size: 5
|
||||||
|
n_groups: 8
|
||||||
|
smaller_encoder: False
|
||||||
|
cond_predict_scale: True
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
critic:
|
||||||
|
_target_: model.common.critic.ViTCritic
|
||||||
|
spatial_emb: 128
|
||||||
|
augment: False
|
||||||
|
backbone:
|
||||||
|
_target_: model.common.vit.VitEncoder
|
||||||
|
obs_shape: ${shape_meta.obs.rgb.shape}
|
||||||
|
num_channel: ${eval:'3 * ${img_cond_steps}'} # each image patch is history concatenated
|
||||||
|
img_h: ${shape_meta.obs.rgb.shape[1]}
|
||||||
|
img_w: ${shape_meta.obs.rgb.shape[2]}
|
||||||
|
cfg:
|
||||||
|
patch_size: 8
|
||||||
|
depth: 1
|
||||||
|
embed_dim: 128
|
||||||
|
num_heads: 4
|
||||||
|
embed_style: embed2
|
||||||
|
embed_norm: 0
|
||||||
|
img_cond_steps: ${img_cond_steps}
|
||||||
|
mlp_dims: [256, 256, 256]
|
||||||
|
activation_type: Mish
|
||||||
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
|
ft_denoising_steps: ${ft_denoising_steps}
|
||||||
|
horizon_steps: ${horizon_steps}
|
||||||
|
obs_dim: ${obs_dim}
|
||||||
|
action_dim: ${action_dim}
|
||||||
|
denoising_steps: ${denoising_steps}
|
||||||
|
device: ${device}
|
@ -25,7 +25,7 @@ env:
|
|||||||
name: ${env_name}
|
name: ${env_name}
|
||||||
best_reward_threshold_for_success: 1
|
best_reward_threshold_for_success: 1
|
||||||
max_episode_steps: 400
|
max_episode_steps: 400
|
||||||
save_video: false
|
save_video: False
|
||||||
wrappers:
|
wrappers:
|
||||||
robomimic_lowdim:
|
robomimic_lowdim:
|
||||||
normalization_path: ${normalization_path}
|
normalization_path: ${normalization_path}
|
||||||
@ -45,21 +45,21 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 1000
|
n_train_itr: 201
|
||||||
n_critic_warmup_itr: 2
|
n_critic_warmup_itr: 2
|
||||||
n_steps: 400
|
n_steps: 400
|
||||||
gamma: 0.999
|
gamma: 0.999
|
||||||
actor_lr: 1e-5
|
actor_lr: 1e-4
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 0
|
||||||
min_lr: 1e-5
|
min_lr: 1e-4
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 0
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
val_freq: 10
|
val_freq: 10
|
||||||
@ -93,9 +93,9 @@ model:
|
|||||||
action_dim: ${action_dim}
|
action_dim: ${action_dim}
|
||||||
critic:
|
critic:
|
||||||
_target_: model.common.critic.CriticObs
|
_target_: model.common.critic.CriticObs
|
||||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
|
||||||
mlp_dims: [256, 256, 256]
|
mlp_dims: [256, 256, 256]
|
||||||
activation_type: Mish
|
activation_type: Mish
|
||||||
residual_style: True
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
horizon_steps: ${horizon_steps}
|
horizon_steps: ${horizon_steps}
|
||||||
device: ${device}
|
device: ${device}
|
@ -57,7 +57,7 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 500
|
n_train_itr: 301
|
||||||
n_critic_warmup_itr: 2
|
n_critic_warmup_itr: 2
|
||||||
n_steps: 400
|
n_steps: 400
|
||||||
gamma: 0.999
|
gamma: 0.999
|
||||||
@ -66,13 +66,13 @@ train:
|
|||||||
actor_lr: 1e-5
|
actor_lr: 1e-5
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 500
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-5
|
min_lr: 1e-5
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 500
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
@ -140,9 +140,9 @@ model:
|
|||||||
embed_style: embed2
|
embed_style: embed2
|
||||||
embed_norm: 0
|
embed_norm: 0
|
||||||
img_cond_steps: ${img_cond_steps}
|
img_cond_steps: ${img_cond_steps}
|
||||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
|
||||||
mlp_dims: [256, 256, 256]
|
mlp_dims: [256, 256, 256]
|
||||||
activation_type: Mish
|
activation_type: Mish
|
||||||
residual_style: True
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
horizon_steps: ${horizon_steps}
|
horizon_steps: ${horizon_steps}
|
||||||
device: ${device}
|
device: ${device}
|
@ -25,7 +25,7 @@ env:
|
|||||||
name: ${env_name}
|
name: ${env_name}
|
||||||
best_reward_threshold_for_success: 1
|
best_reward_threshold_for_success: 1
|
||||||
max_episode_steps: 400
|
max_episode_steps: 400
|
||||||
save_video: false
|
save_video: False
|
||||||
wrappers:
|
wrappers:
|
||||||
robomimic_lowdim:
|
robomimic_lowdim:
|
||||||
normalization_path: ${normalization_path}
|
normalization_path: ${normalization_path}
|
||||||
@ -45,21 +45,21 @@ wandb:
|
|||||||
run: ${now:%H-%M-%S}_${name}
|
run: ${now:%H-%M-%S}_${name}
|
||||||
|
|
||||||
train:
|
train:
|
||||||
n_train_itr: 1000
|
n_train_itr: 201
|
||||||
n_critic_warmup_itr: 2
|
n_critic_warmup_itr: 2
|
||||||
n_steps: 400
|
n_steps: 400
|
||||||
gamma: 0.999
|
gamma: 0.999
|
||||||
actor_lr: 1e-5
|
actor_lr: 1e-4
|
||||||
actor_weight_decay: 0
|
actor_weight_decay: 0
|
||||||
actor_lr_scheduler:
|
actor_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 0
|
||||||
min_lr: 1e-5
|
min_lr: 1e-4
|
||||||
critic_lr: 1e-3
|
critic_lr: 1e-3
|
||||||
critic_weight_decay: 0
|
critic_weight_decay: 0
|
||||||
critic_lr_scheduler:
|
critic_lr_scheduler:
|
||||||
first_cycle_steps: 1000
|
first_cycle_steps: ${train.n_train_itr}
|
||||||
warmup_steps: 10
|
warmup_steps: 0
|
||||||
min_lr: 1e-3
|
min_lr: 1e-3
|
||||||
save_model_freq: 100
|
save_model_freq: 100
|
||||||
val_freq: 10
|
val_freq: 10
|
||||||
@ -94,9 +94,9 @@ model:
|
|||||||
action_dim: ${action_dim}
|
action_dim: ${action_dim}
|
||||||
critic:
|
critic:
|
||||||
_target_: model.common.critic.CriticObs
|
_target_: model.common.critic.CriticObs
|
||||||
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
|
||||||
mlp_dims: [256, 256, 256]
|
mlp_dims: [256, 256, 256]
|
||||||
activation_type: Mish
|
activation_type: Mish
|
||||||
residual_style: True
|
residual_style: True
|
||||||
|
cond_dim: ${eval:'${obs_dim} * ${cond_steps}'}
|
||||||
horizon_steps: ${horizon_steps}
|
horizon_steps: ${horizon_steps}
|
||||||
device: ${device}
|
device: ${device}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user