From bf0de5f54d311a146523f1b295c52fe68d2c5005 Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Sun, 27 Nov 2022 01:25:31 +0100 Subject: [PATCH 1/3] set boundary condition to None at reset() & end replanning if env is done --- fancy_gym/black_box/black_box_wrapper.py | 10 +++++----- fancy_gym/envs/__init__.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index 2a2a3f5..d8dcbaa 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -161,9 +161,7 @@ class BlackBoxWrapper(gym.ObservationWrapper): self.plan_steps += 1 for t, (pos, vel) in enumerate(zip(position, velocity)): - current_pos = self.current_pos - current_vel = self.current_vel - step_action = self.tracking_controller.get_action(pos, vel, current_pos, current_vel) + step_action = self.tracking_controller.get_action(pos, vel, self.current_pos, self.current_vel) c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high) obs, c_reward, done, info = self.env.step(c_action) rewards[t] = c_reward @@ -180,10 +178,10 @@ class BlackBoxWrapper(gym.ObservationWrapper): if self.render_kwargs: self.env.render(**self.render_kwargs) - if done or self.replanning_schedule(current_pos, current_vel, obs, c_action, + if done or self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action, t + 1 + self.current_traj_steps): - if self.max_planning_times is not None and self.plan_steps >= self.max_planning_times: + if not done and self.max_planning_times is not None and self.plan_steps >= self.max_planning_times: continue self.condition_pos = pos if self.condition_on_desired else None @@ -214,4 +212,6 @@ class BlackBoxWrapper(gym.ObservationWrapper): self.current_traj_steps = 0 self.plan_steps = 0 self.traj_gen.reset() + self.condition_vel = None + self.condition_pos = None return super(BlackBoxWrapper, self).reset() diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index 890e043..f1a59ec 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -485,7 +485,7 @@ for _v in _versions: for _v in _versions: _name = _v.split("-") - _env_id = f'{_name[0]}ReplanProDMP-{_name[1]}' + _env_id = f'{_name[0]}ProDMP-{_name[1]}' kwargs_dict_box_pushing_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP) kwargs_dict_box_pushing_prodmp['wrappers'].append(mujoco.box_pushing.MPWrapper) kwargs_dict_box_pushing_prodmp['name'] = _v @@ -498,7 +498,7 @@ for _v in _versions: kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 4 kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 - kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 2 + kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 4 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t : t % 25 == 0 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['condition_on_desired'] = True register( From bb94c9c70705f3904660f67b8a6b19d7f696d01f Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Sun, 27 Nov 2022 01:26:18 +0100 Subject: [PATCH 2/3] typos --- fancy_gym/envs/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index f1a59ec..d504990 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -485,7 +485,7 @@ for _v in _versions: for _v in _versions: _name = _v.split("-") - _env_id = f'{_name[0]}ProDMP-{_name[1]}' + _env_id = f'{_name[0]}ReplanProDMP-{_name[1]}' kwargs_dict_box_pushing_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP) kwargs_dict_box_pushing_prodmp['wrappers'].append(mujoco.box_pushing.MPWrapper) kwargs_dict_box_pushing_prodmp['name'] = _v From e7d2454e304ccc69539e9252469619b9d5e90056 Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Mon, 28 Nov 2022 10:49:05 +0100 Subject: [PATCH 3/3] make if condition better --- fancy_gym/black_box/black_box_wrapper.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index d8dcbaa..66c5f3e 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -22,7 +22,7 @@ class BlackBoxWrapper(gym.ObservationWrapper): replanning_schedule: Optional[ Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int], bool]] = None, reward_aggregation: Callable[[np.ndarray], float] = np.sum, - max_planning_times: int = None, + max_planning_times: int = np.inf, condition_on_desired: bool = False ): """ @@ -178,11 +178,9 @@ class BlackBoxWrapper(gym.ObservationWrapper): if self.render_kwargs: self.env.render(**self.render_kwargs) - if done or self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action, - t + 1 + self.current_traj_steps): - - if not done and self.max_planning_times is not None and self.plan_steps >= self.max_planning_times: - continue + if done or (self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action, + t + 1 + self.current_traj_steps) + and self.plan_steps < self.max_planning_times): self.condition_pos = pos if self.condition_on_desired else None self.condition_vel = vel if self.condition_on_desired else None