From ffe48dfb57899177c4722864f20d0cb4e7144097 Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Wed, 9 Nov 2022 12:49:17 +0100 Subject: [PATCH 01/11] change back infos to default setting --- fancy_gym/black_box/black_box_wrapper.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index dc5445e..ea28ef7 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -210,10 +210,8 @@ class BlackBoxWrapper(gym.ObservationWrapper): self.current_traj_steps += t + 1 if self.verbose >= 2: - infos['desired_pos'] = position[:t+1] - infos['desired_vel'] = velocity[:t+1] - infos['current_pos'] = self.current_pos - infos['current_vel'] = self.current_vel + infos['positions'] = position + infos['velocities'] = velocity infos['step_actions'] = actions[:t + 1] infos['step_observations'] = observations[:t + 1] infos['step_rewards'] = rewards[:t + 1] From 60e18d29646d237671ab81b697ae5036845390d3 Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Wed, 9 Nov 2022 17:54:34 +0100 Subject: [PATCH 02/11] add prodmp to test_black_box --- fancy_gym/envs/__init__.py | 2 +- fancy_gym/examples/example_sim_env.py | 9 +++ .../examples/examples_movement_primitives.py | 10 +-- fancy_gym/utils/make_env_helpers.py | 3 + test/test_black_box.py | 37 +++++---- test/test_replanning_envs.py | 75 +++++++++++++++++-- 6 files changed, 108 insertions(+), 28 deletions(-) create mode 100644 fancy_gym/examples/example_sim_env.py diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index d3dfa8e..4483637 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -503,7 +503,7 @@ for _v in _versions: kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3 kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0 - kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 4 + kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 0 kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['alpha'] = 10. 
kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 # 3.5, 4 to try kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 diff --git a/fancy_gym/examples/example_sim_env.py b/fancy_gym/examples/example_sim_env.py new file mode 100644 index 0000000..f949a89 --- /dev/null +++ b/fancy_gym/examples/example_sim_env.py @@ -0,0 +1,9 @@ +import gym_blockpush +import gym + +env = gym.make("blockpush-v0") +env.start() +env.scene.reset() +for i in range(100): + env.step(env.action_space.sample()) + env.render() \ No newline at end of file diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py index e19eacb..707dccd 100644 --- a/fancy_gym/examples/examples_movement_primitives.py +++ b/fancy_gym/examples/examples_movement_primitives.py @@ -157,17 +157,17 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): if __name__ == '__main__': render = True # DMP - example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render) + # example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render) # ProMP - example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render) - example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render) + # example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render) + # example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render) # ProDMP example_mp("BoxPushingDenseProDMP-v0", seed=10, iterations=16, render=render) # Altered basis functions - obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render) + # obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render) # Custom MP - example_fully_custom_mp(seed=10, iterations=1, render=render) + # example_fully_custom_mp(seed=10, iterations=1, render=render) diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py index 3c73ba9..0ba7a4a 100644 --- a/fancy_gym/utils/make_env_helpers.py +++ b/fancy_gym/utils/make_env_helpers.py @@ -175,6 +175,9 @@ def make_bb( if phase_kwargs.get('learn_delay'): phase_kwargs["delay_bound"] = [0, black_box_kwargs['duration'] - env.dt * 2] + if traj_gen_kwargs['trajectory_generator_type'] == 'prodmp': + assert basis_kwargs['basis_generator_type'] == 'prodmp', 'prodmp trajectory generator requires prodmp basis generator' + phase_gen = get_phase_generator(**phase_kwargs) basis_gen = get_basis_generator(phase_generator=phase_gen, **basis_kwargs) controller = get_controller(**controller_kwargs) diff --git a/test/test_black_box.py b/test/test_black_box.py index d5e3a88..69c0088 100644 --- a/test/test_black_box.py +++ b/test/test_black_box.py @@ -67,28 +67,32 @@ def test_missing_wrapper(env_id: str): fancy_gym.make_bb(env_id, [], {}, {}, {}, {}, {}) -@pytest.mark.parametrize('mp_type', ['promp', 'dmp']) +@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) def test_missing_local_state(mp_type: str): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + env = fancy_gym.make_bb('toy-v0', [RawInterfaceWrapper], {}, {'trajectory_generator_type': mp_type}, {'controller_type': 'motor'}, {'phase_generator_type': 'exp'}, - {'basis_generator_type': 'rbf'}) + {'basis_generator_type': basis_generator_type}) env.reset() with pytest.raises(NotImplementedError): env.step(env.action_space.sample()) -@pytest.mark.parametrize('mp_type', ['promp', 'dmp']) +@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) 
@pytest.mark.parametrize('env_wrap', zip(ENV_IDS, WRAPPERS)) @pytest.mark.parametrize('verbose', [1, 2]) def test_verbosity(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]], verbose: int): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + env_id, wrapper_class = env_wrap env = fancy_gym.make_bb(env_id, [wrapper_class], {'verbose': verbose}, {'trajectory_generator_type': mp_type}, {'controller_type': 'motor'}, {'phase_generator_type': 'exp'}, - {'basis_generator_type': 'rbf'}) + {'basis_generator_type': basis_generator_type}) env.reset() info_keys = list(env.step(env.action_space.sample())[3].keys()) @@ -104,15 +108,17 @@ def test_verbosity(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]] assert all(e in info_keys for e in mp_keys) -@pytest.mark.parametrize('mp_type', ['promp', 'dmp']) +@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) @pytest.mark.parametrize('env_wrap', zip(ENV_IDS, WRAPPERS)) def test_length(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]]): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + env_id, wrapper_class = env_wrap env = fancy_gym.make_bb(env_id, [wrapper_class], {}, {'trajectory_generator_type': mp_type}, {'controller_type': 'motor'}, {'phase_generator_type': 'exp'}, - {'basis_generator_type': 'rbf'}) + {'basis_generator_type': basis_generator_type}) for _ in range(5): env.reset() @@ -121,14 +127,15 @@ def test_length(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]]): assert length == env.spec.max_episode_steps -@pytest.mark.parametrize('mp_type', ['promp', 'dmp']) +@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) @pytest.mark.parametrize('reward_aggregation', [np.sum, np.mean, np.median, lambda x: np.mean(x[::2])]) def test_aggregation(mp_type: str, reward_aggregation: Callable[[np.ndarray], float]): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {'reward_aggregation': reward_aggregation}, {'trajectory_generator_type': mp_type}, {'controller_type': 'motor'}, {'phase_generator_type': 'exp'}, - {'basis_generator_type': 'rbf'}) + {'basis_generator_type': basis_generator_type}) env.reset() # ToyEnv only returns 1 as reward assert env.step(env.action_space.sample())[1] == reward_aggregation(np.ones(50, )) @@ -149,12 +156,13 @@ def test_context_space(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapp assert env.observation_space.shape == wrapper.context_mask[wrapper.context_mask].shape -@pytest.mark.parametrize('mp_type', ['promp', 'dmp']) +@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) @pytest.mark.parametrize('num_dof', [0, 1, 2, 5]) -@pytest.mark.parametrize('num_basis', [0, 1, 2, 5]) +@pytest.mark.parametrize('num_basis', [0, 2, 5]) # should add 1 back after the bug is fixed @pytest.mark.parametrize('learn_tau', [True, False]) @pytest.mark.parametrize('learn_delay', [True, False]) def test_action_space(mp_type: str, num_dof: int, num_basis: int, learn_tau: bool, learn_delay: bool): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {}, {'trajectory_generator_type': mp_type, 'action_dim': num_dof @@ -164,28 +172,29 @@ def test_action_space(mp_type: str, num_dof: int, num_basis: int, learn_tau: boo 'learn_tau': learn_tau, 'learn_delay': learn_delay }, - {'basis_generator_type': 'rbf', + {'basis_generator_type': basis_generator_type, 'num_basis': num_basis }) base_dims = num_dof * num_basis - 
additional_dims = num_dof if mp_type == 'dmp' else 0 + additional_dims = num_dof if 'dmp' in mp_type else 0 traj_modification_dims = int(learn_tau) + int(learn_delay) assert env.action_space.shape[0] == base_dims + traj_modification_dims + additional_dims -@pytest.mark.parametrize('mp_type', ['promp', 'dmp']) +@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) @pytest.mark.parametrize('a', [1]) @pytest.mark.parametrize('b', [1.0]) @pytest.mark.parametrize('c', [[1], [1.0], ['str'], [{'a': 'b'}], [np.ones(3, )]]) @pytest.mark.parametrize('d', [{'a': 1}, {1: 2.0}, {'a': [1.0]}, {'a': np.ones(3, )}, {'a': {'a': 'b'}}]) @pytest.mark.parametrize('e', [Object()]) def test_change_env_kwargs(mp_type: str, a: int, b: float, c: list, d: dict, e: Object): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {}, {'trajectory_generator_type': mp_type}, {'controller_type': 'motor'}, {'phase_generator_type': 'exp'}, - {'basis_generator_type': 'rbf'}, + {'basis_generator_type': basis_generator_type}, a=a, b=b, c=c, d=d, e=e ) assert a is env.a diff --git a/test/test_replanning_envs.py b/test/test_replanning_envs.py index 300faed..4228284 100644 --- a/test/test_replanning_envs.py +++ b/test/test_replanning_envs.py @@ -1,6 +1,14 @@ from itertools import chain +from typing import Tuple, Type, Union, Optional, Callable +import gym +import numpy as np import pytest +from gym import register +from gym.core import ActType, ObsType + +import fancy_gym +from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper import fancy_gym from test.utils import run_env, run_env_determinism @@ -10,14 +18,65 @@ Fancy_ProDMP_IDS = fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS['ProDMP'] All_ProDMP_IDS = fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS['ProDMP'] +class Object(object): + pass -@pytest.mark.parametrize('env_id', All_ProDMP_IDS) -def test_replanning_envs(env_id: str): - """Tests that ProDMP environments run without errors using random actions.""" - run_env(env_id) -@pytest.mark.parametrize('env_id', All_ProDMP_IDS) -def test_replanning_determinism(env_id: str): - """Tests that ProDMP environments are deterministic.""" - run_env_determinism(env_id, 0) +class ToyEnv(gym.Env): + observation_space = gym.spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float64) + action_space = gym.spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float64) + dt = 0.02 + def __init__(self, a: int = 0, b: float = 0.0, c: list = [], d: dict = {}, e: Object = Object()): + self.a, self.b, self.c, self.d, self.e = a, b, c, d, e + + def reset(self, *, seed: Optional[int] = None, return_info: bool = False, + options: Optional[dict] = None) -> Union[ObsType, Tuple[ObsType, dict]]: + return np.array([-1]) + + def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]: + return np.array([-1]), 1, False, {} + + def render(self, mode="human"): + pass + + +class ToyWrapper(RawInterfaceWrapper): + + @property + def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: + return np.ones(self.action_space.shape) + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return np.zeros(self.action_space.shape) + +@pytest.fixture(scope="session", autouse=True) +def setup(): + register( + id=f'toy-v0', + entry_point='test.test_black_box:ToyEnv', + max_episode_steps=50, + ) +# @pytest.mark.parametrize('env_id', All_ProDMP_IDS) +# def test_replanning_envs(env_id: str): +# """Tests that ProDMP environments run without errors using 
random actions.""" +# run_env(env_id) +# +# @pytest.mark.parametrize('env_id', All_ProDMP_IDS) +# def test_replanning_determinism(env_id: str): +# """Tests that ProDMP environments are deterministic.""" +# run_env_determinism(env_id, 0) + +@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) +def test_missing_local_state(mp_type: str): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + + env = fancy_gym.make_bb('toy-v0', [RawInterfaceWrapper], {}, + {'trajectory_generator_type': mp_type}, + {'controller_type': 'motor'}, + {'phase_generator_type': 'exp'}, + {'basis_generator_type': basis_generator_type}) + env.reset() + with pytest.raises(NotImplementedError): + env.step(env.action_space.sample()) \ No newline at end of file From 104b90929608f1299f8c559452c457a43fcb498d Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Sun, 13 Nov 2022 16:59:13 +0100 Subject: [PATCH 03/11] delete hacky experimental codes & add tests to test_black_box --- fancy_gym/black_box/black_box_wrapper.py | 20 +---- fancy_gym/envs/__init__.py | 2 + test/test_black_box.py | 97 +++++++++++++++++++----- 3 files changed, 86 insertions(+), 33 deletions(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index ea28ef7..88f8a32 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -24,7 +24,7 @@ class BlackBoxWrapper(gym.ObservationWrapper): Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int], bool]] = None, reward_aggregation: Callable[[np.ndarray], float] = np.sum, max_planning_times: int = 1, - desired_conditioning: bool = False + desired_traj_bc: bool = False ): """ gym.Wrapper for leveraging a black box approach with a trajectory generator. @@ -59,18 +59,11 @@ class BlackBoxWrapper(gym.ObservationWrapper): # reward computation self.reward_aggregation = reward_aggregation - # self.traj_gen.basis_gn.show_basis(plot=True) # spaces self.return_context_observation = not (learn_sub_trajectories or self.do_replanning) - # self.return_context_observation = True self.traj_gen_action_space = self._get_traj_gen_action_space() self.action_space = self._get_action_space() - # no goal learning - # tricky_action_upperbound = [np.inf] * (self.traj_gen_action_space.shape[0] - 7) - # tricky_action_lowerbound = [-np.inf] * (self.traj_gen_action_space.shape[0] - 7) - # self.action_space = spaces.Box(np.array(tricky_action_lowerbound), np.array(tricky_action_upperbound), dtype=np.float32) - self.observation_space = self._get_observation_space() # rendering @@ -78,7 +71,7 @@ class BlackBoxWrapper(gym.ObservationWrapper): self.verbose = verbose # condition value - self.desired_conditioning = True + self.desired_traj_bc = desired_traj_bc self.condition_pos = None self.condition_vel = None @@ -157,11 +150,6 @@ class BlackBoxWrapper(gym.ObservationWrapper): def step(self, action: np.ndarray): """ This function generates a trajectory based on a MP and then does the usual loop over reset and step""" - ## tricky part, only use weights basis - # basis_weights = action.reshape(7, -1) - # goal_weights = np.zeros((7, 1)) - # action = np.concatenate((basis_weights, goal_weights), axis=1).flatten() - # TODO remove this part, right now only needed for beer pong mp_params, env_spec_params = self.env.episode_callback(action, self.traj_gen) position, velocity = self.get_trajectory(mp_params) @@ -201,8 +189,8 @@ class BlackBoxWrapper(gym.ObservationWrapper): if self.max_planning_times is not None and self.plan_counts >= 
self.max_planning_times: continue - self.condition_pos = pos if self.desired_conditioning else self.current_pos - self.condition_vel = vel if self.desired_conditioning else self.current_vel + self.condition_pos = pos if self.desired_traj_bc else self.current_pos + self.condition_vel = vel if self.desired_traj_bc else self.current_vel break diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index 4483637..eb44d9f 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -88,6 +88,7 @@ DEFAULT_BB_DICT_ProDMP = { "black_box_kwargs": { 'replanning_schedule': None, 'max_planning_times': None, + 'desired_traj_bc': False, 'verbose': 2 } } @@ -509,6 +510,7 @@ for _v in _versions: kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 2 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t : t % 25 == 0 + kwargs_dict_box_pushing_prodmp['black_box_kwargs']['desired_traj_bc'] = True register( id=_env_id, entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', diff --git a/test/test_black_box.py b/test/test_black_box.py index 69c0088..fa1cd01 100644 --- a/test/test_black_box.py +++ b/test/test_black_box.py @@ -205,18 +205,20 @@ def test_change_env_kwargs(mp_type: str, a: int, b: float, c: list, d: dict, e: assert e is env.e -@pytest.mark.parametrize('mp_type', ['promp']) +@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) @pytest.mark.parametrize('tau', [0.25, 0.5, 0.75, 1]) def test_learn_tau(mp_type: str, tau: float): + phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {'verbose': 2}, {'trajectory_generator_type': mp_type, }, {'controller_type': 'motor'}, - {'phase_generator_type': 'linear', + {'phase_generator_type': phase_generator_type, 'learn_tau': True, 'learn_delay': False }, - {'basis_generator_type': 'rbf', + {'basis_generator_type': basis_generator_type, }, seed=SEED) d = True @@ -237,26 +239,29 @@ def test_learn_tau(mp_type: str, tau: float): vel = info['velocities'].flatten() # Check end is all same (only true for linear basis) - assert np.all(pos[tau_time_steps:] == pos[-1]) - assert np.all(vel[tau_time_steps:] == vel[-1]) + if phase_generator_type == "linear": + assert np.all(pos[tau_time_steps:] == pos[-1]) + assert np.all(vel[tau_time_steps:] == vel[-1]) # Check active trajectory section is different to end values assert np.all(pos[:tau_time_steps - 1] != pos[-1]) assert np.all(vel[:tau_time_steps - 2] != vel[-1]) - - -@pytest.mark.parametrize('mp_type', ['promp']) +# +# +@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) @pytest.mark.parametrize('delay', [0, 0.25, 0.5, 0.75]) def test_learn_delay(mp_type: str, delay: float): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {'verbose': 2}, {'trajectory_generator_type': mp_type, }, {'controller_type': 'motor'}, - {'phase_generator_type': 'linear', + {'phase_generator_type': phase_generator_type, 'learn_tau': False, 'learn_delay': True }, - {'basis_generator_type': 'rbf', + {'basis_generator_type': basis_generator_type, }, seed=SEED) d = True @@ -283,21 +288,23 @@ def test_learn_delay(mp_type: str, delay: float): # Check active trajectory section is different to 
beginning values assert np.all(pos[max(1, delay_time_steps):] != pos[0]) assert np.all(vel[max(1, delay_time_steps)] != vel[0]) - - -@pytest.mark.parametrize('mp_type', ['promp']) +# +# +@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) @pytest.mark.parametrize('tau', [0.25, 0.5, 0.75, 1]) @pytest.mark.parametrize('delay', [0.25, 0.5, 0.75, 1]) def test_learn_tau_and_delay(mp_type: str, tau: float, delay: float): + phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {'verbose': 2}, {'trajectory_generator_type': mp_type, }, {'controller_type': 'motor'}, - {'phase_generator_type': 'linear', + {'phase_generator_type': phase_generator_type, 'learn_tau': True, 'learn_delay': True }, - {'basis_generator_type': 'rbf', + {'basis_generator_type': basis_generator_type, }, seed=SEED) if env.spec.max_episode_steps * env.dt < delay + tau: @@ -324,8 +331,9 @@ def test_learn_tau_and_delay(mp_type: str, tau: float, delay: float): vel = info['velocities'].flatten() # Check end is all same (only true for linear basis) - assert np.all(pos[joint_time_steps:] == pos[-1]) - assert np.all(vel[joint_time_steps:] == vel[-1]) + if phase_generator_type == "linear": + assert np.all(pos[joint_time_steps:] == pos[-1]) + assert np.all(vel[joint_time_steps:] == vel[-1]) # Check beginning is all same (only true for linear basis) assert np.all(pos[:delay_time_steps - 1] == pos[0]) @@ -336,3 +344,58 @@ def test_learn_tau_and_delay(mp_type: str, tau: float, delay: float): active_vel = vel[delay_time_steps: joint_time_steps - 2] assert np.all(active_pos != pos[-1]) and np.all(active_pos != pos[0]) assert np.all(active_vel != vel[-1]) and np.all(active_vel != vel[0]) + + +@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) +@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4]) +@pytest.mark.parametrize('sub_segment_steps', [5, 10]) +def test_replanning_schedule(mp_type: str, max_planning_times: int, sub_segment_steps: int): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' + env = fancy_gym.make_bb('toy-v0', [ToyWrapper], + {'max_planning_times': max_planning_times, + 'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0, + 'verbose': 2}, + {'trajectory_generator_type': mp_type, + }, + {'controller_type': 'motor'}, + {'phase_generator_type': phase_generator_type, + 'learn_tau': False, + 'learn_delay': False + }, + {'basis_generator_type': basis_generator_type, + }, + seed=SEED) + _ = env.reset() + d = False + for i in range(max_planning_times): + _, _, d, _ = env.step(env.action_space.sample()) + assert d + +@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) +@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4]) +@pytest.mark.parametrize('sub_segment_steps', [5, 10]) +def test_max_planning_times(mp_type: str, max_planning_times: int, sub_segment_steps: int): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' + env = fancy_gym.make_bb('toy-v0', [ToyWrapper], + {'max_planning_times': max_planning_times, + 'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0, + 'verbose': 2}, + {'trajectory_generator_type': mp_type, + }, + {'controller_type': 'motor'}, + {'phase_generator_type': phase_generator_type, + 'learn_tau': False, + 'learn_delay': False 
+ }, + {'basis_generator_type': basis_generator_type, + }, + seed=SEED) + _ = env.reset() + d = False + planning_times = 0 + while not d: + _, _, d, _ = env.step(env.action_space.sample()) + planning_times += 1 + assert planning_times == max_planning_times \ No newline at end of file From 7e3ec7a2eff43fa4caabc62086b7e7d5af8be95a Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Sun, 13 Nov 2022 17:59:12 +0100 Subject: [PATCH 04/11] set default max_planning_times to None --- fancy_gym/black_box/black_box_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index 88f8a32..8830987 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -23,7 +23,7 @@ class BlackBoxWrapper(gym.ObservationWrapper): replanning_schedule: Optional[ Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int], bool]] = None, reward_aggregation: Callable[[np.ndarray], float] = np.sum, - max_planning_times: int = 1, + max_planning_times: int = None, desired_traj_bc: bool = False ): """ From be14b21fff6379c948d5fdd78311e76a6e32a363 Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Mon, 14 Nov 2022 17:39:46 +0100 Subject: [PATCH 05/11] update test suite for replanning envs --- fancy_gym/black_box/black_box_wrapper.py | 1 + test/test_black_box.py | 27 ---- test/test_replanning_sequencing.py | 169 ++++++++++++++++++++++- 3 files changed, 167 insertions(+), 30 deletions(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index 8830987..ce96b20 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -86,6 +86,7 @@ class BlackBoxWrapper(gym.ObservationWrapper): return observation.astype(self.observation_space.dtype) def get_trajectory(self, action: np.ndarray) -> Tuple: + # duration = self.duration - self.current_traj_steps * self.dt duration = self.duration if self.learn_sub_trajectories: duration = None diff --git a/test/test_black_box.py b/test/test_black_box.py index fa1cd01..e95cf12 100644 --- a/test/test_black_box.py +++ b/test/test_black_box.py @@ -372,30 +372,3 @@ def test_replanning_schedule(mp_type: str, max_planning_times: int, sub_segment_ _, _, d, _ = env.step(env.action_space.sample()) assert d -@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) -@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4]) -@pytest.mark.parametrize('sub_segment_steps', [5, 10]) -def test_max_planning_times(mp_type: str, max_planning_times: int, sub_segment_steps: int): - basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' - phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' - env = fancy_gym.make_bb('toy-v0', [ToyWrapper], - {'max_planning_times': max_planning_times, - 'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0, - 'verbose': 2}, - {'trajectory_generator_type': mp_type, - }, - {'controller_type': 'motor'}, - {'phase_generator_type': phase_generator_type, - 'learn_tau': False, - 'learn_delay': False - }, - {'basis_generator_type': basis_generator_type, - }, - seed=SEED) - _ = env.reset() - d = False - planning_times = 0 - while not d: - _, _, d, _ = env.step(env.action_space.sample()) - planning_times += 1 - assert planning_times == max_planning_times \ No newline at end of file diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py index a42bb65..31d4f80 100644 --- 
a/test/test_replanning_sequencing.py +++ b/test/test_replanning_sequencing.py @@ -98,7 +98,7 @@ def test_learn_sub_trajectories(mp_type: str, env_wrap: Tuple[str, Type[RawInter assert length <= np.round(env.traj_gen.tau.numpy() / env.dt) -@pytest.mark.parametrize('mp_type', ['promp', 'dmp']) +@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) @pytest.mark.parametrize('env_wrap', zip(ENV_IDS, WRAPPERS)) @pytest.mark.parametrize('add_time_aware_wrapper_before', [True, False]) @pytest.mark.parametrize('replanning_time', [10, 100, 1000]) @@ -114,11 +114,14 @@ def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWra replanning_schedule = lambda c_pos, c_vel, obs, c_action, t: t % replanning_time == 0 + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + phase_generator_type = 'exp' if 'dmp' in mp_type else 'linear' + env = fancy_gym.make_bb(env_id, [wrapper_class], {'replanning_schedule': replanning_schedule, 'verbose': 2}, {'trajectory_generator_type': mp_type}, {'controller_type': 'motor'}, - {'phase_generator_type': 'exp'}, - {'basis_generator_type': 'rbf'}, seed=SEED) + {'phase_generator_type': phase_generator_type}, + {'basis_generator_type': basis_generator_type}, seed=SEED) assert env.do_replanning assert callable(env.replanning_schedule) @@ -142,3 +145,163 @@ def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWra env.reset() assert replanning_schedule(None, None, None, None, length) + +@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) +@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4]) +@pytest.mark.parametrize('sub_segment_steps', [5, 10]) +def test_max_planning_times(mp_type: str, max_planning_times: int, sub_segment_steps: int): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' + env = fancy_gym.make_bb('toy-v0', [ToyWrapper], + {'max_planning_times': max_planning_times, + 'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0, + 'verbose': 2}, + {'trajectory_generator_type': mp_type, + }, + {'controller_type': 'motor'}, + {'phase_generator_type': phase_generator_type, + 'learn_tau': False, + 'learn_delay': False + }, + {'basis_generator_type': basis_generator_type, + }, + seed=SEED) + _ = env.reset() + d = False + planning_times = 0 + while not d: + _, _, d, _ = env.step(env.action_space.sample()) + planning_times += 1 + assert planning_times == max_planning_times + +@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) +@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4]) +@pytest.mark.parametrize('sub_segment_steps', [5, 10]) +@pytest.mark.parametrize('tau', [0.5, 1.0, 1.5, 2.0]) +def test_replanning_with_learn_tau(mp_type: str, max_planning_times: int, sub_segment_steps: int, tau: float): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' + env = fancy_gym.make_bb('toy-v0', [ToyWrapper], + {'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0, + 'max_planning_times': max_planning_times, + 'verbose': 2}, + {'trajectory_generator_type': mp_type, + }, + {'controller_type': 'motor'}, + {'phase_generator_type': phase_generator_type, + 'learn_tau': True, + 'learn_delay': False + }, + {'basis_generator_type': basis_generator_type, + }, + seed=SEED) + _ = env.reset() + d = False + planning_times = 0 + while not d: + action = env.action_space.sample() + 
action[0] = tau + _, _, d, info = env.step(action) + planning_times += 1 + assert planning_times == max_planning_times + +@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) +@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4]) +@pytest.mark.parametrize('sub_segment_steps', [5, 10]) +@pytest.mark.parametrize('delay', [0.1, 0.25, 0.5, 0.75]) +def test_replanning_with_learn_delay(mp_type: str, max_planning_times: int, sub_segment_steps: int, delay: float): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' + env = fancy_gym.make_bb('toy-v0', [ToyWrapper], + {'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0, + 'max_planning_times': max_planning_times, + 'verbose': 2}, + {'trajectory_generator_type': mp_type, + }, + {'controller_type': 'motor'}, + {'phase_generator_type': phase_generator_type, + 'learn_tau': False, + 'learn_delay': True + }, + {'basis_generator_type': basis_generator_type, + }, + seed=SEED) + _ = env.reset() + d = False + planning_times = 0 + while not d: + action = env.action_space.sample() + action[0] = delay + _, _, d, info = env.step(action) + + delay_time_steps = int(np.round(delay / env.dt)) + pos = info['positions'].flatten() + vel = info['velocities'].flatten() + + # Check beginning is all same (only true for linear basis) + if planning_times == 0: + assert np.all(pos[:max(1, delay_time_steps - 1)] == pos[0]) + assert np.all(vel[:max(1, delay_time_steps - 2)] == vel[0]) + + # only valid when delay < sub_segment_steps + elif planning_times > 0 and delay_time_steps < sub_segment_steps: + assert np.all(pos[1:max(1, delay_time_steps - 1)] != pos[0]) + assert np.all(vel[1:max(1, delay_time_steps - 2)] != vel[0]) + + # Check active trajectory section is different to beginning values + assert np.all(pos[max(1, delay_time_steps):] != pos[0]) + assert np.all(vel[max(1, delay_time_steps)] != vel[0]) + + planning_times += 1 + + assert planning_times == max_planning_times + +@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) +@pytest.mark.parametrize('max_planning_times', [1, 2, 3]) +@pytest.mark.parametrize('sub_segment_steps', [5, 10, 15]) +@pytest.mark.parametrize('delay', [0, 0.25, 0.5, 0.75]) +@pytest.mark.parametrize('tau', [0.5, 0.75, 1.0]) +def test_replanning_with_learn_delay_and_tau(mp_type: str, max_planning_times: int, sub_segment_steps: int, + delay: float, tau: float): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' + env = fancy_gym.make_bb('toy-v0', [ToyWrapper], + {'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0, + 'max_planning_times': max_planning_times, + 'verbose': 2}, + {'trajectory_generator_type': mp_type, + }, + {'controller_type': 'motor'}, + {'phase_generator_type': phase_generator_type, + 'learn_tau': True, + 'learn_delay': True + }, + {'basis_generator_type': basis_generator_type, + }, + seed=SEED) + _ = env.reset() + d = False + planning_times = 0 + while not d: + action = env.action_space.sample() + action[0] = tau + action[1] = delay + _, _, d, info = env.step(action) + + delay_time_steps = int(np.round(delay / env.dt)) + + pos = info['positions'].flatten() + vel = info['velocities'].flatten() + + # Delay only applies to first planning time + if planning_times == 0: + # Check delay is applied + assert np.all(pos[:max(1, delay_time_steps - 1)] == pos[0]) + assert np.all(vel[:max(1, 
delay_time_steps - 2)] == vel[0]) + # Check active trajectory section is different to beginning values + assert np.all(pos[max(1, delay_time_steps):] != pos[0]) + assert np.all(vel[max(1, delay_time_steps)] != vel[0]) + + planning_times += 1 + + assert planning_times == max_planning_times \ No newline at end of file From fc3051bf57c8d6d37096a83f072f5d1ec549226a Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Mon, 14 Nov 2022 17:48:15 +0100 Subject: [PATCH 06/11] minor updates --- fancy_gym/black_box/black_box_wrapper.py | 1 - fancy_gym/envs/__init__.py | 4 +--- fancy_gym/examples/examples_movement_primitives.py | 12 ++++++------ 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index ce96b20..8830987 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -86,7 +86,6 @@ class BlackBoxWrapper(gym.ObservationWrapper): return observation.astype(self.observation_space.dtype) def get_trajectory(self, action: np.ndarray) -> Tuple: - # duration = self.duration - self.current_traj_steps * self.dt duration = self.duration if self.learn_sub_trajectories: duration = None diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index eb44d9f..2b0e7fd 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -498,15 +498,13 @@ for _v in _versions: kwargs_dict_box_pushing_prodmp['name'] = _v kwargs_dict_box_pushing_prodmp['controller_kwargs']['p_gains'] = 0.01 * np.array([120., 120., 120., 120., 50., 30., 10.]) kwargs_dict_box_pushing_prodmp['controller_kwargs']['d_gains'] = 0.01 * np.array([10., 10., 10., 10., 6., 5., 3.]) - # kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['weights_scale'] = np.array([3.4944e+01, 4.3734e+01, 9.6711e+01, 2.4429e+02, 5.8272e+02]) - # kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 3.1264e-01 kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['weights_scale'] = 0.3 kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3 kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0 kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 0 kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['alpha'] = 10. 
- kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 # 3.5, 4 to try + kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 2 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t : t % 25 == 0 diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py index 707dccd..67d93ae 100644 --- a/fancy_gym/examples/examples_movement_primitives.py +++ b/fancy_gym/examples/examples_movement_primitives.py @@ -157,17 +157,17 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): if __name__ == '__main__': render = True # DMP - # example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render) + example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render) # ProMP - # example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render) - # example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render) + example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render) + example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render) # ProDMP - example_mp("BoxPushingDenseProDMP-v0", seed=10, iterations=16, render=render) + example_mp("BoxPushingDenseProDMP-v0", seed=10, iterations=4, render=render) # Altered basis functions - # obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render) + obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render) # Custom MP - # example_fully_custom_mp(seed=10, iterations=1, render=render) + example_fully_custom_mp(seed=10, iterations=1, render=render) From ca5800fa3d67639b43a14a9d94ec43eaa07a4d81 Mon Sep 17 00:00:00 2001 From: ottofabian Date: Tue, 15 Nov 2022 13:55:56 +0100 Subject: [PATCH 07/11] Update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1148e85..1c59fa9 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ setup( ], extras_require=extras, install_requires=[ - 'gym[mujoco]<0.25.0,>=0.24.0', + 'gym[mujoco]<0.25.0,>=0.24.1', 'mp_pytorch @ git+https://github.com/ALRhub/MP_PyTorch.git@main' ], packages=[package for package in find_packages() if package.startswith("fancy_gym")], From 2674bf80fe0268e33076652bfb50c3f7aa3f942b Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Sun, 20 Nov 2022 21:56:32 +0100 Subject: [PATCH 08/11] update according to reviews opinion & fix bugs in box pushing IK --- fancy_gym/black_box/black_box_wrapper.py | 28 +++---- fancy_gym/envs/__init__.py | 14 +--- .../mujoco/box_pushing/box_pushing_env.py | 15 +--- fancy_gym/examples/example_replanning_envs.py | 66 ++++++++++----- fancy_gym/examples/example_sim_env.py | 9 -- .../examples/examples_movement_primitives.py | 2 +- fancy_gym/utils/make_env_helpers.py | 3 - test/test_black_box.py | 32 +------- test/test_replanning_envs.py | 82 ------------------- test/test_replanning_sequencing.py | 28 ++++++- 10 files changed, 94 insertions(+), 185 deletions(-) delete mode 100644 fancy_gym/examples/example_sim_env.py delete mode 100644 test/test_replanning_envs.py diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index 8830987..7fb085c 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py 
@@ -24,7 +24,7 @@ class BlackBoxWrapper(gym.ObservationWrapper): Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int], bool]] = None, reward_aggregation: Callable[[np.ndarray], float] = np.sum, max_planning_times: int = None, - desired_traj_bc: bool = False + condition_on_desired: bool = False ): """ gym.Wrapper for leveraging a black box approach with a trajectory generator. @@ -71,12 +71,12 @@ class BlackBoxWrapper(gym.ObservationWrapper): self.verbose = verbose # condition value - self.desired_traj_bc = desired_traj_bc + self.condition_on_desired = condition_on_desired self.condition_pos = None self.condition_vel = None self.max_planning_times = max_planning_times - self.plan_counts = 0 + self.plan_steps = 0 def observation(self, observation): # return context space if we are @@ -98,15 +98,11 @@ class BlackBoxWrapper(gym.ObservationWrapper): bc_time = np.array(0 if not self.do_replanning else self.current_traj_steps * self.dt) # TODO we could think about initializing with the previous desired value in order to have a smooth transition # at least from the planning point of view. - # self.traj_gen.set_boundary_conditions(bc_time, self.current_pos, self.current_vel) - if self.current_traj_steps == 0: - self.condition_pos = self.current_pos - self.condition_vel = self.current_vel - bc_time = torch.as_tensor(bc_time, dtype=torch.float32) - self.condition_pos = torch.as_tensor(self.condition_pos, dtype=torch.float32) - self.condition_vel = torch.as_tensor(self.condition_vel, dtype=torch.float32) - self.traj_gen.set_boundary_conditions(bc_time, self.condition_pos, self.condition_vel) + condition_pos = self.condition_pos if self.condition_pos is not None else self.current_pos + condition_vel = self.condition_vel if self.condition_vel is not None else self.current_vel + + self.traj_gen.set_boundary_conditions(bc_time, condition_pos, condition_vel) self.traj_gen.set_duration(duration, self.dt) # traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True) position = get_numpy(self.traj_gen.get_traj_pos()) @@ -164,7 +160,7 @@ class BlackBoxWrapper(gym.ObservationWrapper): infos = dict() done = False - self.plan_counts += 1 + self.plan_steps += 1 for t, (pos, vel) in enumerate(zip(position, velocity)): step_action = self.tracking_controller.get_action(pos, vel, self.current_pos, self.current_vel) c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high) @@ -186,11 +182,11 @@ class BlackBoxWrapper(gym.ObservationWrapper): if done or self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action, t + 1 + self.current_traj_steps): - if self.max_planning_times is not None and self.plan_counts >= self.max_planning_times: + if self.max_planning_times is not None and self.plan_steps >= self.max_planning_times: continue - self.condition_pos = pos if self.desired_traj_bc else self.current_pos - self.condition_vel = vel if self.desired_traj_bc else self.current_vel + self.condition_pos = pos if self.condition_on_desired else None + self.condition_vel = vel if self.condition_on_desired else None break @@ -215,6 +211,6 @@ class BlackBoxWrapper(gym.ObservationWrapper): def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None): self.current_traj_steps = 0 - self.plan_counts = 0 + self.plan_steps = 0 self.traj_gen.reset() return super(BlackBoxWrapper, self).reset() diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index 2b0e7fd..bb6a664 100644 --- a/fancy_gym/envs/__init__.py +++ 
b/fancy_gym/envs/__init__.py @@ -68,12 +68,9 @@ DEFAULT_BB_DICT_ProDMP = { "wrappers": [], "trajectory_generator_kwargs": { 'trajectory_generator_type': 'prodmp', - 'weights_scale': 1.0, }, "phase_generator_kwargs": { 'phase_generator_type': 'exp', - 'learn_delay': False, - 'learn_tau': False, }, "controller_kwargs": { 'controller_type': 'motor', @@ -86,10 +83,6 @@ DEFAULT_BB_DICT_ProDMP = { 'num_basis': 5, }, "black_box_kwargs": { - 'replanning_schedule': None, - 'max_planning_times': None, - 'desired_traj_bc': False, - 'verbose': 2 } } @@ -492,7 +485,7 @@ for _v in _versions: for _v in _versions: _name = _v.split("-") - _env_id = f'{_name[0]}ProDMP-{_name[1]}' + _env_id = f'{_name[0]}ReplanProDMP-{_name[1]}' kwargs_dict_box_pushing_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP) kwargs_dict_box_pushing_prodmp['wrappers'].append(mujoco.box_pushing.MPWrapper) kwargs_dict_box_pushing_prodmp['name'] = _v @@ -502,13 +495,12 @@ for _v in _versions: kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3 kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0 - kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 0 - kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['alpha'] = 10. + kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 4 kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 2 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t : t % 25 == 0 - kwargs_dict_box_pushing_prodmp['black_box_kwargs']['desired_traj_bc'] = True + kwargs_dict_box_pushing_prodmp['black_box_kwargs']['condition_on_desried'] = True register( id=_env_id, entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', diff --git a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py index 37babf9..275bba1 100644 --- a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py +++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py @@ -219,6 +219,8 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle): q_old = q q = q + dt * qd_d q = np.clip(q, q_min, q_max) + self.data.qpos[:7] = q + mujoco.mj_forward(self.model, self.data) current_cart_pos = self.data.body("tcp").xpos.copy() current_cart_quat = self.data.body("tcp").xquat.copy() @@ -247,8 +249,10 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle): ### get Jacobian by mujoco self.data.qpos[:7] = q mujoco.mj_forward(self.model, self.data) + jacp = self.get_body_jacp("tcp")[:, :7].copy() jacr = self.get_body_jacr("tcp")[:, :7].copy() + J = np.concatenate((jacp, jacr), axis=0) Jw = J.dot(w) @@ -356,14 +360,3 @@ class BoxPushingTemporalSpatialSparse(BoxPushingEnvBase): reward += box_goal_pos_dist_reward + box_goal_rot_dist_reward return reward - -if __name__=="__main__": - env = BoxPushingTemporalSpatialSparse(frame_skip=10) - env.reset() - for i in range(10): - env.reset() - for _ in range(100): - env.render("human") - action = env.action_space.sample() - obs, reward, done, info = env.step(action) - print("info: {}".format(info)) diff --git a/fancy_gym/examples/example_replanning_envs.py b/fancy_gym/examples/example_replanning_envs.py index 392e9d4..d993a71 100644 --- 
a/fancy_gym/examples/example_replanning_envs.py +++ b/fancy_gym/examples/example_replanning_envs.py @@ -1,38 +1,62 @@ import fancy_gym -import numpy as np -import matplotlib.pyplot as plt -def plot_trajectory(traj): - plt.figure() - plt.plot(traj[:, 3]) - plt.legend() - plt.show() - -def run_replanning_envs(env_name="BoxPushingProDMP-v0", seed=1, iterations=1, render=True): +def example_run_replanning_env(env_name="BoxPushingDenseReplanProDMP-v0", seed=1, iterations=1, render=False): env = fancy_gym.make(env_name, seed=seed) env.reset() for i in range(iterations): done = False - desired_pos_traj = np.zeros((100, 7)) - desired_vel_traj = np.zeros((100, 7)) - real_pos_traj = np.zeros((100, 7)) - real_vel_traj = np.zeros((100, 7)) - t = 0 while done is False: ac = env.action_space.sample() obs, reward, done, info = env.step(ac) - desired_pos_traj[t: t + 25, :] = info['desired_pos'] - desired_vel_traj[t: t + 25, :] = info['desired_vel'] - # real_pos_traj.append(info['current_pos']) - # real_vel_traj.append(info['current_vel']) - t += 25 if render: env.render(mode="human") if done: env.reset() - plot_trajectory(desired_pos_traj) env.close() del env +def example_custom_replanning_envs(seed=0, iteration=100, render=True): + # id for a step-based environment + base_env_id = "BoxPushingDense-v0" + + wrappers = [fancy_gym.envs.mujoco.box_pushing.mp_wrapper.MPWrapper] + + trajectory_generator_kwargs = {'trajectory_generator_type': 'prodmp', + 'weight_scale': 1} + phase_generator_kwargs = {'phase_generator_type': 'exp'} + controller_kwargs = {'controller_type': 'velocity'} + basis_generator_kwargs = {'basis_generator_type': 'prodmp', + 'num_basis': 5} + + # max_planning_times: the maximum number of plans can be generated + # replanning_schedule: the trigger for replanning + # condition_on_desired: use desired state as the boundary condition for the next plan + black_box_kwargs = {'max_planning_times': 4, + 'replanning_schedule': lambda pos, vel, obs, action, t: t % 25 == 0, + 'desired_traj_bc': True} + + env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs=black_box_kwargs, + traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, + phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, + seed=seed) + if render: + env.render(mode="human") + + obs = env.reset() + + for i in range(iteration): + ac = env.action_space.sample() + obs, reward, done, info = env.step(ac) + if done: + env.reset() + + env.close() + del env + + if __name__ == "__main__": - run_replanning_envs(env_name="BoxPushingDenseProDMP-v0", seed=1, iterations=1, render=False) \ No newline at end of file + # run a registered replanning environment + example_run_replanning_env(env_name="BoxPushingDenseReplanProDMP-v0", seed=1, iterations=1, render=False) + + # run a custom replanning environment + example_custom_replanning_envs(seed=0, iteration=100, render=True) \ No newline at end of file diff --git a/fancy_gym/examples/example_sim_env.py b/fancy_gym/examples/example_sim_env.py deleted file mode 100644 index f949a89..0000000 --- a/fancy_gym/examples/example_sim_env.py +++ /dev/null @@ -1,9 +0,0 @@ -import gym_blockpush -import gym - -env = gym.make("blockpush-v0") -env.start() -env.scene.reset() -for i in range(100): - env.step(env.action_space.sample()) - env.render() \ No newline at end of file diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py index 67d93ae..445b8b9 100644 --- 
a/fancy_gym/examples/examples_movement_primitives.py +++ b/fancy_gym/examples/examples_movement_primitives.py @@ -164,7 +164,7 @@ if __name__ == '__main__': example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render) # ProDMP - example_mp("BoxPushingDenseProDMP-v0", seed=10, iterations=4, render=render) + example_mp("BoxPushingDenseReplanProDMP-v0", seed=10, iterations=4, render=render) # Altered basis functions obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render) diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py index 0ba7a4a..3c73ba9 100644 --- a/fancy_gym/utils/make_env_helpers.py +++ b/fancy_gym/utils/make_env_helpers.py @@ -175,9 +175,6 @@ def make_bb( if phase_kwargs.get('learn_delay'): phase_kwargs["delay_bound"] = [0, black_box_kwargs['duration'] - env.dt * 2] - if traj_gen_kwargs['trajectory_generator_type'] == 'prodmp': - assert basis_kwargs['basis_generator_type'] == 'prodmp', 'prodmp trajectory generator requires prodmp basis generator' - phase_gen = get_phase_generator(**phase_kwargs) basis_gen = get_basis_generator(phase_generator=phase_gen, **basis_kwargs) controller = get_controller(**controller_kwargs) diff --git a/test/test_black_box.py b/test/test_black_box.py index e95cf12..5ade1ae 100644 --- a/test/test_black_box.py +++ b/test/test_black_box.py @@ -158,7 +158,7 @@ def test_context_space(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapp @pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) @pytest.mark.parametrize('num_dof', [0, 1, 2, 5]) -@pytest.mark.parametrize('num_basis', [0, 2, 5]) # should add 1 back after the bug is fixed +@pytest.mark.parametrize('num_basis', [0, 1, 2, 5]) @pytest.mark.parametrize('learn_tau', [True, False]) @pytest.mark.parametrize('learn_delay', [True, False]) def test_action_space(mp_type: str, num_dof: int, num_basis: int, learn_tau: bool, learn_delay: bool): @@ -343,32 +343,4 @@ def test_learn_tau_and_delay(mp_type: str, tau: float, delay: float): active_pos = pos[delay_time_steps: joint_time_steps - 1] active_vel = vel[delay_time_steps: joint_time_steps - 2] assert np.all(active_pos != pos[-1]) and np.all(active_pos != pos[0]) - assert np.all(active_vel != vel[-1]) and np.all(active_vel != vel[0]) - - -@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) -@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4]) -@pytest.mark.parametrize('sub_segment_steps', [5, 10]) -def test_replanning_schedule(mp_type: str, max_planning_times: int, sub_segment_steps: int): - basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' - phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' - env = fancy_gym.make_bb('toy-v0', [ToyWrapper], - {'max_planning_times': max_planning_times, - 'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0, - 'verbose': 2}, - {'trajectory_generator_type': mp_type, - }, - {'controller_type': 'motor'}, - {'phase_generator_type': phase_generator_type, - 'learn_tau': False, - 'learn_delay': False - }, - {'basis_generator_type': basis_generator_type, - }, - seed=SEED) - _ = env.reset() - d = False - for i in range(max_planning_times): - _, _, d, _ = env.step(env.action_space.sample()) - assert d - + assert np.all(active_vel != vel[-1]) and np.all(active_vel != vel[0]) \ No newline at end of file diff --git a/test/test_replanning_envs.py b/test/test_replanning_envs.py deleted file mode 100644 index 4228284..0000000 --- a/test/test_replanning_envs.py +++ /dev/null @@ 
-1,82 +0,0 @@ -from itertools import chain -from typing import Tuple, Type, Union, Optional, Callable - -import gym -import numpy as np -import pytest -from gym import register -from gym.core import ActType, ObsType - -import fancy_gym -from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper - -import fancy_gym -from test.utils import run_env, run_env_determinism - -Fancy_ProDMP_IDS = fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS['ProDMP'] - -All_ProDMP_IDS = fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS['ProDMP'] - - -class Object(object): - pass - - -class ToyEnv(gym.Env): - observation_space = gym.spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float64) - action_space = gym.spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float64) - dt = 0.02 - - def __init__(self, a: int = 0, b: float = 0.0, c: list = [], d: dict = {}, e: Object = Object()): - self.a, self.b, self.c, self.d, self.e = a, b, c, d, e - - def reset(self, *, seed: Optional[int] = None, return_info: bool = False, - options: Optional[dict] = None) -> Union[ObsType, Tuple[ObsType, dict]]: - return np.array([-1]) - - def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]: - return np.array([-1]), 1, False, {} - - def render(self, mode="human"): - pass - - -class ToyWrapper(RawInterfaceWrapper): - - @property - def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: - return np.ones(self.action_space.shape) - - @property - def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: - return np.zeros(self.action_space.shape) - -@pytest.fixture(scope="session", autouse=True) -def setup(): - register( - id=f'toy-v0', - entry_point='test.test_black_box:ToyEnv', - max_episode_steps=50, - ) -# @pytest.mark.parametrize('env_id', All_ProDMP_IDS) -# def test_replanning_envs(env_id: str): -# """Tests that ProDMP environments run without errors using random actions.""" -# run_env(env_id) -# -# @pytest.mark.parametrize('env_id', All_ProDMP_IDS) -# def test_replanning_determinism(env_id: str): -# """Tests that ProDMP environments are deterministic.""" -# run_env_determinism(env_id, 0) - -@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp']) -def test_missing_local_state(mp_type: str): - basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' - - env = fancy_gym.make_bb('toy-v0', [RawInterfaceWrapper], {}, - {'trajectory_generator_type': mp_type}, - {'controller_type': 'motor'}, - {'phase_generator_type': 'exp'}, - {'basis_generator_type': basis_generator_type}) - env.reset() - with pytest.raises(NotImplementedError): - env.step(env.action_space.sample()) \ No newline at end of file diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py index 31d4f80..9d04d02 100644 --- a/test/test_replanning_sequencing.py +++ b/test/test_replanning_sequencing.py @@ -304,4 +304,30 @@ def test_replanning_with_learn_delay_and_tau(mp_type: str, max_planning_times: i planning_times += 1 - assert planning_times == max_planning_times \ No newline at end of file + assert planning_times == max_planning_times + +@pytest.mark.parametrize('mp_type', ['promp', 'prodmp']) +@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4]) +@pytest.mark.parametrize('sub_segment_steps', [5, 10]) +def test_replanning_schedule(mp_type: str, max_planning_times: int, sub_segment_steps: int): + basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf' + phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear' + env = fancy_gym.make_bb('toy-v0', [ToyWrapper], + 
{'max_planning_times': max_planning_times, + 'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0, + 'verbose': 2}, + {'trajectory_generator_type': mp_type, + }, + {'controller_type': 'motor'}, + {'phase_generator_type': phase_generator_type, + 'learn_tau': False, + 'learn_delay': False + }, + {'basis_generator_type': basis_generator_type, + }, + seed=SEED) + _ = env.reset() + d = False + for i in range(max_planning_times): + _, _, d, _ = env.step(env.action_space.sample()) + assert d From 9539ba14246656fc61bbc784141f1d0fc6710930 Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Sun, 20 Nov 2022 22:00:02 +0100 Subject: [PATCH 09/11] fix some typos --- fancy_gym/envs/__init__.py | 2 +- fancy_gym/examples/example_replanning_envs.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index bb6a664..890e043 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -500,7 +500,7 @@ for _v in _versions: kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 2 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t : t % 25 == 0 - kwargs_dict_box_pushing_prodmp['black_box_kwargs']['condition_on_desried'] = True + kwargs_dict_box_pushing_prodmp['black_box_kwargs']['condition_on_desired'] = True register( id=_env_id, entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', diff --git a/fancy_gym/examples/example_replanning_envs.py b/fancy_gym/examples/example_replanning_envs.py index d993a71..977ce9e 100644 --- a/fancy_gym/examples/example_replanning_envs.py +++ b/fancy_gym/examples/example_replanning_envs.py @@ -33,7 +33,7 @@ def example_custom_replanning_envs(seed=0, iteration=100, render=True): # condition_on_desired: use desired state as the boundary condition for the next plan black_box_kwargs = {'max_planning_times': 4, 'replanning_schedule': lambda pos, vel, obs, action, t: t % 25 == 0, - 'desired_traj_bc': True} + 'condition_on_desired': True} env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs=black_box_kwargs, traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, @@ -59,4 +59,4 @@ if __name__ == "__main__": example_run_replanning_env(env_name="BoxPushingDenseReplanProDMP-v0", seed=1, iterations=1, render=False) # run a custom replanning environment - example_custom_replanning_envs(seed=0, iteration=100, render=True) \ No newline at end of file + example_custom_replanning_envs(seed=0, iteration=8, render=True) \ No newline at end of file From a9ad1b528710a5f15e17c05ad6d7361c70c8676f Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Mon, 21 Nov 2022 10:17:56 +0100 Subject: [PATCH 10/11] minor update: create local variables for current pos/vel --- fancy_gym/black_box/black_box_wrapper.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index 7fb085c..9d41615 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -162,7 +162,9 @@ class BlackBoxWrapper(gym.ObservationWrapper): self.plan_steps += 1 for t, (pos, vel) in enumerate(zip(position, velocity)): - step_action = self.tracking_controller.get_action(pos, vel, self.current_pos, self.current_vel) + current_pos = self.current_pos + current_vel = 
self.current_vel + step_action = self.tracking_controller.get_action(pos, vel, current_pos, current_vel) c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high) obs, c_reward, done, info = self.env.step(c_action) rewards[t] = c_reward @@ -179,7 +181,7 @@ class BlackBoxWrapper(gym.ObservationWrapper): if self.render_kwargs: self.env.render(**self.render_kwargs) - if done or self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action, + if done or self.replanning_schedule(current_pos, current_vel, obs, c_action, t + 1 + self.current_traj_steps): if self.max_planning_times is not None and self.plan_steps >= self.max_planning_times: From 833f0735ea8b9beaa8d5d152fbdc8691d9aabfca Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Fri, 25 Nov 2022 13:31:05 +0100 Subject: [PATCH 11/11] delete import torch in black box wrapper --- fancy_gym/black_box/black_box_wrapper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index 9d41615..2a2a3f5 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -2,7 +2,6 @@ from typing import Tuple, Optional, Callable import gym import numpy as np -import torch from gym import spaces from mp_pytorch.mp.mp_interfaces import MPInterface
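

For reference, a minimal usage sketch of the replanning interface this series converges on, assuming all patches above are applied. It drives the BoxPushingDenseReplanProDMP-v0 environment registered in PATCH 08/09, where each env.step() executes one sub-trajectory segment, the registered schedule (lambda pos, vel, obs, action, t: t % 25 == 0) triggers a replan every 25 low-level steps capped at max_planning_times=2, and condition_on_desired=True conditions each new plan on the previously desired boundary state rather than the measured one. The loop uses the 4-tuple step API of gym<0.25 targeted by setup.py.

import fancy_gym

env = fancy_gym.make("BoxPushingDenseReplanProDMP-v0", seed=1)
obs = env.reset()
done = False
while not done:
    # one step consumes one planned segment; done turns True once the
    # episode's final segment has been executed
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()

For building an equivalent environment by hand via fancy_gym.make_bb, see example_custom_replanning_envs in fancy_gym/examples/example_replanning_envs.py as rewritten in PATCH 08.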