diff --git a/fancy_gym/black_box/controller/base_controller.py b/fancy_gym/black_box/controller/base_controller.py
index 1ac1522..e9045aa 100644
--- a/fancy_gym/black_box/controller/base_controller.py
+++ b/fancy_gym/black_box/controller/base_controller.py
@@ -2,3 +2,6 @@ class BaseController:
 
     def get_action(self, des_pos, des_vel, c_pos, c_vel):
         raise NotImplementedError
+
+    def __call__(self, des_pos, des_vel, c_pos, c_vel):
+        return self.get_action(des_pos, des_vel, c_pos, c_vel)
diff --git a/fancy_gym/black_box/controller/meta_world_controller.py b/fancy_gym/black_box/controller/meta_world_controller.py
index efd8983..3e5bd37 100644
--- a/fancy_gym/black_box/controller/meta_world_controller.py
+++ b/fancy_gym/black_box/controller/meta_world_controller.py
@@ -18,7 +18,8 @@ class MetaWorldController(BaseController):
 
         cur_pos = c_pos[:-1]
         xyz_pos = des_pos[:-1]
-        assert xyz_pos.shape == cur_pos.shape, \
-            f"Mismatch in dimension between desired position {xyz_pos.shape} and current position {cur_pos.shape}"
+        if xyz_pos.shape != cur_pos.shape:
+            raise ValueError(f"Mismatch in dimension between desired position"
+                             f" {xyz_pos.shape} and current position {cur_pos.shape}")
         trq = np.hstack([(xyz_pos - cur_pos), gripper_pos])
         return trq
diff --git a/fancy_gym/black_box/controller/pd_controller.py b/fancy_gym/black_box/controller/pd_controller.py
index 35203d8..78c2adc 100644
--- a/fancy_gym/black_box/controller/pd_controller.py
+++ b/fancy_gym/black_box/controller/pd_controller.py
@@ -8,7 +8,6 @@ class PDController(BaseController):
     A PD-Controller. Using position and velocity information from a provided environment,
     the tracking_controller calculates a response based on the desired position and velocity
 
-    :param env: A position environment
     :param p_gains: Factors for the proportional gains
     :param d_gains: Factors for the differential gains
     """
@@ -20,9 +19,11 @@ class PDController(BaseController):
         self.d_gains = d_gains
 
     def get_action(self, des_pos, des_vel, c_pos, c_vel):
-        assert des_pos.shape == c_pos.shape, \
-            f"Mismatch in dimension between desired position {des_pos.shape} and current position {c_pos.shape}"
-        assert des_vel.shape == c_vel.shape, \
-            f"Mismatch in dimension between desired velocity {des_vel.shape} and current velocity {c_vel.shape}"
+        if des_pos.shape != c_pos.shape:
+            raise ValueError(f"Mismatch in dimension between desired position "
+                             f"{des_pos.shape} and current position {c_pos.shape}")
+        if des_vel.shape != c_vel.shape:
+            raise ValueError(f"Mismatch in dimension between desired velocity"
+                             f" {des_vel.shape} and current velocity {c_vel.shape}")
         trq = self.p_gains * (des_pos - c_pos) + self.d_gains * (des_vel - c_vel)
         return trq
diff --git a/test/test_black_box.py b/test/test_black_box.py
index 974e3b3..fb7f78c 100644
--- a/test/test_black_box.py
+++ b/test/test_black_box.py
@@ -5,12 +5,11 @@ import gym
 import numpy as np
 import pytest
 from gym import register
-from gym.wrappers import TimeLimit
-
-from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
 from gym.core import ActType, ObsType
 
 import fancy_gym
+from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
+from fancy_gym.utils.time_aware_observation import TimeAwareObservation
 
 SEED = 1
 ENV_IDS = ['Reacher5d-v0', 'dmc:ball_in_cup-catch', 'metaworld:reach-v2', 'Reacher-v2']
@@ -19,11 +18,18 @@ WRAPPERS = [fancy_gym.envs.mujoco.reacher.MPWrapper, fancy_gym.dmc.suite.ball_in
 ALL_MP_ENVS = chain(*fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())
 
 
+class Object(object):
+    pass
+
+
 class ToyEnv(gym.Env):
     observation_space = gym.spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float64)
     action_space = gym.spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float64)
     dt = 0.01
 
+    def __init__(self, a: int = 0, b: float = 0.0, c: list = [], d: dict = {}, e: Object = Object()):
+        self.a, self.b, self.c, self.d, self.e = a, b, c, d, e
+
     def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
               options: Optional[dict] = None) -> Union[ObsType, Tuple[ObsType, dict]]:
         return np.array([-1])
@@ -61,6 +67,67 @@ def test_missing_wrapper(env_id: str):
         fancy_gym.make_bb(env_id, [], {}, {}, {}, {}, {})
 
 
+def test_missing_local_state():
+    env = fancy_gym.make_bb('toy-v0', [RawInterfaceWrapper], {},
+                            {'trajectory_generator_type': 'promp'},
+                            {'controller_type': 'motor'},
+                            {'phase_generator_type': 'linear'},
+                            {'basis_generator_type': 'rbf'})
+    env.reset()
+    with pytest.raises(NotImplementedError):
+        env.step(env.action_space.sample())
+
+
+@pytest.mark.parametrize('env_wrap', zip(ENV_IDS, WRAPPERS))
+@pytest.mark.parametrize('verbose', [1, 2])
+def test_verbosity(env_wrap: Tuple[str, Type[RawInterfaceWrapper]], verbose: int):
+    env_id, wrapper_class = env_wrap
+    env = fancy_gym.make_bb(env_id, [wrapper_class], {},
+                            {'trajectory_generator_type': 'promp'},
+                            {'controller_type': 'motor'},
+                            {'phase_generator_type': 'linear'},
+                            {'basis_generator_type': 'rbf'})
+    env.reset()
+    info_keys = env.step(env.action_space.sample())[3].keys()
+
+    env_step = fancy_gym.make(env_id, SEED)
+    env_step.reset()
+    info_keys_step = env_step.step(env_step.action_space.sample())[3].keys()
+
+    assert info_keys_step in info_keys
+    assert 'trajectory_length' in info_keys
+
+    if verbose >= 2:
+        mp_keys = ['position', 'velocities', 'step_actions', 'step_observations', 'step_rewards']
+        assert mp_keys in info_keys
+
+
+@pytest.mark.parametrize('env_wrap', zip(ENV_IDS, WRAPPERS))
+def test_length(env_wrap: Tuple[str, Type[RawInterfaceWrapper]]):
+    env_id, wrapper_class = env_wrap
+    env = fancy_gym.make_bb(env_id, [wrapper_class], {},
+                            {'trajectory_generator_type': 'promp'},
+                            {'controller_type': 'motor'},
+                            {'phase_generator_type': 'linear'},
+                            {'basis_generator_type': 'rbf'})
+    env.reset()
+    length = env.step(env.action_space.sample())[3]['trajectory_length']
+
+    assert length == env.spec.max_episode_steps
+
+
+@pytest.mark.parametrize('reward_aggregation', [np.sum, np.mean, np.median, lambda x: np.mean(x[::2])])
+def test_aggregation(reward_aggregation: callable):
+    env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {'reward_aggregation': reward_aggregation},
+                            {'trajectory_generator_type': 'promp'},
+                            {'controller_type': 'motor'},
+                            {'phase_generator_type': 'linear'},
+                            {'basis_generator_type': 'rbf'})
+    env.reset()
+    # ToyEnv only returns 1 as reward
+    assert env.step(env.action_space.sample())[1] == reward_aggregation(np.ones(50, ))
+
+
 @pytest.mark.parametrize('env_wrap', zip(ENV_IDS, WRAPPERS))
 def test_context_space(env_wrap: Tuple[str, Type[RawInterfaceWrapper]]):
     env_id, wrapper_class = env_wrap
@@ -75,30 +142,74 @@ def test_context_space(env_wrap: Tuple[str, Type[RawInterfaceWrapper]]):
     assert env.observation_space.shape == wrapper.context_mask[wrapper.context_mask].shape
 
 
-@pytest.mark.parametrize('env_id', ENV_IDS)
-@pytest.mark.parametrize('reward_aggregation', [np.sum, np.mean, np.median, lambda x: np.mean(x[::2])])
-def test_aggregation(env_id: str, reward_aggregation: callable):
-    env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {'reward_aggregation': reward_aggregation},
+@pytest.mark.parametrize('num_dof', [0, 1, 2, 5])
+@pytest.mark.parametrize('num_basis', [0, 1, 2, 5])
+@pytest.mark.parametrize('learn_tau', [True, False])
+@pytest.mark.parametrize('learn_delay', [True, False])
+def test_action_space(num_dof: int, num_basis: int, learn_tau: bool, learn_delay: bool):
+    env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {},
+                            {'trajectory_generator_type': 'promp',
+                             'action_dim': num_dof
+                             },
+                            {'controller_type': 'motor'},
+                            {'phase_generator_type': 'linear',
+                             'learn_tau': learn_tau,
+                             'learn_delay': learn_delay
+                             },
+                            {'basis_generator_type': 'rbf',
+                             'num_basis': num_basis
+                             })
+    assert env.action_space.shape[0] == num_dof * num_basis + int(learn_tau) + int(learn_delay)
+
+
+@pytest.mark.parametrize('a', [1])
+@pytest.mark.parametrize('b', [1.0])
+@pytest.mark.parametrize('c', [[1], [1.0], ['str'], [{'a': 'b'}], [np.ones(3, )]])
+@pytest.mark.parametrize('d', [{'a': 1}, {1: 2.0}, {'a': [1.0]}, {'a': np.ones(3, )}, {'a': {'a': 'b'}}])
+@pytest.mark.parametrize('e', [Object()])
+def test_change_env_kwargs(a: int, b: float, c: list, d: dict, e: Object):
+    env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {},
+                            {'trajectory_generator_type': 'promp'},
+                            {'controller_type': 'motor'},
+                            {'phase_generator_type': 'linear'},
+                            {'basis_generator_type': 'rbf'},
+                            a=a, b=b, c=c, d=d, e=e
+                            )
+    assert a is env.a
+    assert b is env.b
+    assert c is env.c
+    # Due to how gym works dict kwargs need to be copied and hence can only be checked to have the same content
+    assert d == env.d
+    assert e is env.e
+
+
+@pytest.mark.parametrize('env_wrap', zip(ENV_IDS, WRAPPERS))
+@pytest.mark.parametrize('add_time_aware_wrapper_before', [True, False])
+def test_learn_sub_trajectories(env_wrap: Tuple[str, Type[RawInterfaceWrapper]], add_time_aware_wrapper_before: bool):
+    env_id, wrapper_class = env_wrap
+    env_step = TimeAwareObservation(fancy_gym.make(env_id, SEED))
+    wrappers = [wrapper_class]
+
+    # has time aware wrapper
+    if add_time_aware_wrapper_before:
+        wrappers += [TimeAwareObservation]
+
+    env = fancy_gym.make_bb(env_id, [wrapper_class], {'learn_sub_trajectories': True},
                             {'trajectory_generator_type': 'promp'},
                             {'controller_type': 'motor'},
                             {'phase_generator_type': 'linear'},
                             {'basis_generator_type': 'rbf'})
-    env.reset()
-
-    # ToyEnv only returns 1 as reward
-    assert env.step(env.action_space.sample())[1] == reward_aggregation(np.ones(50, ))
-
-
-@pytest.mark.parametrize('env_id', ENV_IDS)
-@pytest.mark.parametrize('add_time_aware_wrapper_before', [True, False])
-def test_learn_sub_trajectories(env_id: str, add_time_aware_wrapper_before: bool):
-    env_step = fancy_gym.make(env_id, SEED)
-    env = fancy_gym.make_bb(env_id, [], {}, {}, {}, {'phase_generator_type': 'linear'}, {})
-
-    # has time aware wrapper
-    if add_time_aware_wrapper_before:
-        pass
 
     assert env.learn_sub_trajectories
-    assert env.learn_tau
+    assert env.traj_gen.learn_tau
     assert env.observation_space == env_step.observation_space
+
+    env.reset()
+    action = env.action_space.sample()
+    obs, r, d, info = env.step(action)
+
+    length = info['trajectory_length']
+
+    factor = 1 / env.dt
+    assert np.allclose(length * env.dt, np.round(factor * action[0]) / factor)
+    assert np.allclose(length * env.dt, np.round(factor * env.traj_gen.tau.numpy()) / factor)
diff --git a/test/test_controller.py b/test/test_controller.py
new file mode 100644
index 0000000..c530c50
--- /dev/null
+++ b/test/test_controller.py
@@ -0,0 +1,73 @@
+from typing import Tuple, Union
+
+import numpy as np
+import pytest
+
+from fancy_gym.black_box.factory import controller_factory
+
+
+@pytest.mark.parametrize('ctrl_type', controller_factory.ALL_TYPES)
+def test_initialization(ctrl_type: str):
+    controller_factory.get_controller(ctrl_type)
+
+
+@pytest.mark.parametrize('position', [np.zeros(3, ), np.ones(3, ), np.arange(0, 3)])
+@pytest.mark.parametrize('velocity', [np.zeros(3, ), np.ones(3, ), np.arange(0, 3)])
+def test_velocity(position: np.ndarray, velocity: np.ndarray):
+    ctrl = controller_factory.get_controller('velocity')
+    a = ctrl(position, velocity, None, None)
+    assert np.array_equal(a, velocity)
+
+
+@pytest.mark.parametrize('position', [np.zeros(3, ), np.ones(3, ), np.arange(0, 3)])
+@pytest.mark.parametrize('velocity', [np.zeros(3, ), np.ones(3, ), np.arange(0, 3)])
+def test_position(position: np.ndarray, velocity: np.ndarray):
+    ctrl = controller_factory.get_controller('position')
+    a = ctrl(position, velocity, None, None)
+    assert np.array_equal(a, position)
+
+
+@pytest.mark.parametrize('position', [np.zeros(3, ), np.ones(3, ), np.arange(0, 3)])
+@pytest.mark.parametrize('velocity', [np.zeros(3, ), np.ones(3, ), np.arange(0, 3)])
+@pytest.mark.parametrize('current_position', [np.zeros(3, ), np.ones(3, ), np.arange(0, 3)])
+@pytest.mark.parametrize('current_velocity', [np.zeros(3, ), np.ones(3, ), np.arange(0, 3)])
+@pytest.mark.parametrize('p_gains', [0, 1, 0.5, np.zeros(3, ), np.ones(3, ), np.arange(0, 3)])
+@pytest.mark.parametrize('d_gains', [0, 1, 0.5, np.zeros(3, ), np.ones(3, ), np.arange(0, 3)])
+def test_pd(position: np.ndarray, velocity: np.ndarray, current_position: np.ndarray, current_velocity: np.ndarray,
+            p_gains: Union[float, Tuple], d_gains: Union[float, Tuple]):
+    ctrl = controller_factory.get_controller('motor', p_gains=p_gains, d_gains=d_gains)
+    assert np.array_equal(ctrl.p_gains, p_gains)
+    assert np.array_equal(ctrl.d_gains, d_gains)
+
+    a = ctrl(position, velocity, current_position, current_velocity)
+    pd = p_gains * (position - current_position) + d_gains * (velocity - current_velocity)
+    assert np.array_equal(a, pd)
+
+
+@pytest.mark.parametrize('pos_vel', [(np.ones(3, ), np.ones(4, )),
+                                     (np.ones(4, ), np.ones(3, )),
+                                     (np.ones(4, ), np.ones(4, ))])
+def test_pd_invalid_shapes(pos_vel: Tuple[np.ndarray, np.ndarray]):
+    position, velocity = pos_vel
+    ctrl = controller_factory.get_controller('motor')
+    with pytest.raises(ValueError):
+        ctrl(position, velocity, np.ones(3, ), np.ones(3, ))
+
+
+@pytest.mark.parametrize('position', [np.zeros(3, ), np.ones(3, ), np.arange(0, 3)])
+@pytest.mark.parametrize('current_position', [np.zeros(3, ), np.ones(3, ), np.arange(0, 3)])
+@pytest.mark.parametrize('gripper_pos', [0, 1, 0.5])
+def test_metaworld(position: np.ndarray, current_position: np.ndarray, gripper_pos: float):
+    ctrl = controller_factory.get_controller('metaworld')
+
+    position_grip = np.append(position, gripper_pos)
+    c_position_grip = np.append(current_position, -1)
+    a = ctrl(position_grip, None, c_position_grip, None)
+    assert a[-1] == gripper_pos
+    assert np.array_equal(a[:-1], position - current_position)
+
+
+def test_metaworld_invalid_shapes():
+    ctrl = controller_factory.get_controller('metaworld')
+    with pytest.raises(ValueError):
+        ctrl(np.ones(4, ), None, np.ones(3, ), None)