From ffe48dfb57899177c4722864f20d0cb4e7144097 Mon Sep 17 00:00:00 2001
From: Hongyi Zhou
Date: Wed, 9 Nov 2022 12:49:17 +0100
Subject: [PATCH 01/27] change back infos to default setting

---
 fancy_gym/black_box/black_box_wrapper.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py
index dc5445e..ea28ef7 100644
--- a/fancy_gym/black_box/black_box_wrapper.py
+++ b/fancy_gym/black_box/black_box_wrapper.py
@@ -210,10 +210,8 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         self.current_traj_steps += t + 1

         if self.verbose >= 2:
-            infos['desired_pos'] = position[:t+1]
-            infos['desired_vel'] = velocity[:t+1]
-            infos['current_pos'] = self.current_pos
-            infos['current_vel'] = self.current_vel
+            infos['positions'] = position
+            infos['velocities'] = velocity
             infos['step_actions'] = actions[:t + 1]
             infos['step_observations'] = observations[:t + 1]
             infos['step_rewards'] = rewards[:t + 1]
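Note (annotation, not part of the patch series): with verbose >= 2 the wrapper now exposes the full planned trajectory under the default keys 'positions' and 'velocities' instead of the truncated 'desired_pos'/'desired_vel' slices. A minimal sketch of consuming these keys — the env id is an assumption, any ProDMP id registered by this repo works the same way:

    import fancy_gym

    env = fancy_gym.make("BoxPushingDenseProDMP-v0", seed=0)
    env.reset()
    # one black-box step executes the whole planned (sub-)trajectory
    obs, reward, done, info = env.step(env.action_space.sample())
    planned_pos = info['positions']    # desired joint positions of the plan
    planned_vel = info['velocities']   # desired joint velocities of the plan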
From 60e18d29646d237671ab81b697ae5036845390d3 Mon Sep 17 00:00:00 2001
From: Hongyi Zhou
Date: Wed, 9 Nov 2022 17:54:34 +0100
Subject: [PATCH 02/27] add prodmp to test_black_box

---
 fancy_gym/envs/__init__.py                        |  2 +-
 fancy_gym/examples/example_sim_env.py             |  9 +++
 .../examples/examples_movement_primitives.py      | 10 +--
 fancy_gym/utils/make_env_helpers.py               |  3 +
 test/test_black_box.py                            | 37 +++++----
 test/test_replanning_envs.py                      | 75 +++++++++++++++++--
 6 files changed, 108 insertions(+), 28 deletions(-)
 create mode 100644 fancy_gym/examples/example_sim_env.py

diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py
index d3dfa8e..4483637 100644
--- a/fancy_gym/envs/__init__.py
+++ b/fancy_gym/envs/__init__.py
@@ -503,7 +503,7 @@ for _v in _versions:
     kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3
     kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True
     kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0
-    kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 4
+    kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 0
     kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['alpha'] = 10.
     kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3  # 3.5, 4 to try
     kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3

diff --git a/fancy_gym/examples/example_sim_env.py b/fancy_gym/examples/example_sim_env.py
new file mode 100644
index 0000000..f949a89
--- /dev/null
+++ b/fancy_gym/examples/example_sim_env.py
@@ -0,0 +1,9 @@
+import gym_blockpush
+import gym
+
+env = gym.make("blockpush-v0")
+env.start()
+env.scene.reset()
+for i in range(100):
+    env.step(env.action_space.sample())
+    env.render()
\ No newline at end of file

diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py
index e19eacb..707dccd 100644
--- a/fancy_gym/examples/examples_movement_primitives.py
+++ b/fancy_gym/examples/examples_movement_primitives.py
@@ -157,17 +157,17 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
 if __name__ == '__main__':
     render = True
     # DMP
-    example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render)
+    # example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render)

     # ProMP
-    example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render)
-    example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render)
+    # example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render)
+    # example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render)

     # ProDMP
     example_mp("BoxPushingDenseProDMP-v0", seed=10, iterations=16, render=render)

     # Altered basis functions
-    obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)
+    # obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)

     # Custom MP
-    example_fully_custom_mp(seed=10, iterations=1, render=render)
+    # example_fully_custom_mp(seed=10, iterations=1, render=render)

diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py
index 3c73ba9..0ba7a4a 100644
--- a/fancy_gym/utils/make_env_helpers.py
+++ b/fancy_gym/utils/make_env_helpers.py
@@ -175,6 +175,9 @@ def make_bb(
     if phase_kwargs.get('learn_delay'):
         phase_kwargs["delay_bound"] = [0, black_box_kwargs['duration'] - env.dt * 2]

+    if traj_gen_kwargs['trajectory_generator_type'] == 'prodmp':
+        assert basis_kwargs['basis_generator_type'] == 'prodmp', 'prodmp trajectory generator requires prodmp basis generator'
+
     phase_gen = get_phase_generator(**phase_kwargs)
     basis_gen = get_basis_generator(phase_generator=phase_gen, **basis_kwargs)
     controller = get_controller(**controller_kwargs)

diff --git a/test/test_black_box.py b/test/test_black_box.py
index d5e3a88..69c0088 100644
--- a/test/test_black_box.py
+++ b/test/test_black_box.py
@@ -67,28 +67,32 @@ def test_missing_wrapper(env_id: str):
         fancy_gym.make_bb(env_id, [], {}, {}, {}, {}, {})

-@pytest.mark.parametrize('mp_type', ['promp', 'dmp'])
+@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp'])
 def test_missing_local_state(mp_type: str):
+    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
+
     env = fancy_gym.make_bb('toy-v0', [RawInterfaceWrapper], {},
                             {'trajectory_generator_type': mp_type},
                             {'controller_type': 'motor'},
                             {'phase_generator_type': 'exp'},
-                            {'basis_generator_type': 'rbf'})
+                            {'basis_generator_type': basis_generator_type})
     env.reset()
     with pytest.raises(NotImplementedError):
         env.step(env.action_space.sample())

-@pytest.mark.parametrize('mp_type', ['promp', 'dmp'])
+@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp'])
 @pytest.mark.parametrize('env_wrap', zip(ENV_IDS, WRAPPERS))
 @pytest.mark.parametrize('verbose', [1, 2])
 def test_verbosity(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]], verbose: int):
+    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
+
     env_id, wrapper_class = env_wrap
     env = fancy_gym.make_bb(env_id, [wrapper_class], {'verbose': verbose},
                             {'trajectory_generator_type': mp_type},
                             {'controller_type': 'motor'},
                             {'phase_generator_type': 'exp'},
-                            {'basis_generator_type': 'rbf'})
+                            {'basis_generator_type': basis_generator_type})
     env.reset()
     info_keys = list(env.step(env.action_space.sample())[3].keys())
@@ -104,15 +108,17 @@ def test_verbosity(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]]
     assert all(e in info_keys for e in mp_keys)

-@pytest.mark.parametrize('mp_type', ['promp', 'dmp'])
+@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp'])
 @pytest.mark.parametrize('env_wrap', zip(ENV_IDS, WRAPPERS))
 def test_length(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]]):
+    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
+
     env_id, wrapper_class = env_wrap
     env = fancy_gym.make_bb(env_id, [wrapper_class], {},
                             {'trajectory_generator_type': mp_type},
                             {'controller_type': 'motor'},
                             {'phase_generator_type': 'exp'},
-                            {'basis_generator_type': 'rbf'})
+                            {'basis_generator_type': basis_generator_type})

     for _ in range(5):
         env.reset()
@@ -121,14 +127,15 @@ def test_length(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]]):
     assert length == env.spec.max_episode_steps

-@pytest.mark.parametrize('mp_type', ['promp', 'dmp'])
+@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp'])
 @pytest.mark.parametrize('reward_aggregation', [np.sum, np.mean, np.median, lambda x: np.mean(x[::2])])
 def test_aggregation(mp_type: str, reward_aggregation: Callable[[np.ndarray], float]):
+    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
     env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {'reward_aggregation': reward_aggregation},
                             {'trajectory_generator_type': mp_type},
                             {'controller_type': 'motor'},
                             {'phase_generator_type': 'exp'},
-                            {'basis_generator_type': 'rbf'})
+                            {'basis_generator_type': basis_generator_type})
     env.reset()
     # ToyEnv only returns 1 as reward
     assert env.step(env.action_space.sample())[1] == reward_aggregation(np.ones(50, ))
@@ -149,12 +156,13 @@ def test_context_space(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapp
     assert env.observation_space.shape == wrapper.context_mask[wrapper.context_mask].shape

-@pytest.mark.parametrize('mp_type', ['promp', 'dmp'])
+@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp'])
 @pytest.mark.parametrize('num_dof', [0, 1, 2, 5])
-@pytest.mark.parametrize('num_basis', [0, 1, 2, 5])
+@pytest.mark.parametrize('num_basis', [0, 2, 5])  # should add 1 back after the bug is fixed
 @pytest.mark.parametrize('learn_tau', [True, False])
 @pytest.mark.parametrize('learn_delay', [True, False])
 def test_action_space(mp_type: str, num_dof: int, num_basis: int, learn_tau: bool, learn_delay: bool):
+    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
     env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {},
                             {'trajectory_generator_type': mp_type,
                              'action_dim': num_dof
@@ -164,28 +172,29 @@ def test_action_space(mp_type: str, num_dof: int, num_basis: int, learn_tau: boo
                              'learn_tau': learn_tau,
                              'learn_delay': learn_delay
                              },
-                            {'basis_generator_type': 'rbf',
+                            {'basis_generator_type': basis_generator_type,
                              'num_basis': num_basis
                              })

     base_dims = num_dof * num_basis
-    additional_dims = num_dof if mp_type == 'dmp' else 0
+    additional_dims = num_dof if 'dmp' in mp_type else 0
     traj_modification_dims = int(learn_tau) + int(learn_delay)
     assert env.action_space.shape[0] == base_dims + traj_modification_dims + additional_dims

-@pytest.mark.parametrize('mp_type', ['promp', 'dmp'])
+@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp'])
 @pytest.mark.parametrize('a', [1])
 @pytest.mark.parametrize('b', [1.0])
 @pytest.mark.parametrize('c', [[1], [1.0], ['str'], [{'a': 'b'}], [np.ones(3, )]])
 @pytest.mark.parametrize('d', [{'a': 1}, {1: 2.0}, {'a': [1.0]}, {'a': np.ones(3, )}, {'a': {'a': 'b'}}])
 @pytest.mark.parametrize('e', [Object()])
 def test_change_env_kwargs(mp_type: str, a: int, b: float, c: list, d: dict, e: Object):
+    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
     env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {},
                             {'trajectory_generator_type': mp_type},
                             {'controller_type': 'motor'},
                             {'phase_generator_type': 'exp'},
-                            {'basis_generator_type': 'rbf'},
+                            {'basis_generator_type': basis_generator_type},
                             a=a, b=b, c=c, d=d, e=e
                             )
     assert a is env.a

diff --git a/test/test_replanning_envs.py b/test/test_replanning_envs.py
index 300faed..4228284 100644
--- a/test/test_replanning_envs.py
+++ b/test/test_replanning_envs.py
@@ -1,6 +1,14 @@
 from itertools import chain
+from typing import Tuple, Type, Union, Optional, Callable

+import gym
+import numpy as np
 import pytest
+from gym import register
+from gym.core import ActType, ObsType
+
+import fancy_gym
+from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper

 import fancy_gym
 from test.utils import run_env, run_env_determinism
@@ -10,14 +18,65 @@ Fancy_ProDMP_IDS = fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS['ProDMP']

 All_ProDMP_IDS = fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS['ProDMP']

+class Object(object):
+    pass

-@pytest.mark.parametrize('env_id', All_ProDMP_IDS)
-def test_replanning_envs(env_id: str):
-    """Tests that ProDMP environments run without errors using random actions."""
-    run_env(env_id)

-@pytest.mark.parametrize('env_id', All_ProDMP_IDS)
-def test_replanning_determinism(env_id: str):
-    """Tests that ProDMP environments are deterministic."""
-    run_env_determinism(env_id, 0)
+class ToyEnv(gym.Env):
+    observation_space = gym.spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float64)
+    action_space = gym.spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float64)
+    dt = 0.02

+    def __init__(self, a: int = 0, b: float = 0.0, c: list = [], d: dict = {}, e: Object = Object()):
+        self.a, self.b, self.c, self.d, self.e = a, b, c, d, e
+
+    def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
+              options: Optional[dict] = None) -> Union[ObsType, Tuple[ObsType, dict]]:
+        return np.array([-1])
+
+    def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]:
+        return np.array([-1]), 1, False, {}
+
+    def render(self, mode="human"):
+        pass
+
+
+class ToyWrapper(RawInterfaceWrapper):
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        return np.ones(self.action_space.shape)
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        return np.zeros(self.action_space.shape)
+
+@pytest.fixture(scope="session", autouse=True)
+def setup():
+    register(
+        id=f'toy-v0',
+        entry_point='test.test_black_box:ToyEnv',
+        max_episode_steps=50,
+    )
+# @pytest.mark.parametrize('env_id', All_ProDMP_IDS)
+# def test_replanning_envs(env_id: str):
+#     """Tests that ProDMP environments run without errors using random actions."""
+#     run_env(env_id)
+#
+# @pytest.mark.parametrize('env_id', All_ProDMP_IDS)
+# def test_replanning_determinism(env_id: str):
+#     """Tests that ProDMP environments are deterministic."""
+#     run_env_determinism(env_id, 0)
+
+@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp'])
+def test_missing_local_state(mp_type: str):
+    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
+
+    env = fancy_gym.make_bb('toy-v0', [RawInterfaceWrapper], {},
+                            {'trajectory_generator_type': mp_type},
+                            {'controller_type': 'motor'},
+                            {'phase_generator_type': 'exp'},
+                            {'basis_generator_type': basis_generator_type})
+    env.reset()
+    with pytest.raises(NotImplementedError):
+        env.step(env.action_space.sample())
\ No newline at end of file
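Note (annotation, not part of the patch series): the invariant these test changes encode, and which make_bb now asserts, is that a 'prodmp' trajectory generator must be paired with a 'prodmp' basis generator, while 'promp'/'dmp' keep the 'rbf' basis. A minimal construction mirroring the test setup (sketch; 'toy-v0' and ToyWrapper are the fixtures defined in the test modules above):

    import fancy_gym

    env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {},
                            {'trajectory_generator_type': 'prodmp'},
                            {'controller_type': 'motor'},
                            {'phase_generator_type': 'exp'},
                            {'basis_generator_type': 'prodmp'})
    env.reset()
    env.step(env.action_space.sample())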
From 104b90929608f1299f8c559452c457a43fcb498d Mon Sep 17 00:00:00 2001
From: Hongyi Zhou
Date: Sun, 13 Nov 2022 16:59:13 +0100
Subject: [PATCH 03/27] delete hacky experimental codes & add tests to test_black_box

---
 fancy_gym/black_box/black_box_wrapper.py | 20 +----
 fancy_gym/envs/__init__.py               |  2 +
 test/test_black_box.py                   | 97 +++++++++++++++++++-----
 3 files changed, 86 insertions(+), 33 deletions(-)

diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py
index ea28ef7..88f8a32 100644
--- a/fancy_gym/black_box/black_box_wrapper.py
+++ b/fancy_gym/black_box/black_box_wrapper.py
@@ -24,7 +24,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
                      Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int], bool]] = None,
                  reward_aggregation: Callable[[np.ndarray], float] = np.sum,
                  max_planning_times: int = 1,
-                 desired_conditioning: bool = False
+                 desired_traj_bc: bool = False
                  ):
         """
         gym.Wrapper for leveraging a black box approach with a trajectory generator.
@@ -59,18 +59,11 @@ class BlackBoxWrapper(gym.ObservationWrapper):

         # reward computation
         self.reward_aggregation = reward_aggregation
-        # self.traj_gen.basis_gn.show_basis(plot=True)

         # spaces
         self.return_context_observation = not (learn_sub_trajectories or self.do_replanning)
-        # self.return_context_observation = True
         self.traj_gen_action_space = self._get_traj_gen_action_space()
         self.action_space = self._get_action_space()

-        # no goal learning
-        # tricky_action_upperbound = [np.inf] * (self.traj_gen_action_space.shape[0] - 7)
-        # tricky_action_lowerbound = [-np.inf] * (self.traj_gen_action_space.shape[0] - 7)
-        # self.action_space = spaces.Box(np.array(tricky_action_lowerbound), np.array(tricky_action_upperbound), dtype=np.float32)
-
         self.observation_space = self._get_observation_space()

         # rendering
@@ -78,7 +71,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         self.verbose = verbose

         # condition value
-        self.desired_conditioning = True
+        self.desired_traj_bc = desired_traj_bc
         self.condition_pos = None
         self.condition_vel = None
@@ -157,11 +150,6 @@ class BlackBoxWrapper(gym.ObservationWrapper):
     def step(self, action: np.ndarray):
         """ This function generates a trajectory based on a MP and then does the usual loop over reset and step"""

-        ## tricky part, only use weights basis
-        # basis_weights = action.reshape(7, -1)
-        # goal_weights = np.zeros((7, 1))
-        # action = np.concatenate((basis_weights, goal_weights), axis=1).flatten()
-
         # TODO remove this part, right now only needed for beer pong
         mp_params, env_spec_params = self.env.episode_callback(action, self.traj_gen)
         position, velocity = self.get_trajectory(mp_params)
@@ -201,10 +189,8 @@ class BlackBoxWrapper(gym.ObservationWrapper):
                 if self.max_planning_times is not None and self.plan_counts >= self.max_planning_times:
                     continue

-                self.condition_pos = pos if self.desired_conditioning else self.current_pos
-                self.condition_vel = vel if self.desired_conditioning else self.current_vel
+                self.condition_pos = pos if self.desired_traj_bc else self.current_pos
+                self.condition_vel = vel if self.desired_traj_bc else self.current_vel

                 break

diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py
index 4483637..eb44d9f 100644
--- a/fancy_gym/envs/__init__.py
+++ b/fancy_gym/envs/__init__.py
@@ -88,6 +88,7 @@ DEFAULT_BB_DICT_ProDMP = {
     "black_box_kwargs": {
         'replanning_schedule': None,
         'max_planning_times': None,
+        'desired_traj_bc': False,
         'verbose': 2
     }
 }
@@ -509,6 +510,7 @@ for _v in _versions:
     kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3
     kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 2
     kwargs_dict_box_pushing_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t : t % 25 == 0
+    kwargs_dict_box_pushing_prodmp['black_box_kwargs']['desired_traj_bc'] = True
     register(
         id=_env_id,
         entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',

diff --git a/test/test_black_box.py b/test/test_black_box.py
index 69c0088..fa1cd01 100644
--- a/test/test_black_box.py
+++ b/test/test_black_box.py
@@ -205,18 +205,20 @@ def test_change_env_kwargs(mp_type: str, a: int, b: float, c: list, d: dict, e:
     assert e is env.e

-@pytest.mark.parametrize('mp_type', ['promp'])
+@pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
 @pytest.mark.parametrize('tau', [0.25, 0.5, 0.75, 1])
 def test_learn_tau(mp_type: str, tau: float):
+    phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear'
+    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
     env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {'verbose': 2},
                             {'trajectory_generator_type': mp_type,
                              },
                             {'controller_type': 'motor'},
-                            {'phase_generator_type': 'linear',
+                            {'phase_generator_type': phase_generator_type,
                              'learn_tau': True,
                              'learn_delay': False
                              },
-                            {'basis_generator_type': 'rbf',
+                            {'basis_generator_type': basis_generator_type,
                              },
                             seed=SEED)
     d = True
@@ -237,26 +239,29 @@ def test_learn_tau(mp_type: str, tau: float):
         vel = info['velocities'].flatten()

         # Check end is all same (only true for linear basis)
-        assert np.all(pos[tau_time_steps:] == pos[-1])
-        assert np.all(vel[tau_time_steps:] == vel[-1])
+        if phase_generator_type == "linear":
+            assert np.all(pos[tau_time_steps:] == pos[-1])
+            assert np.all(vel[tau_time_steps:] == vel[-1])

         # Check active trajectory section is different to end values
         assert np.all(pos[:tau_time_steps - 1] != pos[-1])
         assert np.all(vel[:tau_time_steps - 2] != vel[-1])
-
-
-@pytest.mark.parametrize('mp_type', ['promp'])
+#
+#
+@pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
 @pytest.mark.parametrize('delay', [0, 0.25, 0.5, 0.75])
 def test_learn_delay(mp_type: str, delay: float):
+    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
+    phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear'
     env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {'verbose': 2},
                             {'trajectory_generator_type': mp_type,
                              },
                             {'controller_type': 'motor'},
-                            {'phase_generator_type': 'linear',
+                            {'phase_generator_type': phase_generator_type,
                              'learn_tau': False,
                              'learn_delay': True
                              },
-                            {'basis_generator_type': 'rbf',
+                            {'basis_generator_type': basis_generator_type,
                              },
                             seed=SEED)
     d = True

         # Check beginning is all same (only true for linear basis)
         assert np.all(pos[:max(1, delay_time_steps - 1)] == pos[0])
         assert np.all(vel[:max(1, delay_time_steps - 2)] == vel[0])

         # Check active trajectory section is different to beginning values
         assert np.all(pos[max(1, delay_time_steps):] != pos[0])
         assert np.all(vel[max(1, delay_time_steps)] != vel[0])
-
-
-@pytest.mark.parametrize('mp_type', ['promp'])
+#
+#
+@pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
 @pytest.mark.parametrize('tau', [0.25, 0.5, 0.75, 1])
 @pytest.mark.parametrize('delay', [0.25, 0.5, 0.75, 1])
 def test_learn_tau_and_delay(mp_type: str, tau: float, delay: float):
+    phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear'
+    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
     env = fancy_gym.make_bb('toy-v0', [ToyWrapper], {'verbose': 2},
                             {'trajectory_generator_type': mp_type,
                              },
                             {'controller_type': 'motor'},
-                            {'phase_generator_type': 'linear',
+                            {'phase_generator_type': phase_generator_type,
                              'learn_tau': True,
                              'learn_delay': True
                              },
-                            {'basis_generator_type': 'rbf',
+                            {'basis_generator_type': basis_generator_type,
                              },
                             seed=SEED)
     if env.spec.max_episode_steps * env.dt < delay + tau:
@@ -324,8 +331,9 @@ def test_learn_tau_and_delay(mp_type: str, tau: float, delay: float):
         vel = info['velocities'].flatten()

         # Check end is all same (only true for linear basis)
-        assert np.all(pos[joint_time_steps:] == pos[-1])
-        assert np.all(vel[joint_time_steps:] == vel[-1])
+        if phase_generator_type == "linear":
+            assert np.all(pos[joint_time_steps:] == pos[-1])
+            assert np.all(vel[joint_time_steps:] == vel[-1])

         # Check beginning is all same (only true for linear basis)
         assert np.all(pos[:delay_time_steps - 1] == pos[0])
         assert np.all(vel[:delay_time_steps - 2] == vel[0])
@@ -336,3 +344,58 @@ def test_learn_tau_and_delay(mp_type: str, tau: float, delay: float):
         active_vel = vel[delay_time_steps: joint_time_steps - 2]
         assert np.all(active_pos != pos[-1]) and np.all(active_pos != pos[0])
         assert np.all(active_vel != vel[-1]) and np.all(active_vel != vel[0])
+
+
+@pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
+@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4])
+@pytest.mark.parametrize('sub_segment_steps', [5, 10])
+def test_replanning_schedule(mp_type: str, max_planning_times: int, sub_segment_steps: int):
+    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
+    phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear'
+    env = fancy_gym.make_bb('toy-v0', [ToyWrapper],
+                            {'max_planning_times': max_planning_times,
+                             'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0,
+                             'verbose': 2},
+                            {'trajectory_generator_type': mp_type,
+                             },
+                            {'controller_type': 'motor'},
+                            {'phase_generator_type': phase_generator_type,
+                             'learn_tau': False,
+                             'learn_delay': False
+                             },
+                            {'basis_generator_type': basis_generator_type,
+                             },
+                            seed=SEED)
+    _ = env.reset()
+    d = False
+    for i in range(max_planning_times):
+        _, _, d, _ = env.step(env.action_space.sample())
+    assert d
+
+@pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
+@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4])
+@pytest.mark.parametrize('sub_segment_steps', [5, 10])
+def test_max_planning_times(mp_type: str, max_planning_times: int, sub_segment_steps: int):
+    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
+    phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear'
+    env = fancy_gym.make_bb('toy-v0', [ToyWrapper],
+                            {'max_planning_times': max_planning_times,
+                             'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0,
+                             'verbose': 2},
+                            {'trajectory_generator_type': mp_type,
+                             },
+                            {'controller_type': 'motor'},
+                            {'phase_generator_type': phase_generator_type,
+                             'learn_tau': False,
+                             'learn_delay': False
+                             },
+                            {'basis_generator_type': basis_generator_type,
+                             },
+                            seed=SEED)
+    _ = env.reset()
+    d = False
+    planning_times = 0
+    while not d:
+        _, _, d, _ = env.step(env.action_space.sample())
+        planning_times += 1
+    assert planning_times == max_planning_times
\ No newline at end of file

From 7e3ec7a2eff43fa4caabc62086b7e7d5af8be95a Mon Sep 17 00:00:00 2001
From: Hongyi Zhou
Date: Sun, 13 Nov 2022 17:59:12 +0100
Subject: [PATCH 04/27] set default max_planning_times to None

---
 fancy_gym/black_box/black_box_wrapper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py
index 88f8a32..8830987 100644
--- a/fancy_gym/black_box/black_box_wrapper.py
+++ b/fancy_gym/black_box/black_box_wrapper.py
@@ -23,7 +23,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
                  replanning_schedule: Optional[
                      Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int], bool]] = None,
                  reward_aggregation: Callable[[np.ndarray], float] = np.sum,
-                 max_planning_times: int = 1,
+                 max_planning_times: int = None,
                  desired_traj_bc: bool = False
                  ):
         """
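Note (annotation, not part of the patch series): the max_planning_times accounting added in PATCH 03 reads as follows — when the replanning schedule fires but plan_counts has already reached max_planning_times, the `continue` keeps stepping through the remainder of the current trajectory instead of breaking out to replan. With the registered box-pushing settings (replanning every 25 steps, max_planning_times=2) a 100-step episode therefore replans once at t=25 and then plays the second plan out to the end. A sketch of the same behaviour on the toy fixtures from test_black_box.py (parameter values are illustrative, not the test's):

    import fancy_gym

    env = fancy_gym.make_bb('toy-v0', [ToyWrapper],
                            {'max_planning_times': 2,
                             'replanning_schedule': lambda pos, vel, obs, action, t: t % 25 == 0,
                             'verbose': 2},
                            {'trajectory_generator_type': 'prodmp'},
                            {'controller_type': 'motor'},
                            {'phase_generator_type': 'exp'},
                            {'basis_generator_type': 'prodmp'},
                            seed=0)
    env.reset()
    done, plans = False, 0
    while not done:
        # each step() executes one planned segment of the 50-step toy episode
        _, _, done, _ = env.step(env.action_space.sample())
        plans += 1
    assert plans == 2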
From be14b21fff6379c948d5fdd78311e76a6e32a363 Mon Sep 17 00:00:00 2001
From: Hongyi Zhou
Date: Mon, 14 Nov 2022 17:39:46 +0100
Subject: [PATCH 05/27] update test suite for replanning envs

---
 fancy_gym/black_box/black_box_wrapper.py |   1 +
 test/test_black_box.py                   |  27 ----
 test/test_replanning_sequencing.py       | 169 ++++++++++++++++++++++-
 3 files changed, 167 insertions(+), 30 deletions(-)

diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py
index 8830987..ce96b20 100644
--- a/fancy_gym/black_box/black_box_wrapper.py
+++ b/fancy_gym/black_box/black_box_wrapper.py
@@ -86,6 +86,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         return observation.astype(self.observation_space.dtype)

     def get_trajectory(self, action: np.ndarray) -> Tuple:
+        # duration = self.duration - self.current_traj_steps * self.dt
         duration = self.duration
         if self.learn_sub_trajectories:
             duration = None

diff --git a/test/test_black_box.py b/test/test_black_box.py
index fa1cd01..e95cf12 100644
--- a/test/test_black_box.py
+++ b/test/test_black_box.py
@@ -372,30 +372,3 @@ def test_replanning_schedule(mp_type: str, max_planning_times: int, sub_segment_
         _, _, d, _ = env.step(env.action_space.sample())
     assert d

-@pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
-@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4])
-@pytest.mark.parametrize('sub_segment_steps', [5, 10])
-def test_max_planning_times(mp_type: str, max_planning_times: int, sub_segment_steps: int):
-    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
-    phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear'
-    env = fancy_gym.make_bb('toy-v0', [ToyWrapper],
-                            {'max_planning_times': max_planning_times,
-                             'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0,
-                             'verbose': 2},
-                            {'trajectory_generator_type': mp_type,
-                             },
-                            {'controller_type': 'motor'},
-                            {'phase_generator_type': phase_generator_type,
-                             'learn_tau': False,
-                             'learn_delay': False
-                             },
-                            {'basis_generator_type': basis_generator_type,
-                             },
-                            seed=SEED)
-    _ = env.reset()
-    d = False
-    planning_times = 0
-    while not d:
-        _, _, d, _ = env.step(env.action_space.sample())
-        planning_times += 1
-    assert planning_times == max_planning_times
\ No newline at end of file

diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py
index a42bb65..31d4f80 100644
--- a/test/test_replanning_sequencing.py
+++ b/test/test_replanning_sequencing.py
@@ -98,7 +98,7 @@ def test_learn_sub_trajectories(mp_type: str, env_wrap: Tuple[str, Type[RawInter
     assert length <= np.round(env.traj_gen.tau.numpy() / env.dt)

-@pytest.mark.parametrize('mp_type', ['promp', 'dmp'])
+@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp'])
 @pytest.mark.parametrize('env_wrap', zip(ENV_IDS, WRAPPERS))
 @pytest.mark.parametrize('add_time_aware_wrapper_before', [True, False])
 @pytest.mark.parametrize('replanning_time', [10, 100, 1000])
@@ -114,11 +114,14 @@ def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWra

     replanning_schedule = lambda c_pos, c_vel, obs, c_action, t: t % replanning_time == 0

+    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
+    phase_generator_type = 'exp' if 'dmp' in mp_type else 'linear'
+
     env = fancy_gym.make_bb(env_id, [wrapper_class], {'replanning_schedule': replanning_schedule, 'verbose': 2},
                             {'trajectory_generator_type': mp_type},
                             {'controller_type': 'motor'},
-                            {'phase_generator_type': 'exp'},
-                            {'basis_generator_type': 'rbf'}, seed=SEED)
+                            {'phase_generator_type': phase_generator_type},
+                            {'basis_generator_type': basis_generator_type}, seed=SEED)

     assert env.do_replanning
     assert callable(env.replanning_schedule)
@@ -142,3 +145,163 @@ def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWra
     env.reset()

     assert replanning_schedule(None, None, None, None, length)
+
+@pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
+@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4])
+@pytest.mark.parametrize('sub_segment_steps', [5, 10])
+def test_max_planning_times(mp_type: str, max_planning_times: int, sub_segment_steps: int):
+    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
+    phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear'
+    env = fancy_gym.make_bb('toy-v0', [ToyWrapper],
+                            {'max_planning_times': max_planning_times,
+                             'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0,
+                             'verbose': 2},
+                            {'trajectory_generator_type': mp_type,
+                             },
+                            {'controller_type': 'motor'},
+                            {'phase_generator_type': phase_generator_type,
+                             'learn_tau': False,
+                             'learn_delay': False
+                             },
+                            {'basis_generator_type': basis_generator_type,
+                             },
+                            seed=SEED)
+    _ = env.reset()
+    d = False
+    planning_times = 0
+    while not d:
+        _, _, d, _ = env.step(env.action_space.sample())
+        planning_times += 1
+    assert planning_times == max_planning_times
+
+@pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
+@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4])
+@pytest.mark.parametrize('sub_segment_steps', [5, 10])
+@pytest.mark.parametrize('tau', [0.5, 1.0, 1.5, 2.0])
+def test_replanning_with_learn_tau(mp_type: str, max_planning_times: int, sub_segment_steps: int, tau: float):
+    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
+    phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear'
+    env = fancy_gym.make_bb('toy-v0', [ToyWrapper],
+                            {'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0,
+                             'max_planning_times': max_planning_times,
+                             'verbose': 2},
+                            {'trajectory_generator_type': mp_type,
+                             },
+                            {'controller_type': 'motor'},
+                            {'phase_generator_type': phase_generator_type,
+                             'learn_tau': True,
+                             'learn_delay': False
+                             },
+                            {'basis_generator_type': basis_generator_type,
+                             },
+                            seed=SEED)
+    _ = env.reset()
+    d = False
+    planning_times = 0
+    while not d:
+        action = env.action_space.sample()
+        action[0] = tau
+        _, _, d, info = env.step(action)
+        planning_times += 1
+    assert planning_times == max_planning_times
+
+@pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
+@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4])
+@pytest.mark.parametrize('sub_segment_steps', [5, 10])
+@pytest.mark.parametrize('delay', [0.1, 0.25, 0.5, 0.75])
+def test_replanning_with_learn_delay(mp_type: str, max_planning_times: int, sub_segment_steps: int, delay: float):
+    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
+    phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear'
+    env = fancy_gym.make_bb('toy-v0', [ToyWrapper],
+                            {'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0,
+                             'max_planning_times': max_planning_times,
+                             'verbose': 2},
+                            {'trajectory_generator_type': mp_type,
+                             },
+                            {'controller_type': 'motor'},
+                            {'phase_generator_type': phase_generator_type,
+                             'learn_tau': False,
+                             'learn_delay': True
+                             },
+                            {'basis_generator_type': basis_generator_type,
+                             },
+                            seed=SEED)
+    _ = env.reset()
+    d = False
+    planning_times = 0
+    while not d:
+        action = env.action_space.sample()
+        action[0] = delay
+        _, _, d, info = env.step(action)
+
+        delay_time_steps = int(np.round(delay / env.dt))
+        pos = info['positions'].flatten()
+        vel = info['velocities'].flatten()
+
+        # Check beginning is all same (only true for linear basis)
+        if planning_times == 0:
+            assert np.all(pos[:max(1, delay_time_steps - 1)] == pos[0])
+            assert np.all(vel[:max(1, delay_time_steps - 2)] == vel[0])
+
+        # only valid when delay < sub_segment_steps
+        elif planning_times > 0 and delay_time_steps < sub_segment_steps:
+            assert np.all(pos[1:max(1, delay_time_steps - 1)] != pos[0])
+            assert np.all(vel[1:max(1, delay_time_steps - 2)] != vel[0])
+
+        # Check active trajectory section is different to beginning values
+        assert np.all(pos[max(1, delay_time_steps):] != pos[0])
+        assert np.all(vel[max(1, delay_time_steps)] != vel[0])
+
+        planning_times += 1
+
+    assert planning_times == max_planning_times
+
+@pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
+@pytest.mark.parametrize('max_planning_times', [1, 2, 3])
+@pytest.mark.parametrize('sub_segment_steps', [5, 10, 15])
+@pytest.mark.parametrize('delay', [0, 0.25, 0.5, 0.75])
+@pytest.mark.parametrize('tau', [0.5, 0.75, 1.0])
+def test_replanning_with_learn_delay_and_tau(mp_type: str, max_planning_times: int, sub_segment_steps: int,
+                                             delay: float, tau: float):
+    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
+    phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear'
+    env = fancy_gym.make_bb('toy-v0', [ToyWrapper],
+                            {'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0,
+                             'max_planning_times': max_planning_times,
+                             'verbose': 2},
+                            {'trajectory_generator_type': mp_type,
+                             },
+                            {'controller_type': 'motor'},
+                            {'phase_generator_type': phase_generator_type,
+                             'learn_tau': True,
+                             'learn_delay': True
+                             },
+                            {'basis_generator_type': basis_generator_type,
+                             },
+                            seed=SEED)
+    _ = env.reset()
+    d = False
+    planning_times = 0
+    while not d:
+        action = env.action_space.sample()
+        action[0] = tau
+        action[1] = delay
+        _, _, d, info = env.step(action)
+
+        delay_time_steps = int(np.round(delay / env.dt))
+
+        pos = info['positions'].flatten()
+        vel = info['velocities'].flatten()
+
+        # Delay only applies to first planning time
+        if planning_times == 0:
+            # Check delay is applied
+            assert np.all(pos[:max(1, delay_time_steps - 1)] == pos[0])
+            assert np.all(vel[:max(1, delay_time_steps - 2)] == vel[0])
+            # Check active trajectory section is different to beginning values
+            assert np.all(pos[max(1, delay_time_steps):] != pos[0])
+            assert np.all(vel[max(1, delay_time_steps)] != vel[0])
+
+        planning_times += 1
+
+    assert planning_times == max_planning_times
\ No newline at end of file
From fc3051bf57c8d6d37096a83f072f5d1ec549226a Mon Sep 17 00:00:00 2001
From: Hongyi Zhou
Date: Mon, 14 Nov 2022 17:48:15 +0100
Subject: [PATCH 06/27] minor updates

---
 fancy_gym/black_box/black_box_wrapper.py           |  1 -
 fancy_gym/envs/__init__.py                         |  4 +---
 fancy_gym/examples/examples_movement_primitives.py | 12 ++++++------
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py
index ce96b20..8830987 100644
--- a/fancy_gym/black_box/black_box_wrapper.py
+++ b/fancy_gym/black_box/black_box_wrapper.py
@@ -86,7 +86,6 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         return observation.astype(self.observation_space.dtype)

     def get_trajectory(self, action: np.ndarray) -> Tuple:
-        # duration = self.duration - self.current_traj_steps * self.dt
         duration = self.duration
         if self.learn_sub_trajectories:
             duration = None

diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py
index eb44d9f..2b0e7fd 100644
--- a/fancy_gym/envs/__init__.py
+++ b/fancy_gym/envs/__init__.py
@@ -498,15 +498,13 @@ for _v in _versions:
     kwargs_dict_box_pushing_prodmp['name'] = _v
     kwargs_dict_box_pushing_prodmp['controller_kwargs']['p_gains'] = 0.01 * np.array([120., 120., 120., 120., 50., 30., 10.])
     kwargs_dict_box_pushing_prodmp['controller_kwargs']['d_gains'] = 0.01 * np.array([10., 10., 10., 10., 6., 5., 3.])
-    # kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['weights_scale'] = np.array([3.4944e+01, 4.3734e+01, 9.6711e+01, 2.4429e+02, 5.8272e+02])
-    # kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 3.1264e-01
     kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['weights_scale'] = 0.3
     kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3
     kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True
     kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0
     kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 0
     kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['alpha'] = 10.
-    kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3  # 3.5, 4 to try
+    kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3
     kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3
     kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 2
     kwargs_dict_box_pushing_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t : t % 25 == 0

diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py
index 707dccd..67d93ae 100644
--- a/fancy_gym/examples/examples_movement_primitives.py
+++ b/fancy_gym/examples/examples_movement_primitives.py
@@ -157,17 +157,17 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
 if __name__ == '__main__':
     render = True
     # DMP
-    # example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render)
+    example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render)

     # ProMP
-    # example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render)
-    # example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render)
+    example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render)
+    example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render)

     # ProDMP
-    example_mp("BoxPushingDenseProDMP-v0", seed=10, iterations=16, render=render)
+    example_mp("BoxPushingDenseProDMP-v0", seed=10, iterations=4, render=render)

     # Altered basis functions
-    # obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)
+    obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)

     # Custom MP
-    # example_fully_custom_mp(seed=10, iterations=1, render=render)
+    example_fully_custom_mp(seed=10, iterations=1, render=render)

From ca5800fa3d67639b43a14a9d94ec43eaa07a4d81 Mon Sep 17 00:00:00 2001
From: ottofabian
Date: Tue, 15 Nov 2022 13:55:56 +0100
Subject: [PATCH 07/27] Update setup.py

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 1148e85..1c59fa9 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@ setup(
     ],
     extras_require=extras,
     install_requires=[
-        'gym[mujoco]<0.25.0,>=0.24.0',
+        'gym[mujoco]<0.25.0,>=0.24.1',
         'mp_pytorch @ git+https://github.com/ALRhub/MP_PyTorch.git@main'
     ],
     packages=[package for package in find_packages() if package.startswith("fancy_gym")],

From 2674bf80fe0268e33076652bfb50c3f7aa3f942b Mon Sep 17 00:00:00 2001
From: Hongyi Zhou
Date: Sun, 20 Nov 2022 21:56:32 +0100
Subject: [PATCH 08/27] update according to reviews opinion & fix bugs in box pushing IK

---
 fancy_gym/black_box/black_box_wrapper.py           | 28 +++----
 fancy_gym/envs/__init__.py                         | 14 +---
 .../mujoco/box_pushing/box_pushing_env.py          | 15 +---
 fancy_gym/examples/example_replanning_envs.py      | 66 ++++++++++-----
 fancy_gym/examples/example_sim_env.py              |  9 --
 .../examples/examples_movement_primitives.py       |  2 +-
 fancy_gym/utils/make_env_helpers.py                |  3 -
 test/test_black_box.py                             | 32 +------
 test/test_replanning_envs.py                       | 82 -------------------
 test/test_replanning_sequencing.py                 | 28 ++++++-
 10 files changed, 94 insertions(+), 185 deletions(-)
 delete mode 100644 fancy_gym/examples/example_sim_env.py
 delete mode 100644 test/test_replanning_envs.py

diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py
index 8830987..7fb085c 100644
--- a/fancy_gym/black_box/black_box_wrapper.py
+++ b/fancy_gym/black_box/black_box_wrapper.py
@@ -24,7 +24,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
                      Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int], bool]] = None,
                  reward_aggregation: Callable[[np.ndarray], float] = np.sum,
                  max_planning_times: int = None,
-                 desired_traj_bc: bool = False
+                 condition_on_desired: bool = False
                  ):
         """
         gym.Wrapper for leveraging a black box approach with a trajectory generator.
@@ -71,12 +71,12 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         self.verbose = verbose

         # condition value
-        self.desired_traj_bc = desired_traj_bc
+        self.condition_on_desired = condition_on_desired
         self.condition_pos = None
         self.condition_vel = None

         self.max_planning_times = max_planning_times
-        self.plan_counts = 0
+        self.plan_steps = 0

     def observation(self, observation):
         # return context space if we are
@@ -98,15 +98,11 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         bc_time = np.array(0 if not self.do_replanning else self.current_traj_steps * self.dt)
         # TODO we could think about initializing with the previous desired value in order to have a smooth transition
         #  at least from the planning point of view.
-        # self.traj_gen.set_boundary_conditions(bc_time, self.current_pos, self.current_vel)
-        if self.current_traj_steps == 0:
-            self.condition_pos = self.current_pos
-            self.condition_vel = self.current_vel
-        bc_time = torch.as_tensor(bc_time, dtype=torch.float32)
-        self.condition_pos = torch.as_tensor(self.condition_pos, dtype=torch.float32)
-        self.condition_vel = torch.as_tensor(self.condition_vel, dtype=torch.float32)
-        self.traj_gen.set_boundary_conditions(bc_time, self.condition_pos, self.condition_vel)
+
+        condition_pos = self.condition_pos if self.condition_pos is not None else self.current_pos
+        condition_vel = self.condition_vel if self.condition_vel is not None else self.current_vel
+
+        self.traj_gen.set_boundary_conditions(bc_time, condition_pos, condition_vel)
         self.traj_gen.set_duration(duration, self.dt)
         # traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True)
         position = get_numpy(self.traj_gen.get_traj_pos())
@@ -164,7 +160,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         infos = dict()
         done = False

-        self.plan_counts += 1
+        self.plan_steps += 1
         for t, (pos, vel) in enumerate(zip(position, velocity)):
             step_action = self.tracking_controller.get_action(pos, vel, self.current_pos, self.current_vel)
             c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
@@ -186,11 +182,11 @@ class BlackBoxWrapper(gym.ObservationWrapper):
             if done or self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action,
                                                 t + 1 + self.current_traj_steps):

-                if self.max_planning_times is not None and self.plan_counts >= self.max_planning_times:
+                if self.max_planning_times is not None and self.plan_steps >= self.max_planning_times:
                     continue

-                self.condition_pos = pos if self.desired_traj_bc else self.current_pos
-                self.condition_vel = vel if self.desired_traj_bc else self.current_vel
+                self.condition_pos = pos if self.condition_on_desired else None
+                self.condition_vel = vel if self.condition_on_desired else None

                 break
@@ -215,6 +211,6 @@ class BlackBoxWrapper(gym.ObservationWrapper):
     def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None):
         self.current_traj_steps = 0
-        self.plan_counts = 0
+        self.plan_steps = 0
         self.traj_gen.reset()
         return super(BlackBoxWrapper, self).reset()

diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py
index 2b0e7fd..bb6a664 100644
--- a/fancy_gym/envs/__init__.py
+++ b/fancy_gym/envs/__init__.py
@@ -68,12 +68,9 @@ DEFAULT_BB_DICT_ProDMP = {
     "wrappers": [],
     "trajectory_generator_kwargs": {
         'trajectory_generator_type': 'prodmp',
-        'weights_scale': 1.0,
     },
     "phase_generator_kwargs": {
         'phase_generator_type': 'exp',
-        'learn_delay': False,
-        'learn_tau': False,
     },
     "controller_kwargs": {
         'controller_type': 'motor',
@@ -86,10 +83,6 @@ DEFAULT_BB_DICT_ProDMP = {
         'num_basis': 5,
     },
     "black_box_kwargs": {
-        'replanning_schedule': None,
-        'max_planning_times': None,
-        'desired_traj_bc': False,
-        'verbose': 2
     }
 }
@@ -492,7 +485,7 @@ for _v in _versions:

 for _v in _versions:
     _name = _v.split("-")
-    _env_id = f'{_name[0]}ProDMP-{_name[1]}'
+    _env_id = f'{_name[0]}ReplanProDMP-{_name[1]}'
     kwargs_dict_box_pushing_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP)
     kwargs_dict_box_pushing_prodmp['wrappers'].append(mujoco.box_pushing.MPWrapper)
     kwargs_dict_box_pushing_prodmp['name'] = _v
@@ -502,13 +495,12 @@ for _v in _versions:
     kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_scale'] = 0.3
     kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True
     kwargs_dict_box_pushing_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0
-    kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 0
-    kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['alpha'] = 10.
+    kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 4
     kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3
     kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3
     kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 2
     kwargs_dict_box_pushing_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t : t % 25 == 0
-    kwargs_dict_box_pushing_prodmp['black_box_kwargs']['desired_traj_bc'] = True
+    kwargs_dict_box_pushing_prodmp['black_box_kwargs']['condition_on_desried'] = True
     register(
         id=_env_id,
         entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',

diff --git a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py
index 37babf9..275bba1 100644
--- a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py
+++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py
@@ -219,6 +219,8 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
             q_old = q
             q = q + dt * qd_d
             q = np.clip(q, q_min, q_max)
+            self.data.qpos[:7] = q
+            mujoco.mj_forward(self.model, self.data)
             current_cart_pos = self.data.body("tcp").xpos.copy()
             current_cart_quat = self.data.body("tcp").xquat.copy()
@@ -247,8 +249,10 @@ class BoxPushingEnvBase(MujocoEnv, utils.EzPickle):
             ### get Jacobian by mujoco
             self.data.qpos[:7] = q
             mujoco.mj_forward(self.model, self.data)
+
             jacp = self.get_body_jacp("tcp")[:, :7].copy()
             jacr = self.get_body_jacr("tcp")[:, :7].copy()
+
             J = np.concatenate((jacp, jacr), axis=0)
             Jw = J.dot(w)
@@ -356,14 +360,3 @@ class BoxPushingTemporalSpatialSparse(BoxPushingEnvBase):
             reward += box_goal_pos_dist_reward + box_goal_rot_dist_reward

         return reward
-
-if __name__=="__main__":
-    env = BoxPushingTemporalSpatialSparse(frame_skip=10)
-    env.reset()
-    for i in range(10):
-        env.reset()
-        for _ in range(100):
-            env.render("human")
-            action = env.action_space.sample()
-            obs, reward, done, info = env.step(action)
-            print("info: {}".format(info))

diff --git a/fancy_gym/examples/example_replanning_envs.py b/fancy_gym/examples/example_replanning_envs.py
index 392e9d4..d993a71 100644
--- a/fancy_gym/examples/example_replanning_envs.py
+++ b/fancy_gym/examples/example_replanning_envs.py
@@ -1,38 +1,62 @@
 import fancy_gym
-import numpy as np
-import matplotlib.pyplot as plt

-def plot_trajectory(traj):
-    plt.figure()
-    plt.plot(traj[:, 3])
-    plt.legend()
-    plt.show()
-
-def run_replanning_envs(env_name="BoxPushingProDMP-v0", seed=1, iterations=1, render=True):
+def example_run_replanning_env(env_name="BoxPushingDenseReplanProDMP-v0", seed=1, iterations=1, render=False):
     env = fancy_gym.make(env_name, seed=seed)
     env.reset()
     for i in range(iterations):
         done = False
-        desired_pos_traj = np.zeros((100, 7))
-        desired_vel_traj = np.zeros((100, 7))
-        real_pos_traj = np.zeros((100, 7))
-        real_vel_traj = np.zeros((100, 7))
-        t = 0
         while done is False:
             ac = env.action_space.sample()
             obs, reward, done, info = env.step(ac)
-            desired_pos_traj[t: t + 25, :] = info['desired_pos']
-            desired_vel_traj[t: t + 25, :] = info['desired_vel']
-            # real_pos_traj.append(info['current_pos'])
-            # real_vel_traj.append(info['current_vel'])
-            t += 25
             if render:
                 env.render(mode="human")
             if done:
                 env.reset()
-    plot_trajectory(desired_pos_traj)
     env.close()
     del env

+def example_custom_replanning_envs(seed=0, iteration=100, render=True):
+    # id for a step-based environment
+    base_env_id = "BoxPushingDense-v0"
+
+    wrappers = [fancy_gym.envs.mujoco.box_pushing.mp_wrapper.MPWrapper]
+
+    trajectory_generator_kwargs = {'trajectory_generator_type': 'prodmp',
+                                   'weight_scale': 1}
+    phase_generator_kwargs = {'phase_generator_type': 'exp'}
+    controller_kwargs = {'controller_type': 'velocity'}
+    basis_generator_kwargs = {'basis_generator_type': 'prodmp',
+                              'num_basis': 5}
+
+    # max_planning_times: the maximum number of plans can be generated
+    # replanning_schedule: the trigger for replanning
+    # condition_on_desired: use desired state as the boundary condition for the next plan
+    black_box_kwargs = {'max_planning_times': 4,
+                        'replanning_schedule': lambda pos, vel, obs, action, t: t % 25 == 0,
+                        'desired_traj_bc': True}
+
+    env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs=black_box_kwargs,
+                            traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
+                            phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
+                            seed=seed)
+    if render:
+        env.render(mode="human")
+
+    obs = env.reset()
+
+    for i in range(iteration):
+        ac = env.action_space.sample()
+        obs, reward, done, info = env.step(ac)
+        if done:
+            env.reset()
+
+    env.close()
+    del env
+
+
 if __name__ == "__main__":
-    run_replanning_envs(env_name="BoxPushingDenseProDMP-v0", seed=1, iterations=1, render=False)
\ No newline at end of file
+    # run a registered replanning environment
+    example_run_replanning_env(env_name="BoxPushingDenseReplanProDMP-v0", seed=1, iterations=1, render=False)
+
+    # run a custom replanning environment
+    example_custom_replanning_envs(seed=0, iteration=100, render=True)
\ No newline at end of file

diff --git a/fancy_gym/examples/example_sim_env.py b/fancy_gym/examples/example_sim_env.py
deleted file mode 100644
index f949a89..0000000
--- a/fancy_gym/examples/example_sim_env.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import gym_blockpush
-import gym
-
-env = gym.make("blockpush-v0")
-env.start()
-env.scene.reset()
-for i in range(100):
-    env.step(env.action_space.sample())
-    env.render()
\ No newline at end of file

diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py
index 67d93ae..445b8b9 100644
--- a/fancy_gym/examples/examples_movement_primitives.py
+++ b/fancy_gym/examples/examples_movement_primitives.py
@@ -164,7 +164,7 @@ if __name__ == '__main__':
     example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render)

     # ProDMP
-    example_mp("BoxPushingDenseProDMP-v0", seed=10, iterations=4, render=render)
+    example_mp("BoxPushingDenseReplanProDMP-v0", seed=10, iterations=4, render=render)

     # Altered basis functions
     obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)

diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py
index 0ba7a4a..3c73ba9 100644
--- a/fancy_gym/utils/make_env_helpers.py
+++ b/fancy_gym/utils/make_env_helpers.py
@@ -175,9 +175,6 @@ def make_bb(
     if phase_kwargs.get('learn_delay'):
         phase_kwargs["delay_bound"] = [0, black_box_kwargs['duration'] - env.dt * 2]

-    if traj_gen_kwargs['trajectory_generator_type'] == 'prodmp':
-        assert basis_kwargs['basis_generator_type'] == 'prodmp', 'prodmp trajectory generator requires prodmp basis generator'
-
     phase_gen = get_phase_generator(**phase_kwargs)
     basis_gen = get_basis_generator(phase_generator=phase_gen, **basis_kwargs)
     controller = get_controller(**controller_kwargs)

diff --git a/test/test_black_box.py b/test/test_black_box.py
index e95cf12..5ade1ae 100644
--- a/test/test_black_box.py
+++ b/test/test_black_box.py
@@ -158,7 +158,7 @@ def test_context_space(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapp
 @pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp'])
 @pytest.mark.parametrize('num_dof', [0, 1, 2, 5])
-@pytest.mark.parametrize('num_basis', [0, 2, 5])  # should add 1 back after the bug is fixed
+@pytest.mark.parametrize('num_basis', [0, 1, 2, 5])
 @pytest.mark.parametrize('learn_tau', [True, False])
 @pytest.mark.parametrize('learn_delay', [True, False])
 def test_action_space(mp_type: str, num_dof: int, num_basis: int, learn_tau: bool, learn_delay: bool):
@@ -343,32 +343,4 @@ def test_learn_tau_and_delay(mp_type: str, tau: float, delay: float):
         active_pos = pos[delay_time_steps: joint_time_steps - 1]
         active_vel = vel[delay_time_steps: joint_time_steps - 2]
         assert np.all(active_pos != pos[-1]) and np.all(active_pos != pos[0])
-        assert np.all(active_vel != vel[-1]) and np.all(active_vel != vel[0])
-
-
-@pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
-@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4])
-@pytest.mark.parametrize('sub_segment_steps', [5, 10])
-def test_replanning_schedule(mp_type: str, max_planning_times: int, sub_segment_steps: int):
-    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
-    phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear'
-    env = fancy_gym.make_bb('toy-v0', [ToyWrapper],
-                            {'max_planning_times': max_planning_times,
-                             'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0,
-                             'verbose': 2},
-                            {'trajectory_generator_type': mp_type,
-                             },
-                            {'controller_type': 'motor'},
-                            {'phase_generator_type': phase_generator_type,
-                             'learn_tau': False,
-                             'learn_delay': False
-                             },
-                            {'basis_generator_type': basis_generator_type,
-                             },
-                            seed=SEED)
-    _ = env.reset()
-    d = False
-    for i in range(max_planning_times):
-        _, _, d, _ = env.step(env.action_space.sample())
-    assert d
-
+        assert np.all(active_vel != vel[-1]) and np.all(active_vel != vel[0])
\ No newline at end of file

diff --git a/test/test_replanning_envs.py b/test/test_replanning_envs.py
deleted file mode 100644
index 4228284..0000000
--- a/test/test_replanning_envs.py
+++ /dev/null
@@ -1,82 +0,0 @@
-from itertools import chain
-from typing import Tuple, Type, Union, Optional, Callable
-
-import gym
-import numpy as np
-import pytest
-from gym import register
-from gym.core import ActType, ObsType
-
-import fancy_gym
-from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
-
-import fancy_gym
-from test.utils import run_env, run_env_determinism
-
-Fancy_ProDMP_IDS = fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS['ProDMP']
-
-All_ProDMP_IDS = fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS['ProDMP']
-
-
-class Object(object):
-    pass
-
-
-class ToyEnv(gym.Env):
-    observation_space = gym.spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float64)
-    action_space = gym.spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float64)
-    dt = 0.02
-
-    def __init__(self, a: int = 0, b: float = 0.0, c: list = [], d: dict = {}, e: Object = Object()):
-        self.a, self.b, self.c, self.d, self.e = a, b, c, d, e
-
-    def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
-              options: Optional[dict] = None) -> Union[ObsType, Tuple[ObsType, dict]]:
-        return np.array([-1])
-
-    def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]:
-        return np.array([-1]), 1, False, {}
-
-    def render(self, mode="human"):
-        pass
-
-
-class ToyWrapper(RawInterfaceWrapper):
-
-    @property
-    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
-        return np.ones(self.action_space.shape)
-
-    @property
-    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return np.zeros(self.action_space.shape)
-
-@pytest.fixture(scope="session", autouse=True)
-def setup():
-    register(
-        id=f'toy-v0',
-        entry_point='test.test_black_box:ToyEnv',
-        max_episode_steps=50,
-    )
-# @pytest.mark.parametrize('env_id', All_ProDMP_IDS)
-# def test_replanning_envs(env_id: str):
-#     """Tests that ProDMP environments run without errors using random actions."""
-#     run_env(env_id)
-#
-# @pytest.mark.parametrize('env_id', All_ProDMP_IDS)
-# def test_replanning_determinism(env_id: str):
-#     """Tests that ProDMP environments are deterministic."""
-#     run_env_determinism(env_id, 0)
-
-@pytest.mark.parametrize('mp_type', ['promp', 'dmp', 'prodmp'])
-def test_missing_local_state(mp_type: str):
-    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
-
-    env = fancy_gym.make_bb('toy-v0', [RawInterfaceWrapper], {},
-                            {'trajectory_generator_type': mp_type},
-                            {'controller_type': 'motor'},
-                            {'phase_generator_type': 'exp'},
-                            {'basis_generator_type': basis_generator_type})
-    env.reset()
-    with pytest.raises(NotImplementedError):
-        env.step(env.action_space.sample())
\ No newline at end of file

diff --git a/test/test_replanning_sequencing.py b/test/test_replanning_sequencing.py
index 31d4f80..9d04d02 100644
--- a/test/test_replanning_sequencing.py
+++ b/test/test_replanning_sequencing.py
@@ -304,4 +304,30 @@ def test_replanning_with_learn_delay_and_tau(mp_type: str, max_planning_times: i

         planning_times += 1

-    assert planning_times == max_planning_times
\ No newline at end of file
+    assert planning_times == max_planning_times
+
+@pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
+@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4])
+@pytest.mark.parametrize('sub_segment_steps', [5, 10])
+def test_replanning_schedule(mp_type: str, max_planning_times: int, sub_segment_steps: int):
+    basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
+    phase_generator_type = 'exp' if mp_type == 'prodmp' else 'linear'
+    env = fancy_gym.make_bb('toy-v0', [ToyWrapper],
+                            {'max_planning_times': max_planning_times,
+                             'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0,
+                             'verbose': 2},
+                            {'trajectory_generator_type': mp_type,
+                             },
+                            {'controller_type': 'motor'},
+                            {'phase_generator_type': phase_generator_type,
+                             'learn_tau': False,
+                             'learn_delay': False
+                             },
+                            {'basis_generator_type': basis_generator_type,
+                             },
+                            seed=SEED)
+    _ = env.reset()
+    d = False
+    for i in range(max_planning_times):
+        _, _, d, _ = env.step(env.action_space.sample())
+    assert d
{'max_planning_times': max_planning_times, + 'replanning_schedule': lambda pos, vel, obs, action, t: t % sub_segment_steps == 0, + 'verbose': 2}, + {'trajectory_generator_type': mp_type, + }, + {'controller_type': 'motor'}, + {'phase_generator_type': phase_generator_type, + 'learn_tau': False, + 'learn_delay': False + }, + {'basis_generator_type': basis_generator_type, + }, + seed=SEED) + _ = env.reset() + d = False + for i in range(max_planning_times): + _, _, d, _ = env.step(env.action_space.sample()) + assert d From 9539ba14246656fc61bbc784141f1d0fc6710930 Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Sun, 20 Nov 2022 22:00:02 +0100 Subject: [PATCH 09/27] fix some typos --- fancy_gym/envs/__init__.py | 2 +- fancy_gym/examples/example_replanning_envs.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index bb6a664..890e043 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -500,7 +500,7 @@ for _v in _versions: kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 2 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t : t % 25 == 0 - kwargs_dict_box_pushing_prodmp['black_box_kwargs']['condition_on_desried'] = True + kwargs_dict_box_pushing_prodmp['black_box_kwargs']['condition_on_desired'] = True register( id=_env_id, entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper', diff --git a/fancy_gym/examples/example_replanning_envs.py b/fancy_gym/examples/example_replanning_envs.py index d993a71..977ce9e 100644 --- a/fancy_gym/examples/example_replanning_envs.py +++ b/fancy_gym/examples/example_replanning_envs.py @@ -33,7 +33,7 @@ def example_custom_replanning_envs(seed=0, iteration=100, render=True): # condition_on_desired: use desired state as the boundary condition for the next plan black_box_kwargs = {'max_planning_times': 4, 'replanning_schedule': lambda pos, vel, obs, action, t: t % 25 == 0, - 'desired_traj_bc': True} + 'condition_on_desired': True} env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs=black_box_kwargs, traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, @@ -59,4 +59,4 @@ if __name__ == "__main__": example_run_replanning_env(env_name="BoxPushingDenseReplanProDMP-v0", seed=1, iterations=1, render=False) # run a custom replanning environment - example_custom_replanning_envs(seed=0, iteration=100, render=True) \ No newline at end of file + example_custom_replanning_envs(seed=0, iteration=8, render=True) \ No newline at end of file From a9ad1b528710a5f15e17c05ad6d7361c70c8676f Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Mon, 21 Nov 2022 10:17:56 +0100 Subject: [PATCH 10/27] minor update: create local variables for current pos/vel --- fancy_gym/black_box/black_box_wrapper.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index 7fb085c..9d41615 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -162,7 +162,9 @@ class BlackBoxWrapper(gym.ObservationWrapper): self.plan_steps += 1 for t, (pos, vel) in enumerate(zip(position, velocity)): - step_action = self.tracking_controller.get_action(pos, vel, self.current_pos, self.current_vel) + current_pos = self.current_pos + current_vel = 
self.current_vel + step_action = self.tracking_controller.get_action(pos, vel, current_pos, current_vel) c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high) obs, c_reward, done, info = self.env.step(c_action) rewards[t] = c_reward @@ -179,7 +181,7 @@ class BlackBoxWrapper(gym.ObservationWrapper): if self.render_kwargs: self.env.render(**self.render_kwargs) - if done or self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action, + if done or self.replanning_schedule(current_pos, current_vel, obs, c_action, t + 1 + self.current_traj_steps): if self.max_planning_times is not None and self.plan_steps >= self.max_planning_times: From f47f00a292fe4851934cb53bdfd29b8431d62971 Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Wed, 23 Nov 2022 17:02:04 +0100 Subject: [PATCH 11/27] updates && disable auto_scale_basis for table tennis --- fancy_gym/envs/__init__.py | 2 +- fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index ef52785..b3ba3aa 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -569,7 +569,7 @@ for _v in _versions: kwargs_dict_tt_prodmp['controller_kwargs']['d_gains'] = 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]) kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['weights_scale'] = 1.0 kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['goal_scale'] = 1.0 - kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = True + kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = False kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0 kwargs_dict_tt_prodmp['phase_generator_kwargs']['learn_tau'] = True kwargs_dict_tt_prodmp['phase_generator_kwargs']['learn_delay'] = True diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py index 9b2359a..72b92e0 100644 --- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py +++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py @@ -151,8 +151,8 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): def reset_model(self): self._steps = 0 - self._init_ball_state = self._generate_valid_init_ball(random_pos=True, random_vel=False) - self._goal_pos = self._generate_goal_pos(random=True) + self._init_ball_state = self._generate_valid_init_ball(random_pos=False, random_vel=False) + self._goal_pos = self._generate_goal_pos(random=False) self.data.joint("tar_x").qpos = self._init_ball_state[0] self.data.joint("tar_y").qpos = self._init_ball_state[1] self.data.joint("tar_z").qpos = self._init_ball_state[2] From 96f17e02cfe4e5dee6dbc36bb857ed5b63531107 Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Thu, 24 Nov 2022 14:15:09 +0100 Subject: [PATCH 12/27] random sampling for goal switching & adjust height for initial ball state --- fancy_gym/envs/__init__.py | 3 +- .../mujoco/table_tennis/table_tennis_env.py | 61 +++++++++++-------- 2 files changed, 37 insertions(+), 27 deletions(-) diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index b3ba3aa..f74bdcd 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -260,7 +260,8 @@ for ctxt_dim in [2, 4]: "ctxt_dim": ctxt_dim, 'frame_skip': 4, 'enable_wind': False, - 'enable_switching_goal': False, + 'enable_switching_goal': True, + 'enable_air': False, } ) diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py 
b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py index 72b92e0..7a77443 100644 --- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py +++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py @@ -87,8 +87,10 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): unstable_simulation = False if self._enable_goal_switching: - if self._steps == 45 and self.np_random.uniform(0, 1) < 0.5: - self._goal_pos[1] = -self._goal_pos[1] + if self._steps == 99 and self.np_random.uniform(0, 1) < 0.5: + new_goal_pos = self._generate_goal_pos(random=True) + new_goal_pos[1] = -new_goal_pos[1] + self._goal_pos = new_goal_pos self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]]) mujoco.mj_forward(self.model, self.data) @@ -151,8 +153,9 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): def reset_model(self): self._steps = 0 - self._init_ball_state = self._generate_valid_init_ball(random_pos=False, random_vel=False) - self._goal_pos = self._generate_goal_pos(random=False) + self._init_ball_state = self._generate_valid_init_ball(random_pos=True, random_vel=False) + self._init_ball_state[2] = 1.85 + self._goal_pos = self._generate_goal_pos(random=True) self.data.joint("tar_x").qpos = self._init_ball_state[0] self.data.joint("tar_y").qpos = self._init_ball_state[1] self.data.joint("tar_z").qpos = self._init_ball_state[2] @@ -167,7 +170,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): mujoco.mj_forward(self.model, self.data) if self._enable_wind: - self._wind_vel[1] = self.np_random.uniform(low=-5, high=5, size=1) + self._wind_vel[1] = self.np_random.uniform(low=-10, high=10, size=1) self.model.opt.wind[:3] = self._wind_vel self._hit_ball = False @@ -251,37 +254,43 @@ def plot_ball_traj_2d(x_traj, y_traj): ax.plot(x_traj, y_traj) plt.show() -def plot_single_axis(traj, title): +def plot_compare_trajs(traj1, traj2, title): import matplotlib.pyplot as plt fig = plt.figure() ax = fig.add_subplot(111) - ax.plot(traj) + ax.plot(traj1, color='r', label='traj1') + ax.plot(traj2, color='b', label='traj2') ax.set_title(title) + plt.legend() plt.show() if __name__ == "__main__": - env = TableTennisEnv(enable_air=True) - # env_with_air = TableTennisEnv(enable_air=True) - for _ in range(1): - obs1 = env.reset() + env_air = TableTennisEnv(enable_air=True, enable_wind=False) + env_no_air = TableTennisEnv(enable_air=False, enable_wind=False) + for _ in range(10): + obs1 = env_air.reset() + obs2 = env_no_air.reset() # obs2 = env_with_air.reset() - x_pos = [] - y_pos = [] - z_pos = [] - x_vel = [] - y_vel = [] - z_vel = [] + air_x_pos = [] + no_air_x_pos = [] + # y_pos = [] + # z_pos = [] + # x_vel = [] + # y_vel = [] + # z_vel = [] for _ in range(2000): - obs, reward, done, info = env.step(np.zeros(7)) + # env_air.render("human") + obs1, reward1, done1, info1 = env_air.step(np.zeros(7)) + obs2, reward2, done2, info2 = env_no_air.step(np.zeros(7)) # _, _, _, _ = env_no_air.step(np.zeros(7)) - x_pos.append(env.data.joint("tar_x").qpos[0]) - y_pos.append(env.data.joint("tar_y").qpos[0]) - z_pos.append(env.data.joint("tar_z").qpos[0]) - x_vel.append(env.data.joint("tar_x").qvel[0]) - y_vel.append(env.data.joint("tar_y").qvel[0]) - z_vel.append(env.data.joint("tar_z").qvel[0]) + air_x_pos.append(env_air.data.joint("tar_z").qpos[0]) + no_air_x_pos.append(env_no_air.data.joint("tar_z").qpos[0]) + # z_pos.append(env.data.joint("tar_z").qpos[0]) + # x_vel.append(env.data.joint("tar_x").qvel[0]) + # y_vel.append(env.data.joint("tar_y").qvel[0]) + # z_vel.append(env.data.joint("tar_z").qvel[0]) # print(reward) 
- if done: + if info1["num_steps"] == 150: # plot_ball_traj_2d(x_pos, y_pos) - plot_single_axis(x_pos, title="x_vel without air") + plot_compare_trajs(air_x_pos, no_air_x_pos, title="z_pos with/out air") break From b883ad63b7417509b4bc754c8ae448c1c3dd490e Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Thu, 24 Nov 2022 16:50:57 +0100 Subject: [PATCH 13/27] using artificial wind field --- fancy_gym/envs/__init__.py | 3 +- .../envs/mujoco/table_tennis/mp_wrapper.py | 2 +- .../mujoco/table_tennis/table_tennis_env.py | 41 ++++++++++++------- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index f74bdcd..58512bc 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -260,8 +260,9 @@ for ctxt_dim in [2, 4]: "ctxt_dim": ctxt_dim, 'frame_skip': 4, 'enable_wind': False, - 'enable_switching_goal': True, + 'enable_switching_goal': False, 'enable_air': False, + 'enable_artifical_wind': True, } ) diff --git a/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py index fa26c90..b3519af 100644 --- a/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py +++ b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py @@ -16,7 +16,7 @@ class MPWrapper(RawInterfaceWrapper): [False] * 7, # joints velocity [True] * 2, # position ball x, y [False] * 1, # position ball z - # [False] * 3, # velocity ball x, y, z + [True] * 3, # velocity ball x, y, z [True] * 2, # target landing position # [True] * 1, # time ]) diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py index 7a77443..5283e54 100644 --- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py +++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py @@ -24,7 +24,9 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4, enable_switching_goal: bool = False, - enable_wind: bool = False, enable_magnus: bool = False, + enable_wind: bool = False, + enable_artifical_wind: bool = False, + enable_magnus: bool = False, enable_air: bool = False): utils.EzPickle.__init__(**locals()) self._steps = 0 @@ -48,6 +50,10 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): self._enable_goal_switching = enable_switching_goal + self._enable_artifical_wind = enable_artifical_wind + + self._artifical_force = 0.
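# [Editor's sketch, not part of the patch] The "artificial wind" added in this commit is a
# per-episode constant generalized force on the ball, not a simulated air model. Assuming the
# reset and step hunks that follow, the mechanism reduces to two lines:
#     self._artifical_force = self.np_random.uniform(low=-0.1, high=0.1)   # resampled once per reset
#     self.data.qfrc_applied[-2] = self._artifical_force                   # reapplied every physics substep
# qfrc_applied is MuJoCo's per-DoF applied-force vector; index -2 appears to target the ball's
# y-translation joint (tar_y), so the ball drifts laterally without enabling the density/viscosity model.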
+ MujocoEnv.__init__(self, model_path=os.path.join(os.path.dirname(__file__), "assets", "xml", "table_tennis_env.xml"), frame_skip=frame_skip, @@ -86,6 +92,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): unstable_simulation = False + if self._enable_goal_switching: if self._steps == 99 and self.np_random.uniform(0, 1) < 0.5: new_goal_pos = self._generate_goal_pos(random=True) @@ -95,6 +102,8 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): mujoco.mj_forward(self.model, self.data) for _ in range(self.frame_skip): + if self._enable_artifical_wind: + self.data.qfrc_applied[-2] = self._artifical_force try: self.do_simulation(action, 1) except Exception as e: @@ -154,7 +163,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): def reset_model(self): self._steps = 0 self._init_ball_state = self._generate_valid_init_ball(random_pos=True, random_vel=False) - self._init_ball_state[2] = 1.85 + # self._init_ball_state[2] = 1.85 self._goal_pos = self._generate_goal_pos(random=True) self.data.joint("tar_x").qpos = self._init_ball_state[0] self.data.joint("tar_y").qpos = self._init_ball_state[1] @@ -163,6 +172,9 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): self.data.joint("tar_y").qvel = self._init_ball_state[4] self.data.joint("tar_z").qvel = self._init_ball_state[5] + if self._enable_artifical_wind: + self._artifical_force = self.np_random.uniform(low=-0.1, high=0.1) + self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]]) self.data.qpos[:7] = np.array([0., 0., 0., 1.5, 0., 0., 1.5]) @@ -196,9 +208,9 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): self.data.joint("tar_x").qpos.copy(), self.data.joint("tar_y").qpos.copy(), self.data.joint("tar_z").qpos.copy(), - # self.data.joint("tar_x").qvel.copy(), - # self.data.joint("tar_y").qvel.copy(), - # self.data.joint("tar_z").qvel.copy(), + self.data.joint("tar_x").qvel.copy(), + self.data.joint("tar_y").qvel.copy(), + self.data.joint("tar_z").qvel.copy(), # self.data.body("target_ball").xvel.copy(), self._goal_pos.copy(), ]) @@ -265,7 +277,7 @@ def plot_compare_trajs(traj1, traj2, title): plt.show() if __name__ == "__main__": - env_air = TableTennisEnv(enable_air=True, enable_wind=False) + env_air = TableTennisEnv(enable_air=False, enable_wind=False, enable_artifical_wind=True) env_no_air = TableTennisEnv(enable_air=False, enable_wind=False) for _ in range(10): obs1 = env_air.reset() @@ -279,18 +291,17 @@ if __name__ == "__main__": # y_vel = [] # z_vel = [] for _ in range(2000): - # env_air.render("human") + env_air.render("human") obs1, reward1, done1, info1 = env_air.step(np.zeros(7)) obs2, reward2, done2, info2 = env_no_air.step(np.zeros(7)) - # _, _, _, _ = env_no_air.step(np.zeros(7)) + # # _, _, _, _ = env_no_air.step(np.zeros(7)) air_x_pos.append(env_air.data.joint("tar_z").qpos[0]) no_air_x_pos.append(env_no_air.data.joint("tar_z").qpos[0]) - # z_pos.append(env.data.joint("tar_z").qpos[0]) - # x_vel.append(env.data.joint("tar_x").qvel[0]) - # y_vel.append(env.data.joint("tar_y").qvel[0]) - # z_vel.append(env.data.joint("tar_z").qvel[0]) - # print(reward) + # # z_pos.append(env.data.joint("tar_z").qpos[0]) + # # x_vel.append(env.data.joint("tar_x").qvel[0]) + # # y_vel.append(env.data.joint("tar_y").qvel[0]) + # # z_vel.append(env.data.joint("tar_z").qvel[0]) + # # print(reward) if info1["num_steps"] == 150: - # plot_ball_traj_2d(x_pos, y_pos) - plot_compare_trajs(air_x_pos, no_air_x_pos, title="z_pos with/out air") + plot_compare_trajs(air_x_pos, no_air_x_pos, title="z_pos with/out wind") break From 
833f0735ea8b9beaa8d5d152fbdc8691d9aabfca Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Fri, 25 Nov 2022 13:31:05 +0100 Subject: [PATCH 14/27] delete import torch in black box wrapper --- fancy_gym/black_box/black_box_wrapper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index 9d41615..2a2a3f5 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -2,7 +2,6 @@ from typing import Tuple, Optional, Callable import gym import numpy as np -import torch from gym import spaces from mp_pytorch.mp.mp_interfaces import MPInterface From 932431a12099e1de83a6d4e5ca0058172c55b27c Mon Sep 17 00:00:00 2001 From: "xb1196@partner.kit.edu" Date: Fri, 25 Nov 2022 16:02:52 +0100 Subject: [PATCH 15/27] temporary saving --- fancy_gym/envs/__init__.py | 4 ++-- fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py | 2 +- fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index 58512bc..9e45c40 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -262,7 +262,7 @@ for ctxt_dim in [2, 4]: 'enable_wind': False, 'enable_switching_goal': False, 'enable_air': False, - 'enable_artifical_wind': True, + 'enable_artifical_wind': False, } ) @@ -578,7 +578,7 @@ for _v in _versions: kwargs_dict_tt_prodmp['basis_generator_kwargs']['num_basis'] = 2 kwargs_dict_tt_prodmp['basis_generator_kwargs']['alpha'] = 25. kwargs_dict_tt_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 # 3.5, 4 to try - kwargs_dict_tt_prodmp['basis_generator_kwargs']['pre_compute_length_factor'] = 5 + #kwargs_dict_tt_prodmp['basis_generator_kwargs']['pre_compute_length_factor'] = 5 kwargs_dict_tt_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 kwargs_dict_tt_prodmp['black_box_kwargs']['max_planning_times'] = 3 kwargs_dict_tt_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t : t % 50 == 0 diff --git a/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py index b3519af..dcb2306 100644 --- a/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py +++ b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py @@ -16,7 +16,7 @@ class MPWrapper(RawInterfaceWrapper): [False] * 7, # joints velocity [True] * 2, # position ball x, y [False] * 1, # position ball z - [True] * 3, # velocity ball x, y, z + #[True] * 3, # velocity ball x, y, z [True] * 2, # target landing position # [True] * 1, # time ]) diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py index 5283e54..23a8668 100644 --- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py +++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py @@ -208,9 +208,9 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): self.data.joint("tar_x").qpos.copy(), self.data.joint("tar_y").qpos.copy(), self.data.joint("tar_z").qpos.copy(), - self.data.joint("tar_x").qvel.copy(), - self.data.joint("tar_y").qvel.copy(), - self.data.joint("tar_z").qvel.copy(), + #self.data.joint("tar_x").qvel.copy(), + #self.data.joint("tar_y").qvel.copy(), + #self.data.joint("tar_z").qvel.copy(), # self.data.body("target_ball").xvel.copy(), self._goal_pos.copy(), ]) From 2735e0bf24fa78ea2a45aa92bf09bf98e42d2b44 Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Fri, 25 Nov 2022 22:34:46 +0100 Subject: [PATCH 
16/27] add contextual obs option to invalid trajectory callback --- fancy_gym/black_box/black_box_wrapper.py | 17 ++++++++--------- .../envs/mujoco/table_tennis/mp_wrapper.py | 6 ++++-- .../examples/examples_movement_primitives.py | 13 +++++++------ 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index 1dddf2c..a8baa84 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -169,8 +169,6 @@ class BlackBoxWrapper(gym.ObservationWrapper): infos = dict() done = False - - if traj_is_valid: self.plan_steps += 1 for t, (pos, vel) in enumerate(zip(position, velocity)): @@ -207,18 +205,19 @@ class BlackBoxWrapper(gym.ObservationWrapper): infos.update({k: v[:t+1] for k, v in infos.items()}) self.current_traj_steps += t + 1 - if self.verbose >= 2: - infos['positions'] = position - infos['velocities'] = velocity - infos['step_actions'] = actions[:t + 1] - infos['step_observations'] = observations[:t + 1] - infos['step_rewards'] = rewards[:t + 1] + if self.verbose >= 2: + infos['positions'] = position + infos['velocities'] = velocity + infos['step_actions'] = actions[:t + 1] + infos['step_observations'] = observations[:t + 1] + infos['step_rewards'] = rewards[:t + 1] infos['trajectory_length'] = t + 1 trajectory_return = self.reward_aggregation(rewards[:t + 1]) return self.observation(obs), trajectory_return, done, infos else: - obs, trajectory_return, done, infos = self.env.invalid_traj_callback(action, position, velocity) + obs, trajectory_return, done, infos = self.env.invalid_traj_callback(action, position, velocity, + self.return_context_observation) return self.observation(obs), trajectory_return, done, infos def render(self, **kwargs): diff --git a/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py index dcb2306..1da8de5 100644 --- a/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py +++ b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py @@ -55,8 +55,8 @@ class MPWrapper(RawInterfaceWrapper): return False return True - def invalid_traj_callback(self, action, pos_traj: np.ndarray, vel_traj: np.ndarray) \ - -> Tuple[np.ndarray, float, bool, dict]: + def invalid_traj_callback(self, action, pos_traj: np.ndarray, vel_traj: np.ndarray, + return_contextual_obs: bool) -> Tuple[np.ndarray, float, bool, dict]: tau_invalid_penalty = 3 * (np.max([0, action[0] - tau_bound[1]]) + np.max([0, tau_bound[0] - action[0]])) delay_invalid_penalty = 3 * (np.max([0, action[1] - delay_bound[1]]) + np.max([0, delay_bound[0] - action[1]])) violate_high_bound_error = np.mean(np.maximum(pos_traj - jnt_pos_high, 0)) @@ -64,6 +64,8 @@ class MPWrapper(RawInterfaceWrapper): invalid_penalty = tau_invalid_penalty + delay_invalid_penalty + \ violate_high_bound_error + violate_low_bound_error obs = np.concatenate([self.get_obs(), np.array([0])]) + if return_contextual_obs: + obs = self.get_obs() return obs, -invalid_penalty, True, { "hit_ball": [False], "ball_returned_success": [False], diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py index 445b8b9..7ac6c69 100644 --- a/fancy_gym/examples/examples_movement_primitives.py +++ b/fancy_gym/examples/examples_movement_primitives.py @@ -157,17 +157,18 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): if __name__ == '__main__': render = True # DMP - example_mp("HoleReacherDMP-v0", seed=10, iterations=5, 
render=render) + # example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render) # ProMP - example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render) - example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render) + # example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render) + # example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render) + example_mp("TableTennis4DProMP-v0", seed=10, iterations=20, render=render) # ProDMP - example_mp("BoxPushingDenseReplanProDMP-v0", seed=10, iterations=4, render=render) + # example_mp("BoxPushingDenseReplanProDMP-v0", seed=10, iterations=4, render=render) # Altered basis functions - obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render) + # obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render) # Custom MP - example_fully_custom_mp(seed=10, iterations=1, render=render) + # example_fully_custom_mp(seed=10, iterations=1, render=render) From 28aa430fd24b8a36c94a215aea20e27bfca56758 Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Fri, 25 Nov 2022 22:38:21 +0100 Subject: [PATCH 17/27] fix minor bugs --- fancy_gym/black_box/black_box_wrapper.py | 2 +- fancy_gym/examples/examples_movement_primitives.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index a8baa84..ad79fc9 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -194,7 +194,7 @@ class BlackBoxWrapper(gym.ObservationWrapper): if done or self.replanning_schedule(current_pos, current_vel, obs, c_action, t + 1 + self.current_traj_steps): - if self.max_planning_times is not None and self.plan_steps >= self.max_planning_times: + if not done and self.max_planning_times is not None and self.plan_steps >= self.max_planning_times: continue self.condition_pos = pos if self.condition_on_desired else None diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py index 7ac6c69..a58d08b 100644 --- a/fancy_gym/examples/examples_movement_primitives.py +++ b/fancy_gym/examples/examples_movement_primitives.py @@ -162,11 +162,11 @@ if __name__ == '__main__': # ProMP # example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render) # example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render) - example_mp("TableTennis4DProMP-v0", seed=10, iterations=20, render=render) + # example_mp("TableTennis4DProMP-v0", seed=10, iterations=20, render=render) # ProDMP # example_mp("BoxPushingDenseReplanProDMP-v0", seed=10, iterations=4, render=render) - + example_mp("TableTennis4DProDMP-v0", seed=10, iterations=20, render=render) # Altered basis functions # obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render) From bf0de5f54d311a146523f1b295c52fe68d2c5005 Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Sun, 27 Nov 2022 01:25:31 +0100 Subject: [PATCH 18/27] set boundary condition to None at reset() & end replanning if env is done --- fancy_gym/black_box/black_box_wrapper.py | 10 +++++----- fancy_gym/envs/__init__.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index 2a2a3f5..d8dcbaa 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -161,9 +161,7 @@ class 
BlackBoxWrapper(gym.ObservationWrapper): self.plan_steps += 1 for t, (pos, vel) in enumerate(zip(position, velocity)): - current_pos = self.current_pos - current_vel = self.current_vel - step_action = self.tracking_controller.get_action(pos, vel, current_pos, current_vel) + step_action = self.tracking_controller.get_action(pos, vel, self.current_pos, self.current_vel) c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high) obs, c_reward, done, info = self.env.step(c_action) rewards[t] = c_reward @@ -180,10 +178,10 @@ class BlackBoxWrapper(gym.ObservationWrapper): if self.render_kwargs: self.env.render(**self.render_kwargs) - if done or self.replanning_schedule(current_pos, current_vel, obs, c_action, + if done or self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action, t + 1 + self.current_traj_steps): - if self.max_planning_times is not None and self.plan_steps >= self.max_planning_times: + if not done and self.max_planning_times is not None and self.plan_steps >= self.max_planning_times: continue self.condition_pos = pos if self.condition_on_desired else None @@ -214,4 +212,6 @@ class BlackBoxWrapper(gym.ObservationWrapper): self.current_traj_steps = 0 self.plan_steps = 0 self.traj_gen.reset() + self.condition_vel = None + self.condition_pos = None return super(BlackBoxWrapper, self).reset() diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index 890e043..f1a59ec 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -485,7 +485,7 @@ for _v in _versions: for _v in _versions: _name = _v.split("-") - _env_id = f'{_name[0]}ReplanProDMP-{_name[1]}' + _env_id = f'{_name[0]}ProDMP-{_name[1]}' kwargs_dict_box_pushing_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP) kwargs_dict_box_pushing_prodmp['wrappers'].append(mujoco.box_pushing.MPWrapper) kwargs_dict_box_pushing_prodmp['name'] = _v @@ -498,7 +498,7 @@ for _v in _versions: kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['num_basis'] = 4 kwargs_dict_box_pushing_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 kwargs_dict_box_pushing_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 - kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 2 + kwargs_dict_box_pushing_prodmp['black_box_kwargs']['max_planning_times'] = 4 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t : t % 25 == 0 kwargs_dict_box_pushing_prodmp['black_box_kwargs']['condition_on_desired'] = True register( From bb94c9c70705f3904660f67b8a6b19d7f696d01f Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Sun, 27 Nov 2022 01:26:18 +0100 Subject: [PATCH 19/27] typos --- fancy_gym/envs/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index f1a59ec..d504990 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -485,7 +485,7 @@ for _v in _versions: for _v in _versions: _name = _v.split("-") - _env_id = f'{_name[0]}ProDMP-{_name[1]}' + _env_id = f'{_name[0]}ReplanProDMP-{_name[1]}' kwargs_dict_box_pushing_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP) kwargs_dict_box_pushing_prodmp['wrappers'].append(mujoco.box_pushing.MPWrapper) kwargs_dict_box_pushing_prodmp['name'] = _v From e7d2454e304ccc69539e9252469619b9d5e90056 Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Mon, 28 Nov 2022 10:49:05 +0100 Subject: [PATCH 20/27] make if condition better --- fancy_gym/black_box/black_box_wrapper.py | 10 ++++------ 1 
file changed, 4 insertions(+), 6 deletions(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index d8dcbaa..66c5f3e 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -22,7 +22,7 @@ class BlackBoxWrapper(gym.ObservationWrapper): replanning_schedule: Optional[ Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int], bool]] = None, reward_aggregation: Callable[[np.ndarray], float] = np.sum, - max_planning_times: int = None, + max_planning_times: int = np.inf, condition_on_desired: bool = False ): """ @@ -178,11 +178,9 @@ class BlackBoxWrapper(gym.ObservationWrapper): if self.render_kwargs: self.env.render(**self.render_kwargs) - if done or self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action, - t + 1 + self.current_traj_steps): - - if not done and self.max_planning_times is not None and self.plan_steps >= self.max_planning_times: - continue + if done or (self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action, + t + 1 + self.current_traj_steps) + and self.plan_steps < self.max_planning_times): self.condition_pos = pos if self.condition_on_desired else None self.condition_vel = vel if self.condition_on_desired else None From f376772c2230ff4a55880ba779cfa163d9a3f99d Mon Sep 17 00:00:00 2001 From: Hongyi Zhou Date: Thu, 1 Dec 2022 11:28:03 +0100 Subject: [PATCH 21/27] add invalid trajectory callback & invalid traj return & register all 3 variants of table tennis tasks --- fancy_gym/black_box/black_box_wrapper.py | 26 ++- fancy_gym/black_box/raw_interface_wrapper.py | 18 ++- fancy_gym/envs/__init__.py | 40 +++-- fancy_gym/envs/mujoco/__init__.py | 2 +- .../envs/mujoco/table_tennis/__init__.py | 2 +- .../envs/mujoco/table_tennis/mp_wrapper.py | 64 +++----- .../mujoco/table_tennis/table_tennis_env.py | 148 ++++++++---------- .../examples/examples_movement_primitives.py | 9 +- fancy_gym/utils/make_env_helpers.py | 4 +- 9 files changed, 151 insertions(+), 162 deletions(-) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index ad79fc9..336ea44 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -63,14 +63,6 @@ class BlackBoxWrapper(gym.ObservationWrapper): self.traj_gen_action_space = self._get_traj_gen_action_space() self.action_space = self._get_action_space() - # no goal learning - # tricky_action_upperbound = [np.inf] * (self.traj_gen_action_space.shape[0] - 7) - # tricky_action_lowerbound = [-np.inf] * (self.traj_gen_action_space.shape[0] - 7) - # self.action_space = spaces.Box(np.array(tricky_action_lowerbound), np.array(tricky_action_upperbound), dtype=np.float32) - self.action_space.low[0] = 0.8 - self.action_space.high[0] = 1.5 - self.action_space.low[1] = 0.05 - self.action_space.high[1] = 0.15 self.observation_space = self._get_observation_space() # rendering @@ -93,8 +85,8 @@ class BlackBoxWrapper(gym.ObservationWrapper): return observation.astype(self.observation_space.dtype) def get_trajectory(self, action: np.ndarray) -> Tuple: - # duration = self.duration - duration = self.duration - self.current_traj_steps * self.dt + duration = self.duration + # duration = self.duration - self.current_traj_steps * self.dt if self.learn_sub_trajectories: duration = None # reset with every new call as we need to set all arguments, such as tau, delay, again.
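# [Editor's note, not part of the patch] The hunk above reverts get_trajectory to planning over
# the full episode duration on every call. A plausible reading, given the ProDMP replanning
# machinery in this series: each replanning step conditions the generator on the current (or
# desired) boundary state, so a fresh plan spans the whole horizon and the replanning schedule
# decides how much of it is actually executed. The alternative kept as a comment,
#     duration = self.duration - self.current_traj_steps * self.dt
# would instead shrink each new plan to the remaining episode time.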
@@ -157,8 +149,8 @@ class BlackBoxWrapper(gym.ObservationWrapper): # TODO remove this part, right now only needed for beer pong # mp_params, env_spec_params, proceed = self.env.episode_callback(action, self.traj_gen) position, velocity = self.get_trajectory(action) - traj_is_valid = self.env.episode_callback(action, position, velocity) - + traj_is_valid = self.env.preprocessing_and_validity_callback(action, position, velocity) + # insert validation here trajectory_length = len(position) rewards = np.zeros(shape=(trajectory_length,)) if self.verbose >= 2: @@ -169,7 +161,11 @@ class BlackBoxWrapper(gym.ObservationWrapper): infos = dict() done = False - if traj_is_valid: + if not traj_is_valid: + obs, trajectory_return, done, infos = self.env.invalid_traj_callback(action, position, velocity, + self.return_context_observation) + return self.observation(obs), trajectory_return, done, infos + else: self.plan_steps += 1 for t, (pos, vel) in enumerate(zip(position, velocity)): current_pos = self.current_pos @@ -215,10 +211,6 @@ class BlackBoxWrapper(gym.ObservationWrapper): infos['trajectory_length'] = t + 1 trajectory_return = self.reward_aggregation(rewards[:t + 1]) return self.observation(obs), trajectory_return, done, infos - else: - obs, trajectory_return, done, infos = self.env.invalid_traj_callback(action, position, velocity, - self.return_context_observation) - return self.observation(obs), trajectory_return, done, infos def render(self, **kwargs): """Only set render options here, such that they can be used during the rollout. diff --git a/fancy_gym/black_box/raw_interface_wrapper.py b/fancy_gym/black_box/raw_interface_wrapper.py index de34346..f41faab 100644 --- a/fancy_gym/black_box/raw_interface_wrapper.py +++ b/fancy_gym/black_box/raw_interface_wrapper.py @@ -52,6 +52,19 @@ class RawInterfaceWrapper(gym.Wrapper): """ return self.env.dt + def preprocessing_and_validity_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray) \ + -> Tuple[bool, np.ndarray, np.ndarray]: + """ + Used to preprocess the action and check if the desired trajectory is valid. + """ + return True, pos_traj, vel_traj + + def set_episode_arguments(self, action, pos_traj, vel_traj): + """ + Used to set the arguments for env that valid for the whole episode + """ + return pos_traj, vel_traj + def episode_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.array) -> Tuple[bool]: """ Used to extract the parameters for the movement primitive and other parameters from an action array which might @@ -68,7 +81,6 @@ class RawInterfaceWrapper(gym.Wrapper): def invalid_traj_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]: """ - Used to return a fake return from the environment if the desired trajectory is invalid. + Used to return a artificial return from the env if the desired trajectory is invalid. 
""" - obs = np.zeros(1) - return obs, 0, True, {} \ No newline at end of file + return np.zeros(1), 0, True, {} \ No newline at end of file diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py index 438bd9b..bbcdd91 100644 --- a/fancy_gym/envs/__init__.py +++ b/fancy_gym/envs/__init__.py @@ -18,6 +18,8 @@ from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP from .mujoco.box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, \ BoxPushingTemporalSpatialSparse, MAX_EPISODE_STEPS_BOX_PUSHING +from .mujoco.table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching, \ + MAX_EPISODE_STEPS_TABLE_TENNIS ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "ProDMP": []} @@ -248,17 +250,28 @@ for ctxt_dim in [2, 4]: register( id='TableTennis{}D-v0'.format(ctxt_dim), entry_point='fancy_gym.envs.mujoco:TableTennisEnv', - max_episode_steps=350, + max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, kwargs={ "ctxt_dim": ctxt_dim, 'frame_skip': 4, - 'enable_wind': False, - 'enable_switching_goal': False, - 'enable_air': False, - 'enable_artifical_wind': False, + 'goal_switching_step': None, + 'enable_artificial_wind': False, } ) +register( + id='TableTennisWind-v0', + entry_point='fancy_gym.envs.mujoco:TableTennisWind', + max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, +) + +register( + id='TableTennisGoalSwitching-v0', + entry_point='fancy_gym.envs.mujoco:TableTennisGoalSwitching', + max_episode_steps=MAX_EPISODE_STEPS_TABLE_TENNIS, +) + + # movement Primitive Environments ## Simple Reacher @@ -529,17 +542,22 @@ for _v in _versions: ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProDMP"].append(_env_id) ## Table Tennis -_versions = ['TableTennis2D-v0', 'TableTennis4D-v0'] +_versions = ['TableTennis2D-v0', 'TableTennis4D-v0', 'TableTennisWind-v0', 'TableTennisGoalSwitching-v0'] for _v in _versions: _name = _v.split("-") _env_id = f'{_name[0]}ProMP-{_name[1]}' kwargs_dict_tt_promp = deepcopy(DEFAULT_BB_DICT_ProMP) - kwargs_dict_tt_promp['wrappers'].append(mujoco.table_tennis.MPWrapper) + if _v == 'TableTennisWind-v0': + kwargs_dict_tt_promp['wrappers'].append(mujoco.table_tennis.TTVelObs_MPWrapper) + else: + kwargs_dict_tt_promp['wrappers'].append(mujoco.table_tennis.TT_MPWrapper) kwargs_dict_tt_promp['name'] = _v kwargs_dict_tt_promp['controller_kwargs']['p_gains'] = 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]) kwargs_dict_tt_promp['controller_kwargs']['d_gains'] = 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]) kwargs_dict_tt_promp['phase_generator_kwargs']['learn_tau'] = True kwargs_dict_tt_promp['phase_generator_kwargs']['learn_delay'] = True + kwargs_dict_tt_promp['phase_generator_kwargs']['tau_bound'] = [0.8, 1.5] + kwargs_dict_tt_promp['phase_generator_kwargs']['delay_bound'] = [0.05, 0.15] kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis'] = 3 kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2 kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis_zero_goal'] = 1 @@ -556,7 +574,10 @@ for _v in _versions: _name = _v.split("-") _env_id = f'{_name[0]}ProDMP-{_name[1]}' kwargs_dict_tt_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP) - kwargs_dict_tt_prodmp['wrappers'].append(mujoco.table_tennis.MPWrapper) + if _v == 'TableTennisWind-v0': + kwargs_dict_tt_prodmp['wrappers'].append(mujoco.table_tennis.TTVelObs_MPWrapper) + else: + 
kwargs_dict_tt_prodmp['wrappers'].append(mujoco.table_tennis.TT_MPWrapper) kwargs_dict_tt_prodmp['name'] = _v kwargs_dict_tt_prodmp['controller_kwargs']['p_gains'] = 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]) kwargs_dict_tt_prodmp['controller_kwargs']['d_gains'] = 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]) @@ -564,12 +585,13 @@ for _v in _versions: kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['goal_scale'] = 1.0 kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = False kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0 + kwargs_dict_tt_prodmp['phase_generator_kwargs']['tau_bound'] = [0.8, 1.5] + kwargs_dict_tt_prodmp['phase_generator_kwargs']['delay_bound'] = [0.05, 0.15] kwargs_dict_tt_prodmp['phase_generator_kwargs']['learn_tau'] = True kwargs_dict_tt_prodmp['phase_generator_kwargs']['learn_delay'] = True kwargs_dict_tt_prodmp['basis_generator_kwargs']['num_basis'] = 2 kwargs_dict_tt_prodmp['basis_generator_kwargs']['alpha'] = 25. kwargs_dict_tt_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 # 3.5, 4 to try - #kwargs_dict_tt_prodmp['basis_generator_kwargs']['pre_compute_length_factor'] = 5 kwargs_dict_tt_prodmp['phase_generator_kwargs']['alpha_phase'] = 3 kwargs_dict_tt_prodmp['black_box_kwargs']['max_planning_times'] = 3 kwargs_dict_tt_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t : t % 50 == 0 diff --git a/fancy_gym/envs/mujoco/__init__.py b/fancy_gym/envs/mujoco/__init__.py index 2e84dab..ff51711 100644 --- a/fancy_gym/envs/mujoco/__init__.py +++ b/fancy_gym/envs/mujoco/__init__.py @@ -8,4 +8,4 @@ from .hopper_throw.hopper_throw_in_basket import HopperThrowInBasketEnv from .reacher.reacher import ReacherEnv from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv from .box_pushing.box_pushing_env import BoxPushingDense, BoxPushingTemporalSparse, BoxPushingTemporalSpatialSparse -from .table_tennis.table_tennis_env import TableTennisEnv +from .table_tennis.table_tennis_env import TableTennisEnv, TableTennisWind, TableTennisGoalSwitching diff --git a/fancy_gym/envs/mujoco/table_tennis/__init__.py b/fancy_gym/envs/mujoco/table_tennis/__init__.py index 989b5a9..1438432 100644 --- a/fancy_gym/envs/mujoco/table_tennis/__init__.py +++ b/fancy_gym/envs/mujoco/table_tennis/__init__.py @@ -1 +1 @@ -from .mp_wrapper import MPWrapper \ No newline at end of file +from .mp_wrapper import TT_MPWrapper, TTVelObs_MPWrapper diff --git a/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py index 1da8de5..bd7cbf7 100644 --- a/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py +++ b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py @@ -6,7 +6,7 @@ from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high, delay_bound, tau_bound -class MPWrapper(RawInterfaceWrapper): +class TT_MPWrapper(RawInterfaceWrapper): # Random x goal + random init pos @property @@ -29,48 +29,26 @@ class MPWrapper(RawInterfaceWrapper): def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: return self.data.qvel[:7].copy() - def check_time_validity(self, action): - return action[0] <= tau_bound[1] and action[0] >= tau_bound[0] \ - and action[1] <= delay_bound[1] and action[1] >= delay_bound[0] + def preprocessing_and_validity_callback(self, action, pos_traj, vel_traj): + return self.check_traj_validity(action, pos_traj, vel_traj) - def 
time_invalid_traj_callback(self, action, pos_traj, vel_traj) \ - -> Tuple[np.ndarray, float, bool, dict]: - tau_invalid_penalty = 3 * (np.max([0, action[0] - tau_bound[1]]) + np.max([0, tau_bound[0] - action[0]])) - delay_invalid_penalty = 3 * (np.max([0, action[1] - delay_bound[1]]) + np.max([0, delay_bound[0] - action[1]])) - invalid_penalty = tau_invalid_penalty + delay_invalid_penalty - obs = np.concatenate([self.get_obs(), np.array([0])]) - return obs, -invalid_penalty, True, { - "hit_ball": [False], - "ball_returned_success": [False], - "land_dist_error": [10.], - "is_success": [False], - 'trajectory_length': 1, - "num_steps": [1] - } + def set_episode_arguments(self, action, pos_traj, vel_traj): + return pos_traj, vel_traj - def episode_callback(self, action, pos_traj, vel_traj): - time_invalid = action[0] > tau_bound[1] or action[0] < tau_bound[0] \ - or action[1] > delay_bound[1] or action[1] < delay_bound[0] - if time_invalid or np.any(pos_traj > jnt_pos_high) or np.any(pos_traj < jnt_pos_low): - return False - return True - - def invalid_traj_callback(self, action, pos_traj: np.ndarray, vel_traj: np.ndarray, + def invalid_traj_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray, return_contextual_obs: bool) -> Tuple[np.ndarray, float, bool, dict]: - tau_invalid_penalty = 3 * (np.max([0, action[0] - tau_bound[1]]) + np.max([0, tau_bound[0] - action[0]])) - delay_invalid_penalty = 3 * (np.max([0, action[1] - delay_bound[1]]) + np.max([0, delay_bound[0] - action[1]])) - violate_high_bound_error = np.mean(np.maximum(pos_traj - jnt_pos_high, 0)) - violate_low_bound_error = np.mean(np.maximum(jnt_pos_low - pos_traj, 0)) - invalid_penalty = tau_invalid_penalty + delay_invalid_penalty + \ - violate_high_bound_error + violate_low_bound_error - obs = np.concatenate([self.get_obs(), np.array([0])]) - if return_contextual_obs: - obs = self.get_obs() - return obs, -invalid_penalty, True, { - "hit_ball": [False], - "ball_returned_success": [False], - "land_dist_error": [10.], - "is_success": [False], - 'trajectory_length': 1, - "num_steps": [1] - } + return self.get_invalid_traj_step_return(action, pos_traj, vel_traj, return_contextual_obs) + +class TTVelObs_MPWrapper(TT_MPWrapper): + + @property + def context_mask(self): + return np.hstack([ + [False] * 7, # joints position + [False] * 7, # joints velocity + [True] * 2, # position ball x, y + [False] * 1, # position ball z + [True] * 3, # velocity ball x, y, z + [True] * 2, # target landing position + # [True] * 1, # time + ]) \ No newline at end of file diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py index 23a8668..2599006 100644 --- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py +++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py @@ -5,6 +5,7 @@ from gym import utils, spaces from gym.envs.mujoco import MujocoEnv from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import check_init_state_validity, magnus_force +from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, jnt_pos_high, delay_bound, tau_bound import mujoco @@ -21,13 +22,9 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): """ 7 DoF table tennis environment """ - def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4, - enable_switching_goal: bool = False, - enable_wind: bool = False, - enable_artifical_wind: bool = False, - enable_magnus: bool = False, - enable_air: bool = False): + goal_switching_step: int = None, + 
enable_artificial_wind: bool = False): utils.EzPickle.__init__(**locals()) self._steps = 0 @@ -47,12 +44,11 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): self._ball_traj = [] self._racket_traj = [] + self._goal_switching_step = goal_switching_step - self._enable_goal_switching = enable_switching_goal + self._enable_artificial_wind = enable_artificial_wind - self._enable_artifical_wind = enable_artifical_wind - - self._artifical_force = 0. + self._artificial_force = 0. MujocoEnv.__init__(self, model_path=os.path.join(os.path.dirname(__file__), "assets", "xml", "table_tennis_env.xml"), @@ -62,20 +58,13 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): self.context_bounds = CONTEXT_BOUNDS_2DIMS elif ctxt_dim == 4: self.context_bounds = CONTEXT_BOUNDS_4DIMS - if self._enable_goal_switching: + if self._goal_switching_step is not None: self.context_bounds = CONTEXT_BOUNDS_SWICHING else: raise NotImplementedError self.action_space = spaces.Box(low=-1, high=1, shape=(7,), dtype=np.float32) - # complex dynamics settings - if enable_air: - self.model.opt.density = 1.225 - self.model.opt.viscosity = 2.27e-5 - - self._enable_wind = enable_wind - self._enable_magnus = enable_magnus self._wind_vel = np.zeros(3) def _set_ids(self): @@ -92,9 +81,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): unstable_simulation = False - - if self._enable_goal_switching: - if self._steps == 99 and self.np_random.uniform(0, 1) < 0.5: + if self._steps == self._goal_switching_step and self.np_random.uniform(0, 1) < 0.5: new_goal_pos = self._generate_goal_pos(random=True) new_goal_pos[1] = -new_goal_pos[1] self._goal_pos = new_goal_pos @@ -102,8 +89,8 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): mujoco.mj_forward(self.model, self.data) for _ in range(self.frame_skip): - if self._enable_artifical_wind: - self.data.qfrc_applied[-2] = self._artifical_force + if self._enable_artificial_wind: + self.data.qfrc_applied[-2] = self._artificial_force try: self.do_simulation(action, 1) except Exception as e: @@ -163,7 +150,6 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): def reset_model(self): self._steps = 0 self._init_ball_state = self._generate_valid_init_ball(random_pos=True, random_vel=False) - # self._init_ball_state[2] = 1.85 self._goal_pos = self._generate_goal_pos(random=True) self.data.joint("tar_x").qpos = self._init_ball_state[0] self.data.joint("tar_y").qpos = self._init_ball_state[1] @@ -172,19 +158,16 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): self.data.joint("tar_y").qvel = self._init_ball_state[4] self.data.joint("tar_z").qvel = self._init_ball_state[5] - if self._enable_artifical_wind: - self._artifical_force = self.np_random.uniform(low=-0.1, high=0.1) + if self._enable_artificial_wind: + self._artificial_force = self.np_random.uniform(low=-0.1, high=0.1) self.model.body_pos[5] = np.concatenate([self._goal_pos, [0.77]]) self.data.qpos[:7] = np.array([0., 0., 0., 1.5, 0., 0., 1.5]) + self.data.qvel[:7] = np.zeros(7) mujoco.mj_forward(self.model, self.data) - if self._enable_wind: - self._wind_vel[1] = self.np_random.uniform(low=-10, high=10, size=1) - self.model.opt.wind[:3] = self._wind_vel - self._hit_ball = False self._ball_land_on_table = False self._ball_contact_after_hit = False @@ -208,10 +191,9 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): self.data.joint("tar_x").qpos.copy(), self.data.joint("tar_y").qpos.copy(), self.data.joint("tar_z").qpos.copy(), - #self.data.joint("tar_x").qvel.copy(), - #self.data.joint("tar_y").qvel.copy(), - 
#self.data.joint("tar_z").qvel.copy(), - # self.data.body("target_ball").xvel.copy(), + # self.data.joint("tar_x").qvel.copy(), + # self.data.joint("tar_y").qvel.copy(), + # self.data.joint("tar_z").qvel.copy(), self._goal_pos.copy(), ]) return obs @@ -252,56 +234,54 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle): init_ball_state = self._generate_random_ball(random_pos=random_pos, random_vel=random_vel) return init_ball_state -def plot_ball_traj(x_traj, y_traj, z_traj): - import matplotlib.pyplot as plt - fig = plt.figure() - ax = fig.add_subplot(111, projection='3d') - ax.plot(x_traj, y_traj, z_traj) - plt.show() + def _get_traj_invalid_reward(self, action, pos_traj, vel_traj): + tau_invalid_penalty = 3 * (np.max([0, action[0] - tau_bound[1]]) + np.max([0, tau_bound[0] - action[0]])) + delay_invalid_penalty = 3 * (np.max([0, action[1] - delay_bound[1]]) + np.max([0, delay_bound[0] - action[1]])) + violate_high_bound_error = np.mean(np.maximum(pos_traj - jnt_pos_high, 0)) + violate_low_bound_error = np.mean(np.maximum(jnt_pos_low - pos_traj, 0)) + invalid_penalty = tau_invalid_penalty + delay_invalid_penalty + \ + violate_high_bound_error + violate_low_bound_error + return -invalid_penalty -def plot_ball_traj_2d(x_traj, y_traj): - import matplotlib.pyplot as plt - fig = plt.figure() - ax = fig.add_subplot(111) - ax.plot(x_traj, y_traj) - plt.show() + def get_invalid_traj_step_return(self, action, pos_traj, vel_traj, contextual_obs): + obs = self._get_obs() if contextual_obs else np.concatenate([self._get_obs(), np.array([0])]) # 0 for invalid traj + penalty = self._get_traj_invalid_reward(action, pos_traj, vel_traj) + return obs, penalty, True, { + "hit_ball": [False], + "ball_return_success": [False], + "land_dist_error": [False], + "trajectory_length": 1, + "num_steps": [1], + } -def plot_compare_trajs(traj1, traj2, title): - import matplotlib.pyplot as plt - fig = plt.figure() - ax = fig.add_subplot(111) - ax.plot(traj1, color='r', label='traj1') - ax.plot(traj2, color='b', label='traj2') - ax.set_title(title) - plt.legend() - plt.show() + @staticmethod + def check_traj_validity(action, pos_traj, vel_traj): + time_invalid = action[0] > tau_bound[1] or action[0] < tau_bound[0] \ + or action[1] > delay_bound[1] or action[1] < delay_bound[0] + if time_invalid or np.any(pos_traj > jnt_pos_high) or np.any(pos_traj < jnt_pos_low): + return False, pos_traj, vel_traj + return True, pos_traj, vel_traj -if __name__ == "__main__": - env_air = TableTennisEnv(enable_air=False, enable_wind=False, enable_artifical_wind=True) - env_no_air = TableTennisEnv(enable_air=False, enable_wind=False) - for _ in range(10): - obs1 = env_air.reset() - obs2 = env_no_air.reset() - # obs2 = env_with_air.reset() - air_x_pos = [] - no_air_x_pos = [] - # y_pos = [] - # z_pos = [] - # x_vel = [] - # y_vel = [] - # z_vel = [] - for _ in range(2000): - env_air.render("human") - obs1, reward1, done1, info1 = env_air.step(np.zeros(7)) - obs2, reward2, done2, info2 = env_no_air.step(np.zeros(7)) - # # _, _, _, _ = env_no_air.step(np.zeros(7)) - air_x_pos.append(env_air.data.joint("tar_z").qpos[0]) - no_air_x_pos.append(env_no_air.data.joint("tar_z").qpos[0]) - # # z_pos.append(env.data.joint("tar_z").qpos[0]) - # # x_vel.append(env.data.joint("tar_x").qvel[0]) - # # y_vel.append(env.data.joint("tar_y").qvel[0]) - # # z_vel.append(env.data.joint("tar_z").qvel[0]) - # # print(reward) - if info1["num_steps"] == 150: - plot_compare_trajs(air_x_pos, no_air_x_pos, title="z_pos with/out wind") - break + +class 
TableTennisWind(TableTennisEnv): + def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4): + super().__init__(ctxt_dim=ctxt_dim, frame_skip=frame_skip, enable_artificial_wind=True) + + def _get_obs(self): + obs = np.concatenate([ + self.data.qpos.flat[:7].copy(), + self.data.qvel.flat[:7].copy(), + self.data.joint("tar_x").qpos.copy(), + self.data.joint("tar_y").qpos.copy(), + self.data.joint("tar_z").qpos.copy(), + self.data.joint("tar_x").qvel.copy(), + self.data.joint("tar_y").qvel.copy(), + self.data.joint("tar_z").qvel.copy(), + self._goal_pos.copy(), + ]) + return obs + + +class TableTennisGoalSwitching(TableTennisEnv): + def __init__(self, frame_skip: int = 4, goal_switching_step: int = 99): + super().__init__(frame_skip=frame_skip, goal_switching_step=goal_switching_step) diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py index a58d08b..7d2edf3 100644 --- a/fancy_gym/examples/examples_movement_primitives.py +++ b/fancy_gym/examples/examples_movement_primitives.py @@ -155,7 +155,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): if __name__ == '__main__': - render = True + render = False # DMP # example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render) @@ -163,10 +163,15 @@ if __name__ == '__main__': # example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render) # example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render) # example_mp("TableTennis4DProMP-v0", seed=10, iterations=20, render=render) + # example_mp("TableTennisWindProMP-v0", seed=10, iterations=20, render=render) + # example_mp("TableTennisGoalSwitchingProMP-v0", seed=10, iterations=20, render=render) # ProDMP # example_mp("BoxPushingDenseReplanProDMP-v0", seed=10, iterations=4, render=render) - example_mp("TableTennis4DProDMP-v0", seed=10, iterations=20, render=render) + example_mp("TableTennis4DProDMP-v0", seed=10, iterations=2000, render=render) + # example_mp("TableTennisWindProDMP-v0", seed=10, iterations=20, render=render) + # example_mp("TableTennisGoalSwitchingProDMP-v0", seed=10, iterations=20, render=render) + # Altered basis functions # obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render) diff --git a/fancy_gym/utils/make_env_helpers.py b/fancy_gym/utils/make_env_helpers.py index 3c73ba9..2e04d71 100644 --- a/fancy_gym/utils/make_env_helpers.py +++ b/fancy_gym/utils/make_env_helpers.py @@ -168,11 +168,11 @@ def make_bb( # set tau bounds to minimum of two env steps otherwise computing the velocity is not possible. # maximum is full duration of one episode. 
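# [Editor's sketch, not part of the patch] The change below makes both fallbacks conditional on
# the user not having supplied bounds, so env-specific settings registered in this commit survive
# make_bb. With the table-tennis values from above (plain dict access assumed):
#     phase_kwargs = {'learn_tau': True, 'tau_bound': [0.8, 1.5],
#                     'learn_delay': True, 'delay_bound': [0.05, 0.15]}
#     # neither default fires, because 'tau_bound' and 'delay_bound' are already set; without
#     # them, tau falls back to [env.dt * 2, duration] (two steps being the minimum needed to
#     # finite-difference a velocity) and delay to [0, duration - env.dt * 2].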
-    if phase_kwargs.get('learn_tau'):
+    if phase_kwargs.get('learn_tau') and phase_kwargs.get('tau_bound') is None:
         phase_kwargs["tau_bound"] = [env.dt * 2, black_box_kwargs['duration']]
 
     # Max delay is full duration minus two steps due to above reason
-    if phase_kwargs.get('learn_delay'):
+    if phase_kwargs.get('learn_delay') and phase_kwargs.get('delay_bound') is None:
         phase_kwargs["delay_bound"] = [0, black_box_kwargs['duration'] - env.dt * 2]
 
     phase_gen = get_phase_generator(**phase_kwargs)

From a9a1d054977e9ed0231959d04b332812ab37ec85 Mon Sep 17 00:00:00 2001
From: Hongyi Zhou
Date: Thu, 1 Dec 2022 11:46:09 +0100
Subject: [PATCH 22/27] merge master into table-tennis-dev branch

---
 fancy_gym/black_box/black_box_wrapper.py      |  9 +++------
 .../examples/examples_movement_primitives.py  | 16 ++++++++--------
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py
index 4ff685a..d5bd7e6 100644
--- a/fancy_gym/black_box/black_box_wrapper.py
+++ b/fancy_gym/black_box/black_box_wrapper.py
@@ -145,11 +145,10 @@ class BlackBoxWrapper(gym.ObservationWrapper):
 
     def step(self, action: np.ndarray):
         """
        This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
-        # TODO remove this part, right now only needed for beer pong
-        # mp_params, env_spec_params, proceed = self.env.episode_callback(action, self.traj_gen)
         position, velocity = self.get_trajectory(action)
+        position, velocity = self.env.set_episode_arguments(action, position, velocity)
         traj_is_valid = self.env.preprocessing_and_validity_callback(action, position, velocity)
-        # insert validation here
+
         trajectory_length = len(position)
         rewards = np.zeros(shape=(trajectory_length,))
         if self.verbose >= 2:
@@ -167,8 +166,6 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         else:
             self.plan_steps += 1
             for t, (pos, vel) in enumerate(zip(position, velocity)):
-                current_pos = self.current_pos
-                current_vel = self.current_vel
                 step_action = self.tracking_controller.get_action(pos, vel, self.current_pos, self.current_vel)
                 c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
                 obs, c_reward, done, info = self.env.step(c_action)
@@ -186,7 +183,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
                 if self.render_kwargs:
                     self.env.render(**self.render_kwargs)
 
-                if done or (self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action, 
+                if done or (self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action,
                                                      t + 1 + self.current_traj_steps)
                             and self.plan_steps < self.max_planning_times):
diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py
index 7d2edf3..b9f82de 100644
--- a/fancy_gym/examples/examples_movement_primitives.py
+++ b/fancy_gym/examples/examples_movement_primitives.py
@@ -161,16 +161,16 @@ if __name__ == '__main__':
 
     # ProMP
     # example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render)
-    # example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render)
-    # example_mp("TableTennis4DProMP-v0", seed=10, iterations=20, render=render)
-    # example_mp("TableTennisWindProMP-v0", seed=10, iterations=20, render=render)
-    # example_mp("TableTennisGoalSwitchingProMP-v0", seed=10, iterations=20, render=render)
+    example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render)
+    example_mp("TableTennis4DProMP-v0", seed=10, iterations=20, render=render)
+    example_mp("TableTennisWindProMP-v0", seed=10, iterations=20, render=render)
+    example_mp("TableTennisGoalSwitchingProMP-v0", seed=10, iterations=20, render=render)
 
     # ProDMP
-    # example_mp("BoxPushingDenseReplanProDMP-v0", seed=10, iterations=4, render=render)
-    example_mp("TableTennis4DProDMP-v0", seed=10, iterations=2000, render=render)
-    # example_mp("TableTennisWindProDMP-v0", seed=10, iterations=20, render=render)
-    # example_mp("TableTennisGoalSwitchingProDMP-v0", seed=10, iterations=20, render=render)
+    example_mp("BoxPushingDenseReplanProDMP-v0", seed=10, iterations=4, render=render)
+    example_mp("TableTennis4DProDMP-v0", seed=10, iterations=20, render=render)
+    example_mp("TableTennisWindProDMP-v0", seed=10, iterations=20, render=render)
+    example_mp("TableTennisGoalSwitchingProDMP-v0", seed=10, iterations=20, render=render)
 
     # Altered basis functions
     # obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)

From 55df1e0ef6e985d3a4ac357849a516042089a544 Mon Sep 17 00:00:00 2001
From: Hongyi Zhou
Date: Thu, 1 Dec 2022 13:16:37 +0100
Subject: [PATCH 23/27] fix minor bug

---
 fancy_gym/black_box/black_box_wrapper.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py
index d5bd7e6..9d138cc 100644
--- a/fancy_gym/black_box/black_box_wrapper.py
+++ b/fancy_gym/black_box/black_box_wrapper.py
@@ -147,7 +147,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
 
         position, velocity = self.get_trajectory(action)
         position, velocity = self.env.set_episode_arguments(action, position, velocity)
-        traj_is_valid = self.env.preprocessing_and_validity_callback(action, position, velocity)
+        traj_is_valid, position, velocity = self.env.preprocessing_and_validity_callback(action, position, velocity)
 
         trajectory_length = len(position)
         rewards = np.zeros(shape=(trajectory_length,))
@@ -159,7 +159,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         infos = dict()
         done = False
 
-        if not traj_is_valid:
+        if traj_is_valid is False:
             obs, trajectory_return, done, infos = self.env.invalid_traj_callback(action, position, velocity,
                                                                                  self.return_context_observation)
             return self.observation(obs), trajectory_return, done, infos

From fe2d8fec91c3fd2e359b700000a946ff2c74b06c Mon Sep 17 00:00:00 2001
From: Hongyi Zhou
Date: Thu, 1 Dec 2022 13:22:45 +0100
Subject: [PATCH 24/27] delete unused argument

---
 fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py       | 2 +-
 fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py
index bd7cbf7..e33ed6c 100644
--- a/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py
+++ b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py
@@ -37,7 +37,7 @@ class TT_MPWrapper(RawInterfaceWrapper):
 
     def invalid_traj_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_traj: np.ndarray,
                               return_contextual_obs: bool) -> Tuple[np.ndarray, float, bool, dict]:
-        return self.get_invalid_traj_step_return(action, pos_traj, vel_traj, return_contextual_obs)
+        return self.get_invalid_traj_step_return(action, pos_traj, return_contextual_obs)
 
 
 class TTVelObs_MPWrapper(TT_MPWrapper):
diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
index 2599006..734588a 100644
--- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
+++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
@@ -234,7 +234,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
         init_ball_state = self._generate_random_ball(random_pos=random_pos, random_vel=random_vel)
         return init_ball_state
 
-    def _get_traj_invalid_reward(self, action, pos_traj, vel_traj):
+    def _get_traj_invalid_reward(self, action, pos_traj):
         tau_invalid_penalty = 3 * (np.max([0, action[0] - tau_bound[1]]) + np.max([0, tau_bound[0] - action[0]]))
         delay_invalid_penalty = 3 * (np.max([0, action[1] - delay_bound[1]]) + np.max([0, delay_bound[0] - action[1]]))
         violate_high_bound_error = np.mean(np.maximum(pos_traj - jnt_pos_high, 0))
@@ -243,9 +243,9 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
                           violate_high_bound_error + violate_low_bound_error
         return -invalid_penalty
 
-    def get_invalid_traj_step_return(self, action, pos_traj, vel_traj, contextual_obs):
+    def get_invalid_traj_step_return(self, action, pos_traj, contextual_obs):
         obs = self._get_obs() if contextual_obs else np.concatenate([self._get_obs(), np.array([0])])  # 0 for invalid traj
-        penalty = self._get_traj_invalid_reward(action, pos_traj, vel_traj)
+        penalty = self._get_traj_invalid_reward(action, pos_traj)
         return obs, penalty, True, {
             "hit_ball": [False],
             "ball_return_success": [False],

From fa9a35d470db2093ebaac36c9af81a8584c98193 Mon Sep 17 00:00:00 2001
From: Hongyi Zhou
Date: Thu, 1 Dec 2022 13:33:24 +0100
Subject: [PATCH 25/27] fix typos

---
 fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
index 734588a..ad41a08 100644
--- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
+++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
@@ -248,8 +248,9 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
         penalty = self._get_traj_invalid_reward(action, pos_traj)
         return obs, penalty, True, {
             "hit_ball": [False],
-            "ball_return_success": [False],
-            "land_dist_error": [False],
+            "ball_returned_success": [False],
+            "land_dist_error": [10.],
+            "is_success": [False],
             "trajectory_length": 1,
             "num_steps": [1],
         }

From 5744d339accf93e2a3da4b60e83329fb8b0fb7a1 Mon Sep 17 00:00:00 2001
From: Hongyi Zhou
Date: Thu, 1 Dec 2022 14:04:11 +0100
Subject: [PATCH 26/27] change time limits for tt to 350

---
 fancy_gym/envs/__init__.py                           |  1 -
 .../envs/mujoco/table_tennis/table_tennis_env.py     |  2 +-
 fancy_gym/examples/examples_movement_primitives.py   | 14 +++++++-------
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py
index bbcdd91..2fddbf3 100644
--- a/fancy_gym/envs/__init__.py
+++ b/fancy_gym/envs/__init__.py
@@ -561,7 +561,6 @@ for _v in _versions:
     kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis'] = 3
     kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2
     kwargs_dict_tt_promp['basis_generator_kwargs']['num_basis_zero_goal'] = 1
-    kwargs_dict_tt_promp['black_box_kwargs']['duration'] = 2.
     kwargs_dict_tt_promp['black_box_kwargs']['verbose'] = 2
     register(
         id=_env_id,
diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
index ad41a08..3f30256 100644
--- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
+++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
@@ -9,7 +9,7 @@ from fancy_gym.envs.mujoco.table_tennis.table_tennis_utils import jnt_pos_low, j
 import mujoco
 
-MAX_EPISODE_STEPS_TABLE_TENNIS = 250
+MAX_EPISODE_STEPS_TABLE_TENNIS = 350
 
 CONTEXT_BOUNDS_2DIMS = np.array([[-1.0, -0.65], [-0.2, 0.65]])
 CONTEXT_BOUNDS_4DIMS = np.array([[-1.0, -0.65, -1.0, -0.65],
diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py
index b9f82de..4aeeecc 100644
--- a/fancy_gym/examples/examples_movement_primitives.py
+++ b/fancy_gym/examples/examples_movement_primitives.py
@@ -155,22 +155,22 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
 
 if __name__ == '__main__':
-    render = False
+    render = True
     # DMP
     # example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render)
 
     # ProMP
     # example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render)
-    example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render)
+    # example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render)
     example_mp("TableTennis4DProMP-v0", seed=10, iterations=20, render=render)
     example_mp("TableTennisWindProMP-v0", seed=10, iterations=20, render=render)
     example_mp("TableTennisGoalSwitchingProMP-v0", seed=10, iterations=20, render=render)
 
-    # ProDMP
-    example_mp("BoxPushingDenseReplanProDMP-v0", seed=10, iterations=4, render=render)
-    example_mp("TableTennis4DProDMP-v0", seed=10, iterations=20, render=render)
-    example_mp("TableTennisWindProDMP-v0", seed=10, iterations=20, render=render)
-    example_mp("TableTennisGoalSwitchingProDMP-v0", seed=10, iterations=20, render=render)
+    # ProDMP with Replanning
+    # example_mp("BoxPushingDenseReplanProDMP-v0", seed=10, iterations=4, render=render)
+    # example_mp("TableTennis4DProDMP-v0", seed=10, iterations=100, render=render)
+    # example_mp("TableTennisWindProDMP-v0", seed=10, iterations=100, render=render)
+    # example_mp("TableTennisGoalSwitchingProDMP-v0", seed=10, iterations=100, render=render)
 
     # Altered basis functions
     # obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)

From 5750f6eb3d972578765891465c6ebdad600a9388 Mon Sep 17 00:00:00 2001
From: Hongyi Zhou
Date: Thu, 1 Dec 2022 14:23:57 +0100
Subject: [PATCH 27/27] add replan name tag to replan envs & delete redundant
 settings

---
 fancy_gym/envs/__init__.py                           |  6 ++----
 .../envs/mujoco/table_tennis/table_tennis_env.py     |  7 ++-----
 fancy_gym/examples/examples_movement_primitives.py   | 21 ++++++++-----------
 3 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py
index 2fddbf3..ea9aec7 100644
--- a/fancy_gym/envs/__init__.py
+++ b/fancy_gym/envs/__init__.py
@@ -571,7 +571,7 @@ for _v in _versions:
 
 for _v in _versions:
     _name = _v.split("-")
-    _env_id = f'{_name[0]}ProDMP-{_name[1]}'
+    _env_id = f'{_name[0]}ReplanProDMP-{_name[1]}'
     kwargs_dict_tt_prodmp = deepcopy(DEFAULT_BB_DICT_ProDMP)
     if _v == 'TableTennisWind-v0':
         kwargs_dict_tt_prodmp['wrappers'].append(mujoco.table_tennis.TTVelObs_MPWrapper)
@@ -580,8 +580,6 @@ for _v in _versions:
     kwargs_dict_tt_prodmp['name'] = _v
     kwargs_dict_tt_prodmp['controller_kwargs']['p_gains'] = 0.5 * np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0])
     kwargs_dict_tt_prodmp['controller_kwargs']['d_gains'] = 0.5 * np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1])
-    kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['weights_scale'] = 1.0
-    kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['goal_scale'] = 1.0
     kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['auto_scale_basis'] = False
     kwargs_dict_tt_prodmp['trajectory_generator_kwargs']['goal_offset'] = 1.0
     kwargs_dict_tt_prodmp['phase_generator_kwargs']['tau_bound'] = [0.8, 1.5]
@@ -590,7 +588,7 @@ for _v in _versions:
     kwargs_dict_tt_prodmp['phase_generator_kwargs']['learn_delay'] = True
     kwargs_dict_tt_prodmp['basis_generator_kwargs']['num_basis'] = 2
     kwargs_dict_tt_prodmp['basis_generator_kwargs']['alpha'] = 25.
-    kwargs_dict_tt_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3 # 3.5, 4 to try
+    kwargs_dict_tt_prodmp['basis_generator_kwargs']['basis_bandwidth_factor'] = 3
     kwargs_dict_tt_prodmp['phase_generator_kwargs']['alpha_phase'] = 3
     kwargs_dict_tt_prodmp['black_box_kwargs']['max_planning_times'] = 3
     kwargs_dict_tt_prodmp['black_box_kwargs']['replanning_schedule'] = lambda pos, vel, obs, action, t : t % 50 == 0
diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
index 3f30256..dc717c2 100644
--- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
+++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
@@ -191,9 +191,6 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
             self.data.joint("tar_x").qpos.copy(),
             self.data.joint("tar_y").qpos.copy(),
             self.data.joint("tar_z").qpos.copy(),
-            # self.data.joint("tar_x").qvel.copy(),
-            # self.data.joint("tar_y").qvel.copy(),
-            # self.data.joint("tar_z").qvel.copy(),
             self._goal_pos.copy(),
         ])
         return obs
@@ -234,7 +231,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
         init_ball_state = self._generate_random_ball(random_pos=random_pos, random_vel=random_vel)
         return init_ball_state
 
-    def _get_traj_invalid_reward(self, action, pos_traj):
+    def _get_traj_invalid_penalty(self, action, pos_traj):
         tau_invalid_penalty = 3 * (np.max([0, action[0] - tau_bound[1]]) + np.max([0, tau_bound[0] - action[0]]))
         delay_invalid_penalty = 3 * (np.max([0, action[1] - delay_bound[1]]) + np.max([0, delay_bound[0] - action[1]]))
         violate_high_bound_error = np.mean(np.maximum(pos_traj - jnt_pos_high, 0))
@@ -245,7 +242,7 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
 
     def get_invalid_traj_step_return(self, action, pos_traj, contextual_obs):
         obs = self._get_obs() if contextual_obs else np.concatenate([self._get_obs(), np.array([0])])  # 0 for invalid traj
-        penalty = self._get_traj_invalid_reward(action, pos_traj)
+        penalty = self._get_traj_invalid_penalty(action, pos_traj)
         return obs, penalty, True, {
             "hit_ball": [False],
             "ball_returned_success": [False],
diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py
index 4aeeecc..b533e9c 100644
--- a/fancy_gym/examples/examples_movement_primitives.py
+++ b/fancy_gym/examples/examples_movement_primitives.py
@@ -155,25 +155,22 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
 
 if __name__ == '__main__':
-    render = True
+    render = False
    # DMP
-    # example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render)
+    example_mp("HoleReacherDMP-v0", seed=10, iterations=5, render=render)
 
     # ProMP
-    # example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render)
-    # example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render)
+    example_mp("HoleReacherProMP-v0", seed=10, iterations=5, render=render)
+    example_mp("BoxPushingTemporalSparseProMP-v0", seed=10, iterations=1, render=render)
     example_mp("TableTennis4DProMP-v0", seed=10, iterations=20, render=render)
-    example_mp("TableTennisWindProMP-v0", seed=10, iterations=20, render=render)
-    example_mp("TableTennisGoalSwitchingProMP-v0", seed=10, iterations=20, render=render)
 
     # ProDMP with Replanning
-    # example_mp("BoxPushingDenseReplanProDMP-v0", seed=10, iterations=4, render=render)
-    # example_mp("TableTennis4DProDMP-v0", seed=10, iterations=100, render=render)
-    # example_mp("TableTennisWindProDMP-v0", seed=10, iterations=100, render=render)
-    # example_mp("TableTennisGoalSwitchingProDMP-v0", seed=10, iterations=100, render=render)
+    example_mp("BoxPushingDenseReplanProDMP-v0", seed=10, iterations=4, render=render)
+    example_mp("TableTennis4DReplanProDMP-v0", seed=10, iterations=20, render=render)
+    example_mp("TableTennisWindReplanProDMP-v0", seed=10, iterations=20, render=render)
 
     # Altered basis functions
-    # obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)
+    obs1 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=1, render=render)
 
     # Custom MP
-    # example_fully_custom_mp(seed=10, iterations=1, render=render)
+    example_fully_custom_mp(seed=10, iterations=1, render=render)