From 8b88ce3476a9040eed01ae46ca1e38ddfc0a26bd Mon Sep 17 00:00:00 2001 From: Maximilian Huettenrauch Date: Tue, 30 Nov 2021 16:11:32 +0100 Subject: [PATCH 1/3] replaced all detpmp with promp --- README.md | 10 +- alr_envs/__init__.py | 2 +- alr_envs/alr/__init__.py | 77 ++----------- alr_envs/alr/classic_control/README.MD | 2 +- .../viapoint_reacher/viapoint_reacher.py | 1 - alr_envs/alr/mujoco/ball_in_a_cup/utils.py | 107 ------------------ alr_envs/alr/mujoco/beerpong/utils.py | 72 ------------ alr_envs/dmc/README.MD | 6 +- alr_envs/dmc/__init__.py | 51 ++++----- alr_envs/examples/examples_dmc.py | 4 +- alr_envs/examples/examples_metaworld.py | 4 +- .../examples/examples_motion_primitives.py | 4 +- alr_envs/examples/examples_open_ai.py | 10 +- alr_envs/examples/pd_control_gain_tuning.py | 6 +- alr_envs/meta/__init__.py | 30 +++-- alr_envs/open_ai/README.MD | 8 +- alr_envs/open_ai/__init__.py | 51 ++++----- alr_envs/utils/make_env_helpers.py | 47 +------- test/test_envs.py | 32 +++--- test/test_metaworld_envs.py | 4 +- 20 files changed, 121 insertions(+), 407 deletions(-) delete mode 100644 alr_envs/alr/mujoco/ball_in_a_cup/utils.py delete mode 100644 alr_envs/alr/mujoco/beerpong/utils.py diff --git a/README.md b/README.md index 860bddc..2232efa 100644 --- a/README.md +++ b/README.md @@ -6,14 +6,14 @@ Besides, some custom environments we also provide support for the benchmark suit [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) (DMC), and [Metaworld](https://meta-world.github.io/). Custom (Mujoco) gym environment can be created according to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md). Unlike existing libraries, we -further support to control agents with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (DetPMP, +further support to control agents with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP, we only consider the mean usually). ## Motion Primitive Environments (Episodic environments) Unlike step-based environments, motion primitive (MP) environments are closer related to stochastic search, black box optimization and methods that often used in robotics. MP environments are trajectory-based and always execute a full -trajectory, which is generated by a Dynamic Motion Primitive (DMP) or a Probabilistic Motion Primitive (DetPMP). The +trajectory, which is generated by a Dynamic Motion Primitive (DMP) or a Probabilistic Motion Primitive (ProMP). The generated trajectory is translated into individual step-wise actions by a controller. The exact choice of controller is, however, dependent on the type of environment. We currently support position, velocity, and PD-Controllers for position, velocity and torque control, respectively. The goal of all MP environments is still to learn a policy. Yet, an action @@ -82,7 +82,7 @@ trajectory. ```python import alr_envs -env = alr_envs.make('HoleReacherDetPMP-v0', seed=1) +env = alr_envs.make('HoleReacherProMP-v0', seed=1) # render() can be called once in the beginning with all necessary arguments. To turn it of again just call render(None). env.render() @@ -96,7 +96,7 @@ for i in range(5): ``` To show all available environments, we provide some additional convenience. Each value will return a dictionary with two -keys `DMP` and `DetPMP` that store a list of available environment names. +keys `DMP` and `ProMP` that store a list of available environment names. 
```python import alr_envs @@ -194,7 +194,7 @@ mp_kwargs = {...} kwargs = {...} env = alr_envs.make_dmp_env(base_env_id, wrappers=wrappers, seed=1, mp_kwargs=mp_kwargs, **kwargs) # OR for a deterministic ProMP (other mp_kwargs are required): -# env = alr_envs.make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_args) +# env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_args) rewards = 0 obs = env.reset() diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py index 30fa7b8..858a66c 100644 --- a/alr_envs/__init__.py +++ b/alr_envs/__init__.py @@ -1,5 +1,5 @@ from alr_envs import dmc, meta, open_ai -from alr_envs.utils.make_env_helpers import make, make_detpmp_env, make_dmp_env, make_promp_env, make_rank +from alr_envs.utils.make_env_helpers import make, make_dmp_env, make_promp_env, make_rank from alr_envs.utils import make_dmc # Convenience function for all MP environments diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py index 03a986f..6853f0d 100644 --- a/alr_envs/alr/__init__.py +++ b/alr_envs/alr/__init__.py @@ -9,7 +9,7 @@ from .mujoco.ball_in_a_cup.biac_pd import ALRBallInACupPDEnv from .mujoco.reacher.alr_reacher import ALRReacherEnv from .mujoco.reacher.balancing import BalancingEnv -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "DetPMP": []} +ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} # Classic Control ## Simple Reacher @@ -213,8 +213,12 @@ for _v in _versions: "duration": 2, "alpha_phase": 2, "learn_goal": True, - "policy_type": "velocity", + "policy_type": "motor", "weights_scale": 50, + "policy_kwargs": { + "p_gains": .6, + "d_gains": .075 + } } } ) @@ -233,33 +237,16 @@ for _v in _versions: "duration": 2, "policy_type": "motor", "weights_scale": 1, - "zero_start": True + "zero_start": True, + "policy_kwargs": { + "p_gains": .6, + "d_gains": .075 + } } } ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - _env_id = f'{_name[0]}DetPMP-{_name[1]}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', - # max_episode_steps=1, - kwargs={ - "name": f"alr_envs:{_v}", - "wrappers": [classic_control.simple_reacher.MPWrapper], - "mp_kwargs": { - "num_dof": 2 if "long" not in _v.lower() else 5, - "num_basis": 5, - "duration": 2, - "width": 0.025, - "policy_type": "velocity", - "weights_scale": 0.2, - "zero_start": True - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id) - # Viapoint reacher register( id='ViaPointReacherDMP-v0', @@ -291,7 +278,7 @@ register( "num_dof": 5, "num_basis": 5, "duration": 2, - "policy_type": "motor", + "policy_type": "velocity", "weights_scale": 1, "zero_start": True } @@ -299,26 +286,6 @@ register( ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0") -register( - id='ViaPointReacherDetPMP-v0', - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', - # max_episode_steps=1, - kwargs={ - "name": "alr_envs:ViaPointReacher-v0", - "wrappers": [classic_control.viapoint_reacher.MPWrapper], - "mp_kwargs": { - "num_dof": 5, - "num_basis": 5, - "duration": 2, - "width": 0.025, - "policy_type": "velocity", - "weights_scale": 0.2, - "zero_start": True - } - } -) -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("ViaPointReacherDetPMP-v0") - ## Hole Reacher _versions = ["v0", "v1", "v2"] for _v in _versions: @@ -363,23 +330,3 @@ for _v in _versions: } ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - - _env_id = 
f'HoleReacherDetPMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', - kwargs={ - "name": f"alr_envs:HoleReacher-{_v}", - "wrappers": [classic_control.hole_reacher.MPWrapper], - "mp_kwargs": { - "num_dof": 5, - "num_basis": 5, - "duration": 2, - "width": 0.025, - "policy_type": "velocity", - "weights_scale": 0.2, - "zero_start": True - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id) diff --git a/alr_envs/alr/classic_control/README.MD b/alr_envs/alr/classic_control/README.MD index ebe2101..0bd2f92 100644 --- a/alr_envs/alr/classic_control/README.MD +++ b/alr_envs/alr/classic_control/README.MD @@ -18,4 +18,4 @@ |`ALRBallInACupDMP-v0`| A DMP provides a trajectory for the `ALRBallInACup-v0` task. | 4000 | 35 |`ALRBallInACupGoalDMP-v0`| A DMP provides a trajectory for the `ALRBallInACupGoal-v0` task. | 4000 | 35 | 3 -[//]: |`HoleReacherDetPMP-v0`| \ No newline at end of file +[//]: |`HoleReacherProMPP-v0`| \ No newline at end of file diff --git a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py index 748eb99..3a1f0e5 100644 --- a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py +++ b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py @@ -5,7 +5,6 @@ import matplotlib.pyplot as plt import numpy as np from gym.utils import seeding -from alr_envs.alr.classic_control.utils import check_self_collision from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/utils.py b/alr_envs/alr/mujoco/ball_in_a_cup/utils.py deleted file mode 100644 index 0dd972c..0000000 --- a/alr_envs/alr/mujoco/ball_in_a_cup/utils.py +++ /dev/null @@ -1,107 +0,0 @@ -from alr_envs.alr.mujoco.ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv -from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper -from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper - - -def make_contextual_env(rank, seed=0): - """ - Utility function for multiprocessed env. - - :param env_id: (str) the environment ID - :param num_env: (int) the number of environments you wish to have in subprocesses - :param seed: (int) the initial seed for RNG - :param rank: (int) index of the subprocess - :returns a function that generates an environment - """ - - def _init(): - env = ALRBallInACupEnv(reward_type="contextual_goal") - - env = DetPMPWrapper(env, num_dof=7, num_basis=5, width=0.005, duration=3.5, dt=env.dt, post_traj_time=4.5, - policy_type="motor", weights_scale=0.5, zero_start=True, zero_goal=True) - - env.seed(seed + rank) - return env - - return _init - - -def _make_env(rank, seed=0): - """ - Utility function for multiprocessed env. - - :param env_id: (str) the environment ID - :param num_env: (int) the number of environments you wish to have in subprocesses - :param seed: (int) the initial seed for RNG - :param rank: (int) index of the subprocess - :returns a function that generates an environment - """ - - def _init(): - env = ALRBallInACupEnv(reward_type="simple") - - env = DetPMPWrapper(env, num_dof=7, num_basis=5, width=0.005, duration=3.5, dt=env.dt, post_traj_time=4.5, - policy_type="motor", weights_scale=0.2, zero_start=True, zero_goal=True) - - env.seed(seed + rank) - return env - - return _init - - -def make_simple_env(rank, seed=0): - """ - Utility function for multiprocessed env. 
- - :param env_id: (str) the environment ID - :param num_env: (int) the number of environments you wish to have in subprocesses - :param seed: (int) the initial seed for RNG - :param rank: (int) index of the subprocess - :returns a function that generates an environment - """ - - def _init(): - env = ALRBallInACupEnv(reward_type="simple") - - env = DetPMPWrapper(env, num_dof=3, num_basis=5, width=0.005, duration=3.5, dt=env.dt, post_traj_time=4.5, - policy_type="motor", weights_scale=0.25, zero_start=True, zero_goal=True, off=-0.1) - - env.seed(seed + rank) - return env - - return _init - - -def make_simple_dmp_env(rank, seed=0): - """ - Utility function for multiprocessed env. - - :param env_id: (str) the environment ID - :param num_env: (int) the number of environments you wish to have in subprocesses - :param seed: (int) the initial seed for RNG - :param rank: (int) index of the subprocess - :returns a function that generates an environment - """ - - def _init(): - _env = ALRBallInACupEnv(reward_type="simple") - - _env = DmpWrapper(_env, - num_dof=3, - num_basis=5, - duration=3.5, - post_traj_time=4.5, - bandwidth_factor=2.5, - dt=_env.dt, - learn_goal=False, - alpha_phase=3, - start_pos=_env.start_pos[1::2], - final_pos=_env.start_pos[1::2], - policy_type="motor", - weights_scale=100, - ) - - _env.seed(seed + rank) - return _env - - return _init diff --git a/alr_envs/alr/mujoco/beerpong/utils.py b/alr_envs/alr/mujoco/beerpong/utils.py deleted file mode 100644 index f43e881..0000000 --- a/alr_envs/alr/mujoco/beerpong/utils.py +++ /dev/null @@ -1,72 +0,0 @@ -from alr_envs.utils.mps.detpmp_wrapper import DetPMPWrapper -from alr_envs.alr.mujoco.beerpong.beerpong import ALRBeerpongEnv -from alr_envs.alr.mujoco.beerpong.beerpong_simple import ALRBeerpongEnv as ALRBeerpongEnvSimple - - -def make_contextual_env(rank, seed=0): - """ - Utility function for multiprocessed env. - - :param env_id: (str) the environment ID - :param num_env: (int) the number of environments you wish to have in subprocesses - :param seed: (int) the initial seed for RNG - :param rank: (int) index of the subprocess - :returns a function that generates an environment - """ - - def _init(): - env = ALRBeerpongEnv() - - env = DetPMPWrapper(env, num_dof=7, num_basis=5, width=0.005, duration=3.5, dt=env.dt, post_traj_time=4.5, - policy_type="motor", weights_scale=0.5, zero_start=True, zero_goal=True) - - env.seed(seed + rank) - return env - - return _init - - -def _make_env(rank, seed=0): - """ - Utility function for multiprocessed env. - - :param env_id: (str) the environment ID - :param num_env: (int) the number of environments you wish to have in subprocesses - :param seed: (int) the initial seed for RNG - :param rank: (int) index of the subprocess - :returns a function that generates an environment - """ - - def _init(): - env = ALRBeerpongEnvSimple() - - env = DetPMPWrapper(env, num_dof=7, num_basis=5, width=0.005, duration=3.5, dt=env.dt, post_traj_time=4.5, - policy_type="motor", weights_scale=0.25, zero_start=True, zero_goal=True) - - env.seed(seed + rank) - return env - - return _init - - -def make_simple_env(rank, seed=0): - """ - Utility function for multiprocessed env. 
- - :param env_id: (str) the environment ID - :param num_env: (int) the number of environments you wish to have in subprocesses - :param seed: (int) the initial seed for RNG - :param rank: (int) index of the subprocess - :returns a function that generates an environment - """ - - def _init(): - env = ALRBeerpongEnvSimple() - - env = DetPMPWrapper(env, num_dof=3, num_basis=5, width=0.005, duration=3.5, dt=env.dt, post_traj_time=4.5, - policy_type="motor", weights_scale=0.5, zero_start=True, zero_goal=True) - - env.seed(seed + rank) - return env - - return _init diff --git a/alr_envs/dmc/README.MD b/alr_envs/dmc/README.MD index 791ee84..040a9a0 100644 --- a/alr_envs/dmc/README.MD +++ b/alr_envs/dmc/README.MD @@ -11,9 +11,9 @@ environments in order to use our Motion Primitive gym interface with them. |Name| Description|Trajectory Horizon|Action Dimension|Context Dimension |---|---|---|---|---| -|`dmc_ball_in_cup-catch_detpmp-v0`| A DetPmP wrapped version of the "catch" task for the "ball_in_cup" environment. | 1000 | 10 | 2 +|`dmc_ball_in_cup-catch_promp-v0`| A ProMP wrapped version of the "catch" task for the "ball_in_cup" environment. | 1000 | 10 | 2 |`dmc_ball_in_cup-catch_dmp-v0`| A DMP wrapped version of the "catch" task for the "ball_in_cup" environment. | 1000| 10 | 2 -|`dmc_reacher-easy_detpmp-v0`| A DetPmP wrapped version of the "easy" task for the "reacher" environment. | 1000 | 10 | 4 +|`dmc_reacher-easy_promp-v0`| A ProMP wrapped version of the "easy" task for the "reacher" environment. | 1000 | 10 | 4 |`dmc_reacher-easy_dmp-v0`| A DMP wrapped version of the "easy" task for the "reacher" environment. | 1000| 10 | 4 -|`dmc_reacher-hard_detpmp-v0`| A DetPmP wrapped version of the "hard" task for the "reacher" environment.| 1000 | 10 | 4 +|`dmc_reacher-hard_promp-v0`| A ProMP wrapped version of the "hard" task for the "reacher" environment.| 1000 | 10 | 4 |`dmc_reacher-hard_dmp-v0`| A DMP wrapped version of the "hard" task for the "reacher" environment. | 1000 | 10 | 4 diff --git a/alr_envs/dmc/__init__.py b/alr_envs/dmc/__init__.py index ca6469e..ac34415 100644 --- a/alr_envs/dmc/__init__.py +++ b/alr_envs/dmc/__init__.py @@ -1,6 +1,6 @@ from . 
import manipulation, suite -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "DetPMP": []} +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} from gym.envs.registration import register @@ -34,8 +34,8 @@ register( ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0") register( - id=f'dmc_ball_in_cup-catch_detpmp-v0', - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + id=f'dmc_ball_in_cup-catch_promp-v0', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', kwargs={ "name": f"ball_in_cup-catch", "time_limit": 20, @@ -45,7 +45,6 @@ register( "num_dof": 2, "num_basis": 5, "duration": 20, - "width": 0.025, "policy_type": "motor", "zero_start": True, "policy_kwargs": { @@ -55,7 +54,7 @@ register( } } ) -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("dmc_ball_in_cup-catch_detpmp-v0") +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_ball_in_cup-catch_promp-v0") register( id=f'dmc_reacher-easy_dmp-v0', @@ -86,8 +85,8 @@ register( ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0") register( - id=f'dmc_reacher-easy_detpmp-v0', - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + id=f'dmc_reacher-easy_promp-v0', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', kwargs={ "name": f"reacher-easy", "time_limit": 20, @@ -97,7 +96,6 @@ register( "num_dof": 2, "num_basis": 5, "duration": 20, - "width": 0.025, "policy_type": "motor", "weights_scale": 0.2, "zero_start": True, @@ -108,7 +106,7 @@ register( } } ) -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("dmc_reacher-easy_detpmp-v0") +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-easy_promp-v0") register( id=f'dmc_reacher-hard_dmp-v0', @@ -139,8 +137,8 @@ register( ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0") register( - id=f'dmc_reacher-hard_detpmp-v0', - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + id=f'dmc_reacher-hard_promp-v0', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', kwargs={ "name": f"reacher-hard", "time_limit": 20, @@ -150,7 +148,6 @@ register( "num_dof": 2, "num_basis": 5, "duration": 20, - "width": 0.025, "policy_type": "motor", "weights_scale": 0.2, "zero_start": True, @@ -161,7 +158,7 @@ register( } } ) -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("dmc_reacher-hard_detpmp-v0") +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-hard_promp-v0") _dmc_cartpole_tasks = ["balance", "balance_sparse", "swingup", "swingup_sparse"] @@ -196,10 +193,10 @@ for _task in _dmc_cartpole_tasks: ) ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) - _env_id = f'dmc_cartpole-{_task}_detpmp-v0' + _env_id = f'dmc_cartpole-{_task}_promp-v0' register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', kwargs={ "name": f"cartpole-{_task}", # "time_limit": 1, @@ -210,7 +207,6 @@ for _task in _dmc_cartpole_tasks: "num_dof": 1, "num_basis": 5, "duration": 10, - "width": 0.025, "policy_type": "motor", "weights_scale": 0.2, "zero_start": True, @@ -221,7 +217,7 @@ for _task in _dmc_cartpole_tasks: } } ) - ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id) + ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) _env_id = 
f'dmc_cartpole-two_poles_dmp-v0' register( @@ -253,10 +249,10 @@ register( ) ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) -_env_id = f'dmc_cartpole-two_poles_detpmp-v0' +_env_id = f'dmc_cartpole-two_poles_promp-v0' register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', kwargs={ "name": f"cartpole-two_poles", # "time_limit": 1, @@ -267,7 +263,6 @@ register( "num_dof": 1, "num_basis": 5, "duration": 10, - "width": 0.025, "policy_type": "motor", "weights_scale": 0.2, "zero_start": True, @@ -278,7 +273,7 @@ register( } } ) -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id) +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) _env_id = f'dmc_cartpole-three_poles_dmp-v0' register( @@ -310,10 +305,10 @@ register( ) ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) -_env_id = f'dmc_cartpole-three_poles_detpmp-v0' +_env_id = f'dmc_cartpole-three_poles_promp-v0' register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', kwargs={ "name": f"cartpole-three_poles", # "time_limit": 1, @@ -324,7 +319,6 @@ register( "num_dof": 1, "num_basis": 5, "duration": 10, - "width": 0.025, "policy_type": "motor", "weights_scale": 0.2, "zero_start": True, @@ -335,7 +329,7 @@ register( } } ) -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id) +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) # DeepMind Manipulation @@ -364,8 +358,8 @@ register( ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0") register( - id=f'dmc_manipulation-reach_site_detpmp-v0', - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + id=f'dmc_manipulation-reach_site_promp-v0', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', kwargs={ "name": f"manipulation-reach_site_features", # "time_limit": 1, @@ -375,11 +369,10 @@ register( "num_dof": 9, "num_basis": 5, "duration": 10, - "width": 0.025, "policy_type": "velocity", "weights_scale": 0.2, "zero_start": True, } } ) -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("dmc_manipulation-reach_site_detpmp-v0") +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_manipulation-reach_site_promp-v0") diff --git a/alr_envs/examples/examples_dmc.py b/alr_envs/examples/examples_dmc.py index 2d310c4..d223d3c 100644 --- a/alr_envs/examples/examples_dmc.py +++ b/alr_envs/examples/examples_dmc.py @@ -84,7 +84,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True): } env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs, **kwargs) # OR for a deterministic ProMP (other mp_kwargs are required, see metaworld_examples): - # env = alr_envs.make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_args) + # env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_args) # This renders the full MP trajectory # It is only required to call render() once in the beginning, which renders every consecutive trajectory. 
@@ -128,7 +128,7 @@ if __name__ == '__main__': example_dmc("manipulation-reach_site_features", seed=10, iterations=250, render=render) # Gym + DMC hybrid task provided in the MP framework - example_dmc("dmc_ball_in_cup-catch_detpmp-v0", seed=10, iterations=1, render=render) + example_dmc("dmc_ball_in_cup-catch_promp-v0", seed=10, iterations=1, render=render) # Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is already registered above diff --git a/alr_envs/examples/examples_metaworld.py b/alr_envs/examples/examples_metaworld.py index e88ed6c..9ead50c 100644 --- a/alr_envs/examples/examples_metaworld.py +++ b/alr_envs/examples/examples_metaworld.py @@ -76,7 +76,7 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True): "policy_type": "metaworld", # custom controller type for metaworld environments } - env = alr_envs.make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs) + env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs) # OR for a DMP (other mp_kwargs are required, see dmc_examples): # env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs, **kwargs) @@ -122,7 +122,7 @@ if __name__ == '__main__': example_dmc("button-press-v2", seed=10, iterations=500, render=render) # MP + MetaWorld hybrid task provided in the our framework - example_dmc("ButtonPressDetPMP-v2", seed=10, iterations=1, render=render) + example_dmc("ButtonPressProMP-v2", seed=10, iterations=1, render=render) # Custom MetaWorld task example_custom_dmc_and_mp(seed=10, iterations=1, render=render) diff --git a/alr_envs/examples/examples_motion_primitives.py b/alr_envs/examples/examples_motion_primitives.py index 0df05c1..1a679df 100644 --- a/alr_envs/examples/examples_motion_primitives.py +++ b/alr_envs/examples/examples_motion_primitives.py @@ -126,7 +126,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): } env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs) # OR for a deterministic ProMP: - # env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs) + # env = make_promp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs) if render: env.render(mode="human") @@ -147,7 +147,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): if __name__ == '__main__': - render = True + render = False # DMP example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render) diff --git a/alr_envs/examples/examples_open_ai.py b/alr_envs/examples/examples_open_ai.py index 9f90be5..dc0c558 100644 --- a/alr_envs/examples/examples_open_ai.py +++ b/alr_envs/examples/examples_open_ai.py @@ -6,7 +6,7 @@ def example_mp(env_name, seed=1): Example for running a motion primitive based version of a OpenAI-gym environment, which is already registered. For more information on motion primitive specific stuff, look at the mp examples. 
Args: - env_name: DetPMP env_id + env_name: ProMP env_id seed: seed Returns: @@ -35,7 +35,7 @@ if __name__ == '__main__': # example_mp("ReacherDMP-v2") # DetProMP - example_mp("ContinuousMountainCarDetPMP-v0") - example_mp("ReacherDetPMP-v2") - example_mp("FetchReachDenseDetPMP-v1") - example_mp("FetchSlideDenseDetPMP-v1") + example_mp("ContinuousMountainCarProMP-v0") + example_mp("ReacherProMP-v2") + example_mp("FetchReachDenseProMP-v1") + example_mp("FetchSlideDenseProMP-v1") diff --git a/alr_envs/examples/pd_control_gain_tuning.py b/alr_envs/examples/pd_control_gain_tuning.py index 3fb8251..90aac11 100644 --- a/alr_envs/examples/pd_control_gain_tuning.py +++ b/alr_envs/examples/pd_control_gain_tuning.py @@ -2,7 +2,7 @@ import numpy as np from matplotlib import pyplot as plt from alr_envs import dmc, meta -from alr_envs.utils.make_env_helpers import make_detpmp_env +from alr_envs.utils.make_env_helpers import make_promp_env # This might work for some environments, however, please verify either way the correct trajectory information # for your environment are extracted below @@ -26,8 +26,8 @@ mp_kwargs = { kwargs = dict(time_limit=2, episode_length=100) -env = make_detpmp_env(env_id, wrappers, seed=SEED, mp_kwargs=mp_kwargs, - **kwargs) +env = make_promp_env(env_id, wrappers, seed=SEED, mp_kwargs=mp_kwargs, + **kwargs) # Plot difference between real trajectory and target MP trajectory env.reset() diff --git a/alr_envs/meta/__init__.py b/alr_envs/meta/__init__.py index 9db0689..5651224 100644 --- a/alr_envs/meta/__init__.py +++ b/alr_envs/meta/__init__.py @@ -3,7 +3,7 @@ from gym import register from . import goal_object_change_mp_wrapper, goal_change_mp_wrapper, goal_endeffector_change_mp_wrapper, \ object_change_mp_wrapper -ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "DetPMP": []} +ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} # MetaWorld @@ -12,10 +12,10 @@ _goal_change_envs = ["assembly-v2", "pick-out-of-hole-v2", "plate-slide-v2", "pl for _task in _goal_change_envs: task_id_split = _task.split("-") name = "".join([s.capitalize() for s in task_id_split[:-1]]) - _env_id = f'{name}DetPMP-{task_id_split[-1]}' + _env_id = f'{name}ProMP-{task_id_split[-1]}' register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', kwargs={ "name": _task, "wrappers": [goal_change_mp_wrapper.MPWrapper], @@ -24,22 +24,21 @@ for _task in _goal_change_envs: "num_basis": 5, "duration": 6.25, "post_traj_time": 0, - "width": 0.025, "zero_start": True, "policy_type": "metaworld", } } ) - ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id) + ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) _object_change_envs = ["bin-picking-v2", "hammer-v2", "sweep-into-v2"] for _task in _object_change_envs: task_id_split = _task.split("-") name = "".join([s.capitalize() for s in task_id_split[:-1]]) - _env_id = f'{name}DetPMP-{task_id_split[-1]}' + _env_id = f'{name}ProMP-{task_id_split[-1]}' register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', kwargs={ "name": _task, "wrappers": [object_change_mp_wrapper.MPWrapper], @@ -48,13 +47,12 @@ for _task in _object_change_envs: "num_basis": 5, "duration": 6.25, "post_traj_time": 0, - "width": 0.025, "zero_start": True, "policy_type": "metaworld", } } ) - 
ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id) + ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) _goal_and_object_change_envs = ["box-close-v2", "button-press-v2", "button-press-wall-v2", "button-press-topdown-v2", "button-press-topdown-wall-v2", "coffee-button-v2", "coffee-pull-v2", @@ -70,10 +68,10 @@ _goal_and_object_change_envs = ["box-close-v2", "button-press-v2", "button-press for _task in _goal_and_object_change_envs: task_id_split = _task.split("-") name = "".join([s.capitalize() for s in task_id_split[:-1]]) - _env_id = f'{name}DetPMP-{task_id_split[-1]}' + _env_id = f'{name}ProMP-{task_id_split[-1]}' register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', kwargs={ "name": _task, "wrappers": [goal_object_change_mp_wrapper.MPWrapper], @@ -82,22 +80,21 @@ for _task in _goal_and_object_change_envs: "num_basis": 5, "duration": 6.25, "post_traj_time": 0, - "width": 0.025, "zero_start": True, "policy_type": "metaworld", } } ) - ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id) + ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) _goal_and_endeffector_change_envs = ["basketball-v2"] for _task in _goal_and_endeffector_change_envs: task_id_split = _task.split("-") name = "".join([s.capitalize() for s in task_id_split[:-1]]) - _env_id = f'{name}DetPMP-{task_id_split[-1]}' + _env_id = f'{name}ProMP-{task_id_split[-1]}' register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', kwargs={ "name": _task, "wrappers": [goal_endeffector_change_mp_wrapper.MPWrapper], @@ -106,10 +103,9 @@ for _task in _goal_and_endeffector_change_envs: "num_basis": 5, "duration": 6.25, "post_traj_time": 0, - "width": 0.025, "zero_start": True, "policy_type": "metaworld", } } ) - ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id) + ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) diff --git a/alr_envs/open_ai/README.MD b/alr_envs/open_ai/README.MD index 985c093..62d1f20 100644 --- a/alr_envs/open_ai/README.MD +++ b/alr_envs/open_ai/README.MD @@ -8,7 +8,7 @@ These environments are wrapped-versions of their OpenAI-gym counterparts. |Name| Description|Trajectory Horizon|Action Dimension|Context Dimension |---|---|---|---|---| -|`ContinuousMountainCarDetPMP-v0`| A DetPmP wrapped version of the ContinuousMountainCar-v0 environment. | 100 | 1 -|`ReacherDetPMP-v2`| A DetPmP wrapped version of the Reacher-v2 environment. | 50 | 2 -|`FetchSlideDenseDetPMP-v1`| A DetPmP wrapped version of the FetchSlideDense-v1 environment. | 50 | 4 -|`FetchReachDenseDetPMP-v1`| A DetPmP wrapped version of the FetchReachDense-v1 environment. | 50 | 4 +|`ContinuousMountainCarProMP-v0`| A ProMP wrapped version of the ContinuousMountainCar-v0 environment. | 100 | 1 +|`ReacherProMP-v2`| A ProMP wrapped version of the Reacher-v2 environment. | 50 | 2 +|`FetchSlideDenseProMP-v1`| A ProMP wrapped version of the FetchSlideDense-v1 environment. | 50 | 4 +|`FetchReachDenseProMP-v1`| A ProMP wrapped version of the FetchReachDense-v1 environment. | 50 | 4 diff --git a/alr_envs/open_ai/__init__.py b/alr_envs/open_ai/__init__.py index 63083ca..41b770f 100644 --- a/alr_envs/open_ai/__init__.py +++ b/alr_envs/open_ai/__init__.py @@ -3,7 +3,7 @@ from gym.wrappers import FlattenObservation from . 
import classic_control, mujoco, robotics -ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "DetPMP": []} +ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} # Short Continuous Mountain Car register( @@ -16,8 +16,8 @@ register( # Open AI # Classic Control register( - id='ContinuousMountainCarDetPMP-v1', - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + id='ContinuousMountainCarProMP-v1', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', kwargs={ "name": "alr_envs:MountainCarContinuous-v1", "wrappers": [classic_control.continuous_mountain_car.MPWrapper], @@ -26,7 +26,6 @@ register( "num_basis": 4, "duration": 2, "post_traj_time": 0, - "width": 0.02, "zero_start": True, "policy_type": "motor", "policy_kwargs": { @@ -36,11 +35,11 @@ register( } } ) -ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("ContinuousMountainCarDetPMP-v1") +ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ContinuousMountainCarProMP-v1") register( - id='ContinuousMountainCarDetPMP-v0', - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + id='ContinuousMountainCarProMP-v0', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', kwargs={ "name": "gym.envs.classic_control:MountainCarContinuous-v0", "wrappers": [classic_control.continuous_mountain_car.MPWrapper], @@ -49,7 +48,6 @@ register( "num_basis": 4, "duration": 19.98, "post_traj_time": 0, - "width": 0.02, "zero_start": True, "policy_type": "motor", "policy_kwargs": { @@ -59,11 +57,11 @@ register( } } ) -ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("ContinuousMountainCarDetPMP-v0") +ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ContinuousMountainCarProMP-v0") register( - id='ReacherDetPMP-v2', - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + id='ReacherProMP-v2', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', kwargs={ "name": "gym.envs.mujoco:Reacher-v2", "wrappers": [mujoco.reacher_v2.MPWrapper], @@ -72,7 +70,6 @@ register( "num_basis": 6, "duration": 1, "post_traj_time": 0, - "width": 0.02, "zero_start": True, "policy_type": "motor", "policy_kwargs": { @@ -82,11 +79,11 @@ register( } } ) -ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("ReacherDetPMP-v2") +ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ReacherProMP-v2") register( - id='FetchSlideDenseDetPMP-v1', - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + id='FetchSlideDenseProMP-v1', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', kwargs={ "name": "gym.envs.robotics:FetchSlideDense-v1", "wrappers": [FlattenObservation, robotics.fetch.MPWrapper], @@ -95,17 +92,16 @@ register( "num_basis": 5, "duration": 2, "post_traj_time": 0, - "width": 0.02, "zero_start": True, "policy_type": "position" } } ) -ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("FetchSlideDenseDetPMP-v1") +ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("FetchSlideDenseProMP-v1") register( - id='FetchSlideDetPMP-v1', - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + id='FetchSlideProMP-v1', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', kwargs={ "name": "gym.envs.robotics:FetchSlide-v1", "wrappers": [FlattenObservation, robotics.fetch.MPWrapper], @@ -114,17 +110,16 @@ register( "num_basis": 5, "duration": 2, "post_traj_time": 0, - "width": 0.02, "zero_start": True, "policy_type": "position" } } ) 
-ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("FetchSlideDetPMP-v1") +ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("FetchSlideProMP-v1") register( - id='FetchReachDenseDetPMP-v1', - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + id='FetchReachDenseProMP-v1', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', kwargs={ "name": "gym.envs.robotics:FetchReachDense-v1", "wrappers": [FlattenObservation, robotics.fetch.MPWrapper], @@ -133,17 +128,16 @@ register( "num_basis": 5, "duration": 2, "post_traj_time": 0, - "width": 0.02, "zero_start": True, "policy_type": "position" } } ) -ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("FetchReachDenseDetPMP-v1") +ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("FetchReachDenseProMP-v1") register( - id='FetchReachDetPMP-v1', - entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + id='FetchReachProMP-v1', + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', kwargs={ "name": "gym.envs.robotics:FetchReach-v1", "wrappers": [FlattenObservation, robotics.fetch.MPWrapper], @@ -152,10 +146,9 @@ register( "num_basis": 5, "duration": 2, "post_traj_time": 0, - "width": 0.02, "zero_start": True, "policy_type": "position" } } ) -ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("FetchReachDetPMP-v1") +ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("FetchReachProMP-v1") diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py index 19f54d6..4300439 100644 --- a/alr_envs/utils/make_env_helpers.py +++ b/alr_envs/utils/make_env_helpers.py @@ -1,3 +1,4 @@ +import warnings from typing import Iterable, Type, Union import gym @@ -5,7 +6,6 @@ import numpy as np from gym.envs.registration import EnvSpec from mp_env_api import MPEnvWrapper -from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper from mp_env_api.mp_wrappers.promp_wrapper import ProMPWrapper @@ -48,6 +48,11 @@ def make(env_id: str, seed, **kwargs): Returns: Gym environment """ + if any([det_pmp in env_id for det_pmp in ["DetPMP", "detpmp"]]): + warnings.warn("DetPMP is deprecated and converted to ProMP") + env_id = env_id.replace("DetPMP", "ProMP") + env_id = env_id.replace("detpmp", "promp") + try: # Add seed to kwargs in case it is a predefined gym+dmc hybrid environment. if env_id.startswith("dmc"): @@ -153,26 +158,6 @@ def make_promp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs={}, **kwar return ProMPWrapper(_env, **mp_kwargs) -def make_detpmp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs={}, **kwargs): - """ - This can also be used standalone for manually building a custom Det ProMP environment. - Args: - env_id: base_env_name, - wrappers: list of wrappers (at least an MPEnvWrapper), - mp_kwargs: dict of at least {num_dof: int, num_basis: int, width: int} - - Returns: Det ProMP wrapped gym env - - """ - _verify_time_limit(mp_kwargs.get("duration", None), kwargs.get("time_limit", None)) - - _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs) - - _verify_dof(_env, mp_kwargs.get("num_dof")) - - return DetPMPWrapper(_env, **mp_kwargs) - - def make_dmp_env_helper(**kwargs): """ Helper function for registering a DMP gym environments. @@ -212,26 +197,6 @@ def make_promp_env_helper(**kwargs): mp_kwargs=kwargs.pop("mp_kwargs"), **kwargs) -def make_detpmp_env_helper(**kwargs): - """ - Helper function for registering ProMP gym environments. 
- This can also be used standalone for manually building a custom ProMP environment. - Args: - **kwargs: expects at least the following: - { - "name": base_env_name, - "wrappers": list of wrappers (at least an MPEnvWrapper), - "mp_kwargs": dict of at least {num_dof: int, num_basis: int, width: int} - } - - Returns: DMP wrapped gym env - - """ - seed = kwargs.pop("seed", None) - return make_detpmp_env(env_id=kwargs.pop("name"), wrappers=kwargs.pop("wrappers"), seed=seed, - mp_kwargs=kwargs.pop("mp_kwargs"), **kwargs) - - def _verify_time_limit(mp_time_limit: Union[None, float], env_time_limit: Union[None, float]): """ When using DMC check if a manually specified time limit matches the trajectory duration the MP receives. diff --git a/test/test_envs.py b/test/test_envs.py index f8d7269..b3263ba 100644 --- a/test/test_envs.py +++ b/test/test_envs.py @@ -98,8 +98,8 @@ class TestMPEnvironments(unittest.TestCase): with self.subTest(msg=env_id): self._run_env(env_id) - with self.subTest(msg="DetPMP"): - for env_id in alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS['DetPMP']: + with self.subTest(msg="ProMP"): + for env_id in alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS['ProMP']: with self.subTest(msg=env_id): self._run_env(env_id) @@ -110,8 +110,8 @@ class TestMPEnvironments(unittest.TestCase): with self.subTest(msg=env_id): self._run_env(env_id) - with self.subTest(msg="DetPMP"): - for env_id in alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS['DetPMP']: + with self.subTest(msg="ProMP"): + for env_id in alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS['ProMP']: with self.subTest(msg=env_id): self._run_env(env_id) @@ -122,8 +122,8 @@ class TestMPEnvironments(unittest.TestCase): with self.subTest(msg=env_id): self._run_env(env_id) - with self.subTest(msg="DetPMP"): - for env_id in alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS['DetPMP']: + with self.subTest(msg="ProMP"): + for env_id in alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS['ProMP']: with self.subTest(msg=env_id): self._run_env(env_id) @@ -134,8 +134,8 @@ class TestMPEnvironments(unittest.TestCase): with self.subTest(msg=env_id): self._run_env(env_id) - with self.subTest(msg="DetPMP"): - for env_id in alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS['DetPMP']: + with self.subTest(msg="ProMP"): + for env_id in alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS['ProMP']: with self.subTest(msg=env_id): self._run_env(env_id) @@ -143,29 +143,29 @@ class TestMPEnvironments(unittest.TestCase): """Tests that identical seeds produce identical trajectories for ALR MP Envs.""" with self.subTest(msg="DMP"): self._run_env_determinism(alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"]) - with self.subTest(msg="DetPMP"): - self._run_env_determinism(alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"]) + with self.subTest(msg="ProMP"): + self._run_env_determinism(alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"]) def test_openai_environment_determinism(self): """Tests that identical seeds produce identical trajectories for OpenAI gym MP Envs.""" with self.subTest(msg="DMP"): self._run_env_determinism(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"]) - with self.subTest(msg="DetPMP"): - self._run_env_determinism(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"]) + with self.subTest(msg="ProMP"): + self._run_env_determinism(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"]) def test_dmc_environment_determinism(self): """Tests that identical seeds produce identical trajectories for DMC MP Envs.""" with 
self.subTest(msg="DMP"): self._run_env_determinism(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"]) - with self.subTest(msg="DetPMP"): - self._run_env_determinism(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"]) + with self.subTest(msg="ProMP"): + self._run_env_determinism(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"]) def test_metaworld_environment_determinism(self): """Tests that identical seeds produce identical trajectories for Metaworld MP Envs.""" with self.subTest(msg="DMP"): self._run_env_determinism(alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"]) - with self.subTest(msg="DetPMP"): - self._run_env_determinism(alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"]) + with self.subTest(msg="ProMP"): + self._run_env_determinism(alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"]) if __name__ == '__main__': diff --git a/test/test_metaworld_envs.py b/test/test_metaworld_envs.py index a6bd244..b84ba3c 100644 --- a/test/test_metaworld_envs.py +++ b/test/test_metaworld_envs.py @@ -81,13 +81,13 @@ class TestStepMetaWorlEnvironments(unittest.TestCase): def _verify_done(self, done): self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.") - def test_dmc_functionality(self): + def test_metaworld_functionality(self): """Tests that environments runs without errors using random actions.""" for env_id in ALL_ENVS: with self.subTest(msg=env_id): self._run_env(env_id) - def test_dmc_determinism(self): + def test_metaworld_determinism(self): """Tests that identical seeds produce identical trajectories.""" seed = 0 # Iterate over two trajectories, which should have the same state and action sequence From a0af74358562b69ddf8c7de6f7d276d56433bdfd Mon Sep 17 00:00:00 2001 From: Maximilian Huettenrauch Date: Mon, 6 Dec 2021 13:43:45 +0100 Subject: [PATCH 2/3] updated table tennis and beerpong for promp usage --- alr_envs/alr/__init__.py | 64 +++++++++++++------ alr_envs/alr/classic_control/README.MD | 5 +- alr_envs/alr/mujoco/__init__.py | 2 +- alr_envs/alr/mujoco/beerpong/beerpong.py | 10 +-- .../alr/mujoco/beerpong/beerpong_reward.py | 2 + .../mujoco/beerpong/beerpong_reward_staged.py | 55 +++++++--------- .../alr/mujoco/beerpong/beerpong_simple.py | 11 ++-- alr_envs/alr/mujoco/table_tennis/tt_gym.py | 44 +++++++------ 8 files changed, 107 insertions(+), 86 deletions(-) diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py index f835328..e2ba068 100644 --- a/alr_envs/alr/__init__.py +++ b/alr_envs/alr/__init__.py @@ -198,14 +198,19 @@ register( ## Table Tennis register(id='TableTennis2DCtxt-v0', - entry_point='alr_envs.alr.mujoco:TT_Env_Gym', + entry_point='alr_envs.alr.mujoco:TTEnvGym', max_episode_steps=MAX_EPISODE_STEPS, - kwargs={'ctxt_dim':2}) + kwargs={'ctxt_dim': 2}) + +register(id='TableTennis2DCtxt-v1', + entry_point='alr_envs.alr.mujoco:TTEnvGym', + max_episode_steps=1750, + kwargs={'ctxt_dim': 2, 'fixed_goal': True}) register(id='TableTennis4DCtxt-v0', - entry_point='alr_envs.alr.mujoco:TT_Env_Gym', + entry_point='alr_envs.alr.mujoco:TTEnvGym', max_episode_steps=MAX_EPISODE_STEPS, - kwargs={'ctxt_dim':4}) + kwargs={'ctxt_dim': 4}) ## BeerPong difficulties = ["simple", "intermediate", "hard", "hardest"] @@ -369,13 +374,10 @@ register( "mp_kwargs": { "num_dof": 7, "num_basis": 2, - "n_zero_bases": 2, - "duration": 0.5, - "post_traj_time": 2.5, - # "width": 0.01, - # "off": 0.01, + "duration": 1, + "post_traj_time": 2, "policy_type": "motor", - "weights_scale": 0.08, + "weights_scale": 
0.2, "zero_start": True, "zero_goal": False, "policy_kwargs": { @@ -388,22 +390,46 @@ register( ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("BeerpongProMP-v0") ## Table Tennis +ctxt_dim = [2, 4] +for _v, cd in enumerate(ctxt_dim): + _env_id = f'TableTennisProMP-v{_v}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + kwargs={ + "name": "alr_envs:TableTennis{}DCtxt-v0".format(cd), + "wrappers": [mujoco.table_tennis.MPWrapper], + "mp_kwargs": { + "num_dof": 7, + "num_basis": 2, + "duration": 1.25, + "post_traj_time": 4.5, + "policy_type": "motor", + "weights_scale": 1.0, + "zero_start": True, + "zero_goal": False, + "policy_kwargs": { + "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]), + "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]) + } + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + register( - id='TableTennisProMP-v0', + id='TableTennisProMP-v2', entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', kwargs={ - "name": "alr_envs:TableTennis4DCtxt-v0", + "name": "alr_envs:TableTennis2DCtxt-v1", "wrappers": [mujoco.table_tennis.MPWrapper], "mp_kwargs": { "num_dof": 7, "num_basis": 2, - "n_zero_bases": 2, - "duration": 1.25, - "post_traj_time": 4.5, - # "width": 0.01, - # "off": 0.01, + "duration": 1., + "post_traj_time": 2.5, "policy_type": "motor", - "weights_scale": 1.0, + "weights_scale": 0.2, "zero_start": True, "zero_goal": False, "policy_kwargs": { @@ -413,4 +439,4 @@ register( } } ) -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("TableTennisProMP-v0") +ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("TableTennisProMP-v2") diff --git a/alr_envs/alr/classic_control/README.MD b/alr_envs/alr/classic_control/README.MD index 0bd2f92..bd1b68b 100644 --- a/alr_envs/alr/classic_control/README.MD +++ b/alr_envs/alr/classic_control/README.MD @@ -13,9 +13,6 @@ |---|---|---|---|---| |`ViaPointReacherDMP-v0`| A DMP provides a trajectory for the `ViaPointReacher-v0` task. | 200 | 25 |`HoleReacherFixedGoalDMP-v0`| A DMP provides a trajectory for the `HoleReacher-v0` task with a fixed goal attractor. | 200 | 25 -|`HoleReacherDMP-v0`| A DMP provides a trajectory for the `HoleReacher-v0` task. The goal attractor needs to be learned. | 200 | 30 -|`ALRBallInACupSimpleDMP-v0`| A DMP provides a trajectory for the `ALRBallInACupSimple-v0` task where only 3 joints are actuated. | 4000 | 15 -|`ALRBallInACupDMP-v0`| A DMP provides a trajectory for the `ALRBallInACup-v0` task. | 4000 | 35 -|`ALRBallInACupGoalDMP-v0`| A DMP provides a trajectory for the `ALRBallInACupGoal-v0` task. | 4000 | 35 | 3 +|`HoleReacherDMP-v0`| A DMP provides a trajectory for the `HoleReacher-v0` task. The goal attractor needs to be learned. 
| 200 | 30 [//]: |`HoleReacherProMPP-v0`| \ No newline at end of file diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py index 30e1e7c..cdb3cde 100644 --- a/alr_envs/alr/mujoco/__init__.py +++ b/alr_envs/alr/mujoco/__init__.py @@ -2,5 +2,5 @@ from .reacher.alr_reacher import ALRReacherEnv from .reacher.balancing import BalancingEnv from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv -from .table_tennis.tt_gym import TT_Env_Gym +from .table_tennis.tt_gym import TTEnvGym from .beerpong.beerpong import ALRBeerBongEnv \ No newline at end of file diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py index a10e54a..a86f0a7 100644 --- a/alr_envs/alr/mujoco/beerpong/beerpong.py +++ b/alr_envs/alr/mujoco/beerpong/beerpong.py @@ -27,10 +27,10 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): self.ball_site_id = 0 self.ball_id = 11 - self._release_step = 100 # time step of ball release + self._release_step = 175 # time step of ball release - self.sim_time = 4 # seconds - self.ep_length = 600 # based on 5 seconds with dt = 0.005 int(self.sim_time / self.dt) + self.sim_time = 3 # seconds + self.ep_length = 600 # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt) self.cup_table_id = 10 if noisy: @@ -143,7 +143,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): q_vel=self.sim.data.qvel[0:7].ravel().copy(), ball_pos=ball_pos, ball_vel=ball_vel, - is_success=success, + success=success, is_collided=is_collided, sim_crash=crash) def check_traj_in_joint_limits(self): @@ -171,7 +171,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): if __name__ == "__main__": - env = ALRBeerBongEnv(reward_type="no_context", difficulty='hardest') + env = ALRBeerBongEnv(reward_type="staged", difficulty='hardest') # env.configure(ctxt) env.reset() diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py index 3896e82..dc39ca8 100644 --- a/alr_envs/alr/mujoco/beerpong/beerpong_reward.py +++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py @@ -71,6 +71,7 @@ class BeerPongReward: goal_pos = env.sim.data.site_xpos[self.goal_id] ball_pos = env.sim.data.body_xpos[self.ball_id] + ball_vel = env.sim.data.body_xvelp[self.ball_id] goal_final_pos = env.sim.data.site_xpos[self.goal_final_id] self.dists.append(np.linalg.norm(goal_pos - ball_pos)) self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos)) @@ -131,6 +132,7 @@ class BeerPongReward: infos["success"] = success infos["is_collided"] = self._is_collided infos["ball_pos"] = ball_pos.copy() + infos["ball_vel"] = ball_vel.copy() infos["action_cost"] = 5e-4 * action_cost return reward, infos diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py index 9d1d878..d64f179 100644 --- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py +++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py @@ -81,32 +81,36 @@ class BeerPongReward: action_cost = np.sum(np.square(action)) self.action_costs.append(action_cost) + if not self.ball_table_contact: + self.ball_table_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id, + self.table_collision_id) + self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids) if env._steps == env.ep_length - 1 or self._is_collided: min_dist = np.min(self.dists) - ball_table_bounce = 
self._check_collision_single_objects(env.sim, self.ball_collision_id, - self.table_collision_id) - ball_cup_table_cont = self._check_collision_with_set_of_objects(env.sim, self.ball_collision_id, - self.cup_collision_ids) - ball_wall_cont = self._check_collision_single_objects(env.sim, self.ball_collision_id, - self.wall_collision_id) + final_dist = self.dists_final[-1] + ball_in_cup = self._check_collision_single_objects(env.sim, self.ball_collision_id, self.cup_table_collision_id) - if not ball_in_cup: - cost_offset = 2 - if not ball_cup_table_cont and not ball_table_bounce and not ball_wall_cont: - cost_offset += 2 - cost = cost_offset + min_dist ** 2 + 0.5 * self.dists_final[-1] ** 2 + 1e-7 * action_cost - else: - cost = self.dists_final[-1] ** 2 + 1.5 * action_cost * 1e-7 - reward = - 1 * cost - self.collision_penalty * int(self._is_collided) + # encourage bounce before falling into cup + if not ball_in_cup: + if not self.ball_table_contact: + reward = 0.2 * (1 - np.tanh(min_dist ** 2)) + 0.1 * (1 - np.tanh(final_dist ** 2)) + else: + reward = (1 - np.tanh(min_dist ** 2)) + 0.5 * (1 - np.tanh(final_dist ** 2)) + else: + if not self.ball_table_contact: + reward = 2 * (1 - np.tanh(final_dist ** 2)) + 1 * (1 - np.tanh(min_dist ** 2)) + 1 + else: + reward = 2 * (1 - np.tanh(final_dist ** 2)) + 1 * (1 - np.tanh(min_dist ** 2)) + 3 + + # reward = - 1 * cost - self.collision_penalty * int(self._is_collided) success = ball_in_cup crash = self._is_collided else: - reward = - 1e-7 * action_cost - cost = 0 + reward = - 1e-4 * action_cost success = False crash = False @@ -115,26 +119,11 @@ class BeerPongReward: infos["is_collided"] = self._is_collided infos["ball_pos"] = ball_pos.copy() infos["ball_vel"] = ball_vel.copy() - infos["action_cost"] = 5e-4 * action_cost - infos["task_cost"] = cost + infos["action_cost"] = action_cost + infos["task_reward"] = reward return reward, infos - def get_cost_offset(self): - if self.ball_ground_contact: - return 200 - - if not self.ball_table_contact: - return 100 - - if not self.ball_in_cup: - return 50 - - if self.ball_in_cup and self.ball_cup_contact and not self.noisy_bp: - return 10 - - return 0 - def _check_collision_single_objects(self, sim, id_1, id_2): for coni in range(0, sim.data.ncon): con = sim.data.contact[coni] diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_simple.py b/alr_envs/alr/mujoco/beerpong/beerpong_simple.py index 73da83d..1708d38 100644 --- a/alr_envs/alr/mujoco/beerpong/beerpong_simple.py +++ b/alr_envs/alr/mujoco/beerpong/beerpong_simple.py @@ -6,8 +6,6 @@ from gym.envs.mujoco import MujocoEnv class ALRBeerpongEnv(MujocoEnv, utils.EzPickle): def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None): - utils.EzPickle.__init__(**locals()) - self._steps = 0 self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", @@ -28,15 +26,13 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle): self.context = None - MujocoEnv.__init__(self, model_path=self.xml_path, frame_skip=n_substeps) - # alr_mujoco_env.AlrMujocoEnv.__init__(self, # self.xml_path, # apply_gravity_comp=apply_gravity_comp, # n_substeps=n_substeps) self.sim_time = 8 # seconds - self.sim_steps = int(self.sim_time / self.dt) + # self.sim_steps = int(self.sim_time / self.dt) if reward_function is None: from alr_envs.alr.mujoco.beerpong.beerpong_reward_simple import BeerpongReward reward_function = BeerpongReward @@ -46,6 +42,9 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle): self.cup_table_id = 
self.sim.model._body_name2id["cup_table"] # self.bounce_table_id = self.sim.model._body_name2id["bounce_table"] + MujocoEnv.__init__(self, model_path=self.xml_path, frame_skip=n_substeps) + utils.EzPickle.__init__(self) + @property def current_pos(self): return self.sim.data.qpos[0:7].copy() @@ -90,7 +89,7 @@ class ALRBeerpongEnv(MujocoEnv, utils.EzPickle): reward_ctrl = - np.square(a).sum() action_cost = np.sum(np.square(a)) - crash = self.do_simulation(a) + crash = self.do_simulation(a, self.frame_skip) joint_cons_viol = self.check_traj_in_joint_limits() self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy()) diff --git a/alr_envs/alr/mujoco/table_tennis/tt_gym.py b/alr_envs/alr/mujoco/table_tennis/tt_gym.py index 635d49d..f42c4c7 100644 --- a/alr_envs/alr/mujoco/table_tennis/tt_gym.py +++ b/alr_envs/alr/mujoco/table_tennis/tt_gym.py @@ -10,7 +10,7 @@ from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward #TODO: Check for simulation stability. Make sure the code runs even for sim crash -MAX_EPISODE_STEPS = 1375 +MAX_EPISODE_STEPS = 2875 BALL_NAME_CONTACT = "target_ball_contact" BALL_NAME = "target_ball" TABLE_NAME = 'table_tennis_table' @@ -22,15 +22,20 @@ RACKET_NAME = 'bat' CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.0]]) CONTEXT_RANGE_BOUNDS_4DIM = np.array([[-1.35, -0.75, -1.25, -0.75], [-0.1, 0.75, -0.1, 0.75]]) -class TT_Env_Gym(MujocoEnv, utils.EzPickle): - def __init__(self, ctxt_dim=2): +class TTEnvGym(MujocoEnv, utils.EzPickle): + + def __init__(self, ctxt_dim=2, fixed_goal=False): model_path = os.path.join(os.path.dirname(__file__), "xml", 'table_tennis_env.xml') self.ctxt_dim = ctxt_dim + self.fixed_goal = fixed_goal if ctxt_dim == 2: self.context_range_bounds = CONTEXT_RANGE_BOUNDS_2DIM - self.goal = np.zeros(3) # 2 x,y + 1z + if self.fixed_goal: + self.goal = np.array([-1, -0.1, 0]) + else: + self.goal = np.zeros(3) # 2 x,y + 1z elif ctxt_dim == 4: self.context_range_bounds = CONTEXT_RANGE_BOUNDS_4DIM self.goal = np.zeros(3) @@ -47,10 +52,10 @@ class TT_Env_Gym(MujocoEnv, utils.EzPickle): self.reward_func = TT_Reward(self.ctxt_dim) self.ball_landing_pos = None - self.hited_ball = False + self.hit_ball = False self.ball_contact_after_hit = False self._ids_set = False - super(TT_Env_Gym, self).__init__(model_path=model_path, frame_skip=1) + super(TTEnvGym, self).__init__(model_path=model_path, frame_skip=1) self.ball_id = self.sim.model._body_name2id[BALL_NAME] # find the proper -> not protected func. 
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_gym.py b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
index 635d49d..f42c4c7 100644
--- a/alr_envs/alr/mujoco/table_tennis/tt_gym.py
+++ b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
@@ -10,7 +10,7 @@ from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward
 
 #TODO: Check for simulation stability. Make sure the code runs even for sim crash
 
-MAX_EPISODE_STEPS = 1375
+MAX_EPISODE_STEPS = 2875
 BALL_NAME_CONTACT = "target_ball_contact"
 BALL_NAME = "target_ball"
 TABLE_NAME = 'table_tennis_table'
@@ -22,15 +22,20 @@ RACKET_NAME = 'bat'
 CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.0]])
 CONTEXT_RANGE_BOUNDS_4DIM = np.array([[-1.35, -0.75, -1.25, -0.75], [-0.1, 0.75, -0.1, 0.75]])
 
-class TT_Env_Gym(MujocoEnv, utils.EzPickle):
-    def __init__(self, ctxt_dim=2):
+class TTEnvGym(MujocoEnv, utils.EzPickle):
+
+    def __init__(self, ctxt_dim=2, fixed_goal=False):
         model_path = os.path.join(os.path.dirname(__file__), "xml", 'table_tennis_env.xml')
         self.ctxt_dim = ctxt_dim
+        self.fixed_goal = fixed_goal
         if ctxt_dim == 2:
             self.context_range_bounds = CONTEXT_RANGE_BOUNDS_2DIM
-            self.goal = np.zeros(3)  # 2 x,y + 1z
+            if self.fixed_goal:
+                self.goal = np.array([-1, -0.1, 0])
+            else:
+                self.goal = np.zeros(3)  # 2 x,y + 1z
         elif ctxt_dim == 4:
             self.context_range_bounds = CONTEXT_RANGE_BOUNDS_4DIM
             self.goal = np.zeros(3)
@@ -47,10 +52,10 @@ class TTEnvGym(MujocoEnv, utils.EzPickle):
         self.reward_func = TT_Reward(self.ctxt_dim)
         self.ball_landing_pos = None
-        self.hited_ball = False
+        self.hit_ball = False
         self.ball_contact_after_hit = False
         self._ids_set = False
-        super(TT_Env_Gym, self).__init__(model_path=model_path, frame_skip=1)
+        super(TTEnvGym, self).__init__(model_path=model_path, frame_skip=1)
         self.ball_id = self.sim.model._body_name2id[BALL_NAME]  # find the proper -> not protected func.
         self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
         self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
@@ -77,15 +82,18 @@ class TTEnvGym(MujocoEnv, utils.EzPickle):
         return obs
 
     def sample_context(self):
-        return np.random.uniform(self.context_range_bounds[0], self.context_range_bounds[1], size=self.ctxt_dim)
+        return self.np_random.uniform(self.context_range_bounds[0], self.context_range_bounds[1], size=self.ctxt_dim)
 
     def reset_model(self):
         self.set_state(self.init_qpos_tt, self.init_qvel_tt)  # reset to initial sim state
         self.time_steps = 0
         self.ball_landing_pos = None
-        self.hited_ball = False
+        self.hit_ball = False
         self.ball_contact_after_hit = False
-        self.goal = self.sample_context()[:2]
+        if self.fixed_goal:
+            self.goal = self.goal[:2]
+        else:
+            self.goal = self.sample_context()[:2]
         if self.ctxt_dim == 2:
             initial_ball_state = ball_init(random=False)  # fixed velocity, fixed position
         elif self.ctxt_dim == 4:
@@ -122,12 +130,12 @@ class TTEnvGym(MujocoEnv, utils.EzPickle):
         if not self._ids_set:
             self._set_ids()
         done = False
-        episode_end = False if self.time_steps+1
Date: Tue, 7 Dec 2021 14:46:31 +0100
Subject: [PATCH 3/3] small bp and tt updates

---
 alr_envs/alr/__init__.py                      | 51 ++++++++++---------
 alr_envs/alr/mujoco/beerpong/beerpong.py      | 28 +++++-----
 .../mujoco/beerpong/beerpong_reward_staged.py |  2 +-
 alr_envs/alr/mujoco/table_tennis/tt_gym.py    | 14 +++--
 alr_envs/alr/mujoco/table_tennis/tt_reward.py |  2 +-
 5 files changed, 55 insertions(+), 42 deletions(-)

diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index e2ba068..90ec78c 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -204,7 +204,7 @@ register(id='TableTennis2DCtxt-v0',
 
 register(id='TableTennis2DCtxt-v1',
          entry_point='alr_envs.alr.mujoco:TTEnvGym',
-         max_episode_steps=1750,
+         max_episode_steps=MAX_EPISODE_STEPS,
          kwargs={'ctxt_dim': 2, 'fixed_goal': True})
 
 register(id='TableTennis4DCtxt-v0',
@@ -365,29 +365,32 @@ for _v in _versions:
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 ## Beerpong
-register(
-    id='BeerpongProMP-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
-    kwargs={
-        "name": "alr_envs:ALRBeerPong-v0",
-        "wrappers": [mujoco.beerpong.MPWrapper],
-        "mp_kwargs": {
-            "num_dof": 7,
-            "num_basis": 2,
-            "duration": 1,
-            "post_traj_time": 2,
-            "policy_type": "motor",
-            "weights_scale": 0.2,
-            "zero_start": True,
-            "zero_goal": False,
-            "policy_kwargs": {
-                "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]),
-                "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+_versions = ["v0", "v1", "v2", "v3"]
+for _v in _versions:
+    _env_id = f'BeerpongProMP-{_v}'
+    register(
+        id=_env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+        kwargs={
+            "name": f"alr_envs:ALRBeerPong-{_v}",
+            "wrappers": [mujoco.beerpong.MPWrapper],
+            "mp_kwargs": {
+                "num_dof": 7,
+                "num_basis": 2,
+                "duration": 1,
+                "post_traj_time": 2,
+                "policy_type": "motor",
+                "weights_scale": 1,
+                "zero_start": True,
+                "zero_goal": False,
+                "policy_kwargs": {
+                    "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]),
+                    "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+                }
             }
         }
-    }
-)
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("BeerpongProMP-v0")
+    )
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 ## Table Tennis
 ctxt_dim = [2, 4]
@@ -429,7 +432,9 @@ register(
             "duration": 1.,
             "post_traj_time": 2.5,
             "policy_type": "motor",
-            "weights_scale": 0.2,
+            "weights_scale": 1,
+            "off": -0.05,
+            "bandwidth_factor": 3.5,
             "zero_start": True,
             "zero_goal": False,
             "policy_kwargs": {
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index a86f0a7..755710a 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -127,24 +127,28 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
             self._steps += 1
         else:
             reward = -30
+            reward_infos = dict()
             success = False
             is_collided = False
             done = True
             ball_pos = np.zeros(3)
             ball_vel = np.zeros(3)
 
-        return ob, reward, done, dict(reward_dist=reward_dist,
-                                      reward_ctrl=reward_ctrl,
-                                      reward=reward,
-                                      velocity=angular_vel,
-                                      # traj=self._q_pos,
-                                      action=a,
-                                      q_pos=self.sim.data.qpos[0:7].ravel().copy(),
-                                      q_vel=self.sim.data.qvel[0:7].ravel().copy(),
-                                      ball_pos=ball_pos,
-                                      ball_vel=ball_vel,
-                                      success=success,
-                                      is_collided=is_collided, sim_crash=crash)
+        infos = dict(reward_dist=reward_dist,
+                     reward_ctrl=reward_ctrl,
+                     reward=reward,
+                     velocity=angular_vel,
+                     # traj=self._q_pos,
+                     action=a,
+                     q_pos=self.sim.data.qpos[0:7].ravel().copy(),
+                     q_vel=self.sim.data.qvel[0:7].ravel().copy(),
+                     ball_pos=ball_pos,
+                     ball_vel=ball_vel,
+                     success=success,
+                     is_collided=is_collided, sim_crash=crash)
+        infos.update(reward_infos)
+
+        return ob, reward, done, infos
 
     def check_traj_in_joint_limits(self):
         return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
index d64f179..e94b470 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -110,7 +110,7 @@ class BeerPongReward:
             success = ball_in_cup
             crash = self._is_collided
         else:
-            reward = - 1e-4 * action_cost
+            reward = - 1e-2 * action_cost
             success = False
             crash = False
 
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_gym.py b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
index f42c4c7..d1c2dc3 100644
--- a/alr_envs/alr/mujoco/table_tennis/tt_gym.py
+++ b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
@@ -10,7 +10,7 @@ from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward
 
 #TODO: Check for simulation stability. Make sure the code runs even for sim crash
 
-MAX_EPISODE_STEPS = 2875
+MAX_EPISODE_STEPS = 1750
 BALL_NAME_CONTACT = "target_ball_contact"
 BALL_NAME = "target_ball"
 TABLE_NAME = 'table_tennis_table'
@@ -42,9 +42,10 @@ class TTEnvGym(MujocoEnv, utils.EzPickle):
         else:
             raise ValueError("either 2 or 4 dimensional Contexts available")
 
-        action_space_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
-        action_space_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
-        self.action_space = spaces.Box(low=action_space_low, high=action_space_high, dtype='float64')
+        # has no effect as it is overwritten in init of super
+        # action_space_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
+        # action_space_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
+        # self.action_space = spaces.Box(low=action_space_low, high=action_space_high, dtype='float64')
 
         self.time_steps = 0
         self.init_qpos_tt = np.array([0, 0, 0, 1.5, 0, 0, 1.5, 0, 0, 0])
@@ -159,7 +160,10 @@ class TTEnvGym(MujocoEnv, utils.EzPickle):
             done = True
             reward = -25
         ob = self._get_obs()
-        return ob, reward, done, {"hit_ball": self.hit_ball} # might add some information here ....
+        info = {"hit_ball": self.hit_ball,
+                "q_pos": np.copy(self.sim.data.qpos[:7]),
+                "ball_pos": np.copy(self.sim.data.qpos[7:])}
+        return ob, reward, done, info # might add some information here ....
 
     def set_context(self, context):
         old_state = self.sim.get_state()
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_reward.py b/alr_envs/alr/mujoco/table_tennis/tt_reward.py
index eab2dd3..0e1bebf 100644
--- a/alr_envs/alr/mujoco/table_tennis/tt_reward.py
+++ b/alr_envs/alr/mujoco/table_tennis/tt_reward.py
@@ -19,7 +19,7 @@ class TT_Reward:
         # # seems to work for episodic case
         min_r_b_dist = np.min(np.linalg.norm(np.array(self.c_ball_traj) - np.array(self.c_racket_traj), axis=1))
         if not hited_ball:
-            return 0.2 * (1- np.tanh(min_r_b_dist**2))
+            return 0.2 * (1 - np.tanh(min_r_b_dist**2))
         else:
             if ball_landing_pos is None:
                 min_b_des_b_dist = np.min(np.linalg.norm(np.array(self.c_ball_traj)[:,:2] - self.c_goal[:2], axis=1))
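The table-tennis reward above keys on the closest approach between ball and racket over the episode: `np.linalg.norm(..., axis=1)` turns each timestep's 3D difference into a scalar distance, `np.min` picks the closest one, and the result is squashed through the same `1 - tanh(d**2)` shaping as in the beer pong reward. A small self-contained sketch with stand-in trajectories (names and data are illustrative only):

```python
import numpy as np

rng = np.random.default_rng(0)
ball_traj = rng.normal(size=(500, 3))    # per-step ball positions
racket_traj = rng.normal(size=(500, 3))  # per-step racket positions

# distance between ball and racket at every timestep, then the closest approach
dists = np.linalg.norm(ball_traj - racket_traj, axis=1)
min_r_b_dist = dists.min()

# shaping term used when the ball was never hit: bounded in (0, 0.2]
reward = 0.2 * (1 - np.tanh(min_r_b_dist ** 2))
print(min_r_b_dist, reward)
```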