From 87eb093c2c86423b755ade920b2429d70f13a0a4 Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 1 Jul 2021 14:55:14 +0200 Subject: [PATCH 1/6] Add open ai gym environments --- README.md | 11 +++ alr_envs/__init__.py | 77 +++++++++++++++++++ alr_envs/examples/examples_open_ai.py | 41 ++++++++++ alr_envs/open_ai/__init__.py | 0 .../continuous_mountain_car/__init__.py | 2 + .../continuous_mountain_car/mp_wrapper.py | 17 ++++ .../positional_wrapper.py | 13 ++++ alr_envs/open_ai/fetch/__init__.py | 2 + alr_envs/open_ai/fetch/mp_wrapper.py | 18 +++++ alr_envs/open_ai/fetch/positional_wrapper.py | 13 ++++ alr_envs/open_ai/reacher_v2/__init__.py | 2 + alr_envs/open_ai/reacher_v2/mp_wrapper.py | 18 +++++ .../open_ai/reacher_v2/positional_wrapper.py | 13 ++++ 13 files changed, 227 insertions(+) create mode 100644 alr_envs/examples/examples_open_ai.py create mode 100644 alr_envs/open_ai/__init__.py create mode 100644 alr_envs/open_ai/continuous_mountain_car/__init__.py create mode 100644 alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py create mode 100644 alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py create mode 100644 alr_envs/open_ai/fetch/__init__.py create mode 100644 alr_envs/open_ai/fetch/mp_wrapper.py create mode 100644 alr_envs/open_ai/fetch/positional_wrapper.py create mode 100644 alr_envs/open_ai/reacher_v2/__init__.py create mode 100644 alr_envs/open_ai/reacher_v2/mp_wrapper.py create mode 100644 alr_envs/open_ai/reacher_v2/positional_wrapper.py diff --git a/README.md b/README.md index ce95b8d..2ab4143 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,17 @@ All environments provide the full episode reward and additional information abou [//]: |`HoleReacherDetPMP-v0`| +### OpenAi-gym Environments +These environments are wrapped-versions of their OpenAi-gym counterparts. + +|Name| Description|Horizon|Action Dimension|Context Dimension +|---|---|---|---|---| +|`ContinuousMountainCarDetPMP-v0`| A DetPmP wrapped version of the ContinuousMountainCar-v0 environment. | 100 | 1 +|`ReacherDetPMP-v2`| A DetPmP wrapped version of the Reacher-v2 environment. | 50 | 2 +|`FetchSlideDenseDetPMP-v1`| A DetPmP wrapped version of the FetchSlideDense-v1 environment. | 50 | 4 +|`FetchReachDenseDetPMP-v1`| A DetPmP wrapped version of the FetchReachDense-v1 environment. 
| 50 | 4 + + ### Stochastic Search |Name| Description|Horizon|Action Dimension|Observation Dimension |---|---|---|---|---| diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py index b1056a2..181b627 100644 --- a/alr_envs/__init__.py +++ b/alr_envs/__init__.py @@ -7,6 +7,7 @@ from alr_envs.classic_control.viapoint_reacher.viapoint_reacher_mp_wrapper impor from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_mp_wrapper import BallInACupMPWrapper from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_positional_wrapper import BallInACupPositionalWrapper +from alr_envs.open_ai import reacher_v2, continuous_mountain_car, fetch from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock # Mujoco @@ -560,6 +561,82 @@ register( } ) +## Open AI +register( + id='ContinuousMountainCarDetPMP-v0', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": "gym.envs.classic_control:MountainCarContinuous-v0", + "wrappers": [continuous_mountain_car.PositionalWrapper, continuous_mountain_car.MPWrapper], + "mp_kwargs": { + "num_dof": 1, + "num_basis": 4, + "duration": 100, + "post_traj_time": 0, + "width": 0.02, + "policy_type": "motor", + "policy_kwargs": { + "p_gains": 1., + "d_gains": 1. + } + } + } +) + +register( + id='ReacherDetPMP-v2', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": "gym.envs.mujoco:Reacher-v2", + "wrappers": [reacher_v2.PositionalWrapper, reacher_v2.MPWrapper], + "mp_kwargs": { + "num_dof": 2, + "num_basis": 6, + "duration": 1, + "post_traj_time": 0, + "width": 0.02, + "policy_type": "motor", + "policy_kwargs": { + "p_gains": .6, + "d_gains": .075 + } + } + } +) + +register( + id='FetchSlideDenseDetPMP-v1', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": "gym.envs.robotics:FetchSlideDense-v1", + "wrappers": [fetch.PositionalWrapper, fetch.MPWrapper], + "mp_kwargs": { + "num_dof": 4, + "num_basis": 5, + "duration": 1, + "post_traj_time": 0, + "width": 0.02, + "policy_type": "position" + } + } +) + +register( + id='FetchReachDenseDetPMP-v1', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": "gym.envs.robotics:FetchReachDense-v1", + "wrappers": [fetch.PositionalWrapper, fetch.MPWrapper], + "mp_kwargs": { + "num_dof": 4, + "num_basis": 5, + "duration": 1, + "post_traj_time": 0, + "width": 0.02, + "policy_type": "position" + } + } +) # BBO functions diff --git a/alr_envs/examples/examples_open_ai.py b/alr_envs/examples/examples_open_ai.py new file mode 100644 index 0000000..d001bc8 --- /dev/null +++ b/alr_envs/examples/examples_open_ai.py @@ -0,0 +1,41 @@ +from alr_envs.utils.make_env_helpers import make_env + + +def example_mp(env_name, seed=1): + """ + Example for running a motion primitive based version of a OpenAI-gym environment, which is already registered. + For more information on motion primitive specific stuff, look at the mp examples. + Args: + env_name: DetPMP env_id + seed: seed + + Returns: + + """ + # While in this case gym.make() is possible to use as well, we recommend our custom make env function. 
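+    # (make_env resolves the registered id, here via make_detpmp_env_helper, which
+    # applies the wrappers listed in alr_envs/__init__.py and seeds the base env.)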
+ env = make_env(env_name, seed) + + rewards = 0 + obs = env.reset() + + # number of samples/full trajectories (multiple environment steps) + for i in range(10): + ac = env.action_space.sample() + obs, reward, done, info = env.step(ac) + rewards += reward + + if done: + print(rewards) + rewards = 0 + obs = env.reset() + +if __name__ == '__main__': + # DMP - not supported yet + #example_mp("ReacherDetPMP-v2") + + # DetProMP + example_mp("ContinuousMountainCarDetPMP-v0") + example_mp("ReacherDetPMP-v2") + example_mp("FetchReachDenseDetPMP-v1") + example_mp("FetchSlideDenseDetPMP-v1") + diff --git a/alr_envs/open_ai/__init__.py b/alr_envs/open_ai/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/alr_envs/open_ai/continuous_mountain_car/__init__.py b/alr_envs/open_ai/continuous_mountain_car/__init__.py new file mode 100644 index 0000000..4cff6da --- /dev/null +++ b/alr_envs/open_ai/continuous_mountain_car/__init__.py @@ -0,0 +1,2 @@ +from alr_envs.open_ai.continuous_mountain_car.positional_wrapper import PositionalWrapper +from alr_envs.open_ai.continuous_mountain_car.mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py new file mode 100644 index 0000000..960fc0c --- /dev/null +++ b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py @@ -0,0 +1,17 @@ +from typing import Union + +from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + @property + def start_pos(self): + raise ValueError("Start position is not available") + + @property + def goal_pos(self): + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return 1. 
\ No newline at end of file diff --git a/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py b/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py new file mode 100644 index 0000000..5b587fa --- /dev/null +++ b/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py @@ -0,0 +1,13 @@ +from typing import Union +import numpy as np +from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper + + +class PositionalWrapper(PositionalEnvWrapper): + @property + def current_vel(self) -> Union[float, int, np.ndarray]: + return np.array([self.state[1]]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return np.array([self.state[0]]) \ No newline at end of file diff --git a/alr_envs/open_ai/fetch/__init__.py b/alr_envs/open_ai/fetch/__init__.py new file mode 100644 index 0000000..4c6d088 --- /dev/null +++ b/alr_envs/open_ai/fetch/__init__.py @@ -0,0 +1,2 @@ +from alr_envs.open_ai.fetch.positional_wrapper import PositionalWrapper +from alr_envs.open_ai.fetch.mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/open_ai/fetch/mp_wrapper.py b/alr_envs/open_ai/fetch/mp_wrapper.py new file mode 100644 index 0000000..2ac7b59 --- /dev/null +++ b/alr_envs/open_ai/fetch/mp_wrapper.py @@ -0,0 +1,18 @@ +from typing import Union + +from gym import spaces +from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + @property + def start_pos(self): + return self.initial_gripper_xpos + + @property + def goal_pos(self): + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return self.env.dt \ No newline at end of file diff --git a/alr_envs/open_ai/fetch/positional_wrapper.py b/alr_envs/open_ai/fetch/positional_wrapper.py new file mode 100644 index 0000000..c113db6 --- /dev/null +++ b/alr_envs/open_ai/fetch/positional_wrapper.py @@ -0,0 +1,13 @@ +from typing import Union +import numpy as np +from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper + + +class PositionalWrapper(PositionalEnvWrapper): + @property + def current_vel(self) -> Union[float, int, np.ndarray]: + return self._get_obs()["observation"][-5:-1] + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return self._get_obs()["observation"][:4] \ No newline at end of file diff --git a/alr_envs/open_ai/reacher_v2/__init__.py b/alr_envs/open_ai/reacher_v2/__init__.py new file mode 100644 index 0000000..a0acbea --- /dev/null +++ b/alr_envs/open_ai/reacher_v2/__init__.py @@ -0,0 +1,2 @@ +from alr_envs.open_ai.reacher_v2.positional_wrapper import PositionalWrapper +from alr_envs.open_ai.reacher_v2.mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/open_ai/reacher_v2/mp_wrapper.py b/alr_envs/open_ai/reacher_v2/mp_wrapper.py new file mode 100644 index 0000000..be67a35 --- /dev/null +++ b/alr_envs/open_ai/reacher_v2/mp_wrapper.py @@ -0,0 +1,18 @@ +from typing import Union + +from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + + @property + def start_pos(self): + raise ValueError("Start position is not available") + + @property + def goal_pos(self): + return self.goal + + @property + def dt(self) -> Union[float, int]: + return self.env.dt \ No newline at end of file diff --git a/alr_envs/open_ai/reacher_v2/positional_wrapper.py b/alr_envs/open_ai/reacher_v2/positional_wrapper.py new file mode 100644 index 
0000000..0fc622b --- /dev/null +++ b/alr_envs/open_ai/reacher_v2/positional_wrapper.py @@ -0,0 +1,13 @@ +from typing import Union +import numpy as np +from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper + + +class PositionalWrapper(PositionalEnvWrapper): + @property + def current_vel(self) -> Union[float, int, np.ndarray]: + return self.sim.data.qvel[:2] + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return self.sim.data.qpos[:2] \ No newline at end of file From 585bdc7b47dae56017db19c17a81e337d20a1c50 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 2 Jul 2021 06:12:32 +0200 Subject: [PATCH 2/6] Add dm_control to setup.py --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7170fa6..cc9445c 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,8 @@ setup( 'PyQt5', 'matplotlib', 'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git', - 'mujoco_py' + 'mujoco_py', + 'dm_control' ], url='https://github.com/ALRhub/alr_envs/', From 28d10ef0894dd0f974886f5c03b9ada9646162dd Mon Sep 17 00:00:00 2001 From: Marcel Date: Mon, 5 Jul 2021 08:14:39 +0200 Subject: [PATCH 3/6] Bug fixes for wrapped OpenAi environments --- alr_envs/__init__.py | 6 +++--- alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py | 2 +- alr_envs/open_ai/fetch/positional_wrapper.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py index 181b627..2611701 100644 --- a/alr_envs/__init__.py +++ b/alr_envs/__init__.py @@ -571,7 +571,7 @@ register( "mp_kwargs": { "num_dof": 1, "num_basis": 4, - "duration": 100, + "duration": 2, "post_traj_time": 0, "width": 0.02, "policy_type": "motor", @@ -613,7 +613,7 @@ register( "mp_kwargs": { "num_dof": 4, "num_basis": 5, - "duration": 1, + "duration": 2, "post_traj_time": 0, "width": 0.02, "policy_type": "position" @@ -630,7 +630,7 @@ register( "mp_kwargs": { "num_dof": 4, "num_basis": 5, - "duration": 1, + "duration": 2, "post_traj_time": 0, "width": 0.02, "policy_type": "position" diff --git a/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py index 960fc0c..886b1e1 100644 --- a/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py +++ b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py @@ -14,4 +14,4 @@ class MPWrapper(MPEnvWrapper): @property def dt(self) -> Union[float, int]: - return 1. 
\ No newline at end of file + return 0.02 \ No newline at end of file diff --git a/alr_envs/open_ai/fetch/positional_wrapper.py b/alr_envs/open_ai/fetch/positional_wrapper.py index c113db6..9c6dcf2 100644 --- a/alr_envs/open_ai/fetch/positional_wrapper.py +++ b/alr_envs/open_ai/fetch/positional_wrapper.py @@ -6,8 +6,8 @@ from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper class PositionalWrapper(PositionalEnvWrapper): @property def current_vel(self) -> Union[float, int, np.ndarray]: - return self._get_obs()["observation"][-5:-1] + return self.unwrapped._get_obs()["observation"][-5:-1] @property def current_pos(self) -> Union[float, int, np.ndarray]: - return self._get_obs()["observation"][:4] \ No newline at end of file + return self.unwrapped._get_obs()["observation"][:4] \ No newline at end of file From 0046ade102be85d56c79933ac180004b06b13e7e Mon Sep 17 00:00:00 2001 From: Marcel Date: Mon, 5 Jul 2021 09:16:36 +0200 Subject: [PATCH 4/6] Adopt new interface structure --- alr_envs/__init__.py | 8 ++++---- .../open_ai/continuous_mountain_car/__init__.py | 1 - .../open_ai/continuous_mountain_car/mp_wrapper.py | 9 +++++++-- .../continuous_mountain_car/positional_wrapper.py | 13 ------------- alr_envs/open_ai/fetch/__init__.py | 1 - alr_envs/open_ai/fetch/mp_wrapper.py | 10 +++++++--- alr_envs/open_ai/fetch/positional_wrapper.py | 13 ------------- alr_envs/open_ai/reacher_v2/__init__.py | 1 - alr_envs/open_ai/reacher_v2/mp_wrapper.py | 9 +++++++-- alr_envs/open_ai/reacher_v2/positional_wrapper.py | 13 ------------- 10 files changed, 25 insertions(+), 53 deletions(-) delete mode 100644 alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py delete mode 100644 alr_envs/open_ai/fetch/positional_wrapper.py delete mode 100644 alr_envs/open_ai/reacher_v2/positional_wrapper.py diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py index 144b5f2..0dfc1f5 100644 --- a/alr_envs/__init__.py +++ b/alr_envs/__init__.py @@ -573,7 +573,7 @@ register( entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', kwargs={ "name": "gym.envs.classic_control:MountainCarContinuous-v0", - "wrappers": [continuous_mountain_car.PositionalWrapper, continuous_mountain_car.MPWrapper], + "wrappers": [continuous_mountain_car.MPWrapper], "mp_kwargs": { "num_dof": 1, "num_basis": 4, @@ -594,7 +594,7 @@ register( entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', kwargs={ "name": "gym.envs.mujoco:Reacher-v2", - "wrappers": [reacher_v2.PositionalWrapper, reacher_v2.MPWrapper], + "wrappers": [reacher_v2.MPWrapper], "mp_kwargs": { "num_dof": 2, "num_basis": 6, @@ -615,7 +615,7 @@ register( entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', kwargs={ "name": "gym.envs.robotics:FetchSlideDense-v1", - "wrappers": [fetch.PositionalWrapper, fetch.MPWrapper], + "wrappers": [fetch.MPWrapper], "mp_kwargs": { "num_dof": 4, "num_basis": 5, @@ -632,7 +632,7 @@ register( entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', kwargs={ "name": "gym.envs.robotics:FetchReachDense-v1", - "wrappers": [fetch.PositionalWrapper, fetch.MPWrapper], + "wrappers": [fetch.MPWrapper], "mp_kwargs": { "num_dof": 4, "num_basis": 5, diff --git a/alr_envs/open_ai/continuous_mountain_car/__init__.py b/alr_envs/open_ai/continuous_mountain_car/__init__.py index 4cff6da..36f731d 100644 --- a/alr_envs/open_ai/continuous_mountain_car/__init__.py +++ b/alr_envs/open_ai/continuous_mountain_car/__init__.py @@ -1,2 +1 @@ -from 
alr_envs.open_ai.continuous_mountain_car.positional_wrapper import PositionalWrapper from alr_envs.open_ai.continuous_mountain_car.mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py index 886b1e1..f0bccab 100644 --- a/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py +++ b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py @@ -1,12 +1,17 @@ from typing import Union +import numpy as np from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper class MPWrapper(MPEnvWrapper): @property - def start_pos(self): - raise ValueError("Start position is not available") + def current_vel(self) -> Union[float, int, np.ndarray]: + return np.array([self.state[1]]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return np.array([self.state[0]]) @property def goal_pos(self): diff --git a/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py b/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py deleted file mode 100644 index 5b587fa..0000000 --- a/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py +++ /dev/null @@ -1,13 +0,0 @@ -from typing import Union -import numpy as np -from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper - - -class PositionalWrapper(PositionalEnvWrapper): - @property - def current_vel(self) -> Union[float, int, np.ndarray]: - return np.array([self.state[1]]) - - @property - def current_pos(self) -> Union[float, int, np.ndarray]: - return np.array([self.state[0]]) \ No newline at end of file diff --git a/alr_envs/open_ai/fetch/__init__.py b/alr_envs/open_ai/fetch/__init__.py index 4c6d088..2e68176 100644 --- a/alr_envs/open_ai/fetch/__init__.py +++ b/alr_envs/open_ai/fetch/__init__.py @@ -1,2 +1 @@ -from alr_envs.open_ai.fetch.positional_wrapper import PositionalWrapper from alr_envs.open_ai.fetch.mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/open_ai/fetch/mp_wrapper.py b/alr_envs/open_ai/fetch/mp_wrapper.py index 2ac7b59..acb07a3 100644 --- a/alr_envs/open_ai/fetch/mp_wrapper.py +++ b/alr_envs/open_ai/fetch/mp_wrapper.py @@ -1,13 +1,17 @@ from typing import Union -from gym import spaces +import numpy as np from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper class MPWrapper(MPEnvWrapper): @property - def start_pos(self): - return self.initial_gripper_xpos + def current_vel(self) -> Union[float, int, np.ndarray]: + return self.unwrapped._get_obs()["observation"][-5:-1] + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return self.unwrapped._get_obs()["observation"][:4] @property def goal_pos(self): diff --git a/alr_envs/open_ai/fetch/positional_wrapper.py b/alr_envs/open_ai/fetch/positional_wrapper.py deleted file mode 100644 index 9c6dcf2..0000000 --- a/alr_envs/open_ai/fetch/positional_wrapper.py +++ /dev/null @@ -1,13 +0,0 @@ -from typing import Union -import numpy as np -from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper - - -class PositionalWrapper(PositionalEnvWrapper): - @property - def current_vel(self) -> Union[float, int, np.ndarray]: - return self.unwrapped._get_obs()["observation"][-5:-1] - - @property - def current_pos(self) -> Union[float, int, np.ndarray]: - return self.unwrapped._get_obs()["observation"][:4] \ No newline at end of file diff --git a/alr_envs/open_ai/reacher_v2/__init__.py b/alr_envs/open_ai/reacher_v2/__init__.py index a0acbea..48a5615 
100644 --- a/alr_envs/open_ai/reacher_v2/__init__.py +++ b/alr_envs/open_ai/reacher_v2/__init__.py @@ -1,2 +1 @@ -from alr_envs.open_ai.reacher_v2.positional_wrapper import PositionalWrapper from alr_envs.open_ai.reacher_v2.mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/open_ai/reacher_v2/mp_wrapper.py b/alr_envs/open_ai/reacher_v2/mp_wrapper.py index be67a35..7636f50 100644 --- a/alr_envs/open_ai/reacher_v2/mp_wrapper.py +++ b/alr_envs/open_ai/reacher_v2/mp_wrapper.py @@ -1,13 +1,18 @@ from typing import Union +import numpy as np from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper class MPWrapper(MPEnvWrapper): @property - def start_pos(self): - raise ValueError("Start position is not available") + def current_vel(self) -> Union[float, int, np.ndarray]: + return self.sim.data.qvel[:2] + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return self.sim.data.qpos[:2] @property def goal_pos(self): diff --git a/alr_envs/open_ai/reacher_v2/positional_wrapper.py b/alr_envs/open_ai/reacher_v2/positional_wrapper.py deleted file mode 100644 index 0fc622b..0000000 --- a/alr_envs/open_ai/reacher_v2/positional_wrapper.py +++ /dev/null @@ -1,13 +0,0 @@ -from typing import Union -import numpy as np -from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper - - -class PositionalWrapper(PositionalEnvWrapper): - @property - def current_vel(self) -> Union[float, int, np.ndarray]: - return self.sim.data.qvel[:2] - - @property - def current_pos(self) -> Union[float, int, np.ndarray]: - return self.sim.data.qpos[:2] \ No newline at end of file From 92e6a84d0333232cbeae0474c52bd6dcda1c37ab Mon Sep 17 00:00:00 2001 From: Marcel Date: Mon, 5 Jul 2021 09:52:41 +0200 Subject: [PATCH 5/6] Adopt new interface structure --- README.md | 3 ++- alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py | 2 +- alr_envs/open_ai/fetch/mp_wrapper.py | 2 +- alr_envs/open_ai/reacher_v2/mp_wrapper.py | 6 +----- reacher.egg-info/PKG-INFO | 10 ---------- reacher.egg-info/SOURCES.txt | 7 ------- reacher.egg-info/dependency_links.txt | 1 - reacher.egg-info/requires.txt | 1 - reacher.egg-info/top_level.txt | 1 - setup.py | 4 ++-- 10 files changed, 7 insertions(+), 30 deletions(-) delete mode 100644 reacher.egg-info/PKG-INFO delete mode 100644 reacher.egg-info/SOURCES.txt delete mode 100644 reacher.egg-info/dependency_links.txt delete mode 100644 reacher.egg-info/requires.txt delete mode 100644 reacher.egg-info/top_level.txt diff --git a/README.md b/README.md index 2ab4143..56ec0bf 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,8 @@ This repository collects custom Robotics environments not included in benchmark suites like OpenAI gym, rllab, etc. Creating a custom (Mujoco) gym environment can be done according to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md). For stochastic search problems with gym interface use the `Rosenbrock-v0` reference implementation. -We also support to solve environments with DMPs. When adding new DMP tasks check the `ViaPointReacherDMP-v0` reference implementation. +We also support to solve environments with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (DetPMP, we only consider the mean usually). +When adding new DMP tasks check the `ViaPointReacherDMP-v0` reference implementation. When simply using the tasks, you can also leverage the wrapper class `DmpWrapper` to turn normal gym environments in to DMP tasks. 
## Environments diff --git a/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py index f0bccab..29378ed 100644 --- a/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py +++ b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py @@ -1,7 +1,7 @@ from typing import Union import numpy as np -from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper +from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper class MPWrapper(MPEnvWrapper): diff --git a/alr_envs/open_ai/fetch/mp_wrapper.py b/alr_envs/open_ai/fetch/mp_wrapper.py index acb07a3..6602a18 100644 --- a/alr_envs/open_ai/fetch/mp_wrapper.py +++ b/alr_envs/open_ai/fetch/mp_wrapper.py @@ -1,7 +1,7 @@ from typing import Union import numpy as np -from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper +from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper class MPWrapper(MPEnvWrapper): diff --git a/alr_envs/open_ai/reacher_v2/mp_wrapper.py b/alr_envs/open_ai/reacher_v2/mp_wrapper.py index 7636f50..d3181b5 100644 --- a/alr_envs/open_ai/reacher_v2/mp_wrapper.py +++ b/alr_envs/open_ai/reacher_v2/mp_wrapper.py @@ -1,7 +1,7 @@ from typing import Union import numpy as np -from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper +from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper class MPWrapper(MPEnvWrapper): @@ -14,10 +14,6 @@ class MPWrapper(MPEnvWrapper): def current_pos(self) -> Union[float, int, np.ndarray]: return self.sim.data.qpos[:2] - @property - def goal_pos(self): - return self.goal - @property def dt(self) -> Union[float, int]: return self.env.dt \ No newline at end of file diff --git a/reacher.egg-info/PKG-INFO b/reacher.egg-info/PKG-INFO deleted file mode 100644 index 9ea9f7e..0000000 --- a/reacher.egg-info/PKG-INFO +++ /dev/null @@ -1,10 +0,0 @@ -Metadata-Version: 1.0 -Name: reacher -Version: 0.0.1 -Summary: UNKNOWN -Home-page: UNKNOWN -Author: UNKNOWN -Author-email: UNKNOWN -License: UNKNOWN -Description: UNKNOWN -Platform: UNKNOWN diff --git a/reacher.egg-info/SOURCES.txt b/reacher.egg-info/SOURCES.txt deleted file mode 100644 index b771181..0000000 --- a/reacher.egg-info/SOURCES.txt +++ /dev/null @@ -1,7 +0,0 @@ -README.md -setup.py -reacher.egg-info/PKG-INFO -reacher.egg-info/SOURCES.txt -reacher.egg-info/dependency_links.txt -reacher.egg-info/requires.txt -reacher.egg-info/top_level.txt \ No newline at end of file diff --git a/reacher.egg-info/dependency_links.txt b/reacher.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/reacher.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/reacher.egg-info/requires.txt b/reacher.egg-info/requires.txt deleted file mode 100644 index 1e6c2dd..0000000 --- a/reacher.egg-info/requires.txt +++ /dev/null @@ -1 +0,0 @@ -gym diff --git a/reacher.egg-info/top_level.txt b/reacher.egg-info/top_level.txt deleted file mode 100644 index 8b13789..0000000 --- a/reacher.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/setup.py b/setup.py index cc9445c..703bbee 100644 --- a/setup.py +++ b/setup.py @@ -3,14 +3,14 @@ from setuptools import setup setup( name='alr_envs', version='0.0.1', - packages=['alr_envs', 'alr_envs.classic_control', 'alr_envs.mujoco', 'alr_envs.stochastic_search', + packages=['alr_envs', 'alr_envs.classic_control', 'alr_envs.open_ai', 'alr_envs.mujoco', 'alr_envs.stochastic_search', 'alr_envs.utils'], install_requires=[ 'gym', 'PyQt5', 'matplotlib', 'mp_env_api @ 
git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git',
-        'mujoco_py',
+        'mujoco-py<2.1,>=2.0',
         'dm_control'
     ],

From d2414797c7f2e8156fc76213e61b1431c7be6f7b Mon Sep 17 00:00:00 2001
From: ottofabian
Date: Fri, 23 Jul 2021 15:18:39 +0200
Subject: [PATCH 6/6] Update README.md

---
 README.md | 39 ++++++++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index 56ec0bf..a4ad30f 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,11 @@
-## ALR Environments
+## ALR Robotics Control Environments
 This repository collects custom Robotics environments not included in benchmark suites like OpenAI gym, rllab, etc.
 Creating a custom (Mujoco) gym environment can be done according to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md).
 For stochastic search problems with gym interface use the `Rosenbrock-v0` reference implementation.
 We also support to solve environments with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (DetPMP, we only consider the mean usually).
-When adding new DMP tasks check the `ViaPointReacherDMP-v0` reference implementation.
-When simply using the tasks, you can also leverage the wrapper class `DmpWrapper` to turn normal gym environments in to DMP tasks.
 
-## Environments
+## Step-based Environments
 Currently we have the following environments:
 
 ### Mujoco
@@ -33,11 +31,13 @@ Currently we have the following environments:
 |`ViaPointReacher-v0`| Simple reaching task leveraging a via point, which supports self collision detection. Provides a reward only at 100 and 199 for reaching the viapoint and goal point, respectively.| 200 | 5 | 18
 |`HoleReacher-v0`| 5 link reaching task where the end-effector needs to reach into a narrow hole without collding with itself or walls | 200 | 5 | 18
 
-### DMP Environments
-These environments are closer to stochastic search. They always execute a full trajectory, which is computed by a DMP and executed by a controller, e.g. a PD controller.
-The goal is to learn the parameters of this DMP to generate a suitable trajectory.
-All environments provide the full episode reward and additional information about early terminations, e.g. due to collisions.
+## Motion Primitive Environments (Episodic environments)
+Unlike step-based environments, these motion primitive (MP) environments are closer to stochastic search and to what is commonly found in robotics. They always execute a full trajectory, which is computed by a Dynamic Movement Primitive (DMP) or a Probabilistic Movement Primitive (DetPMP) and translated into individual actions by a controller, e.g. a PD controller. The actual controller, however, depends on the type of environment, i.e. position, velocity, or torque controlled.
+The goal is to learn the parametrization of the motion primitives in order to generate a suitable trajectory.
+This can also be done in a contextual setting, where all changing elements of the task are exposed once in the beginning. This requires finding a new parametrization for each trajectory.
+All environments provide the full cumulative episode reward and additional information about early terminations, e.g. due to collisions.
 
+### Classic Control
 |Name| Description|Horizon|Action Dimension|Context Dimension
 |---|---|---|---|---|
 |`ViaPointReacherDMP-v0`| A DMP provides a trajectory for the `ViaPointReacher-v0` task. | 200 | 25
@@ -49,18 +49,31 @@ All environments provide the full episode reward and additional information abou
 
 [//]: |`HoleReacherDetPMP-v0`|
 
-### OpenAi-gym Environments
-These environments are wrapped-versions of their OpenAi-gym counterparts.
+### OpenAI gym Environments
+These environments are wrapped versions of their OpenAI gym counterparts.
 
-|Name| Description|Horizon|Action Dimension|Context Dimension
+|Name| Description|Trajectory Horizon|Action Dimension|Context Dimension
 |---|---|---|---|---|
 |`ContinuousMountainCarDetPMP-v0`| A DetPmP wrapped version of the ContinuousMountainCar-v0 environment. | 100 | 1
 |`ReacherDetPMP-v2`| A DetPmP wrapped version of the Reacher-v2 environment. | 50 | 2
 |`FetchSlideDenseDetPMP-v1`| A DetPmP wrapped version of the FetchSlideDense-v1 environment. | 50 | 4
 |`FetchReachDenseDetPMP-v1`| A DetPmP wrapped version of the FetchReachDense-v1 environment. | 50 | 4
 
+### DeepMind Control Suite Environments
+These environments are wrapped versions of their DeepMind Control Suite (DMC) counterparts.
+Given that most tasks can be solved with shorter horizons than the original 1000 steps, we often shorten the episodes for these tasks.
+
+|Name| Description|Trajectory Horizon|Action Dimension|Context Dimension
+|---|---|---|---|---|
+|`dmc_ball_in_cup-catch_detpmp-v0`| A DetPmP wrapped version of the "catch" task for the "ball_in_cup" environment. | 50 | 10 | 2
+|`dmc_ball_in_cup-catch_dmp-v0`| A DMP wrapped version of the "catch" task for the "ball_in_cup" environment. | 50 | 10 | 2
+|`dmc_reacher-easy_detpmp-v0`| A DetPmP wrapped version of the "easy" task for the "reacher" environment. | 1000 | 10 | 4
+|`dmc_reacher-easy_dmp-v0`| A DMP wrapped version of the "easy" task for the "reacher" environment. | 1000 | 10 | 4
+|`dmc_reacher-hard_detpmp-v0`| A DetPmP wrapped version of the "hard" task for the "reacher" environment. | 1000 | 10 | 4
+|`dmc_reacher-hard_dmp-v0`| A DMP wrapped version of the "hard" task for the "reacher" environment. | 1000 | 10 | 4
+
+
-### Stochastic Search
+## Stochastic Search
 |Name| Description|Horizon|Action Dimension|Observation Dimension
 |---|---|---|---|---|
 |`Rosenbrock{dim}-v0`| Gym interface for Rosenbrock function. `{dim}` is one of 5, 10, 25, 50 or 100. | 1 | `{dim}` | 0
@@ -96,4 +109,4 @@ for i in range(10000):
 ```
 
-For an example using a DMP wrapped env and asynchronous sampling look at [mp_env_async_sampler.py](./alr_envs/utils/mp_env_async_sampler.py)
\ No newline at end of file
+For an example using a DMP wrapped env and asynchronous sampling look at [mp_env_async_sampler.py](./alr_envs/utils/mp_env_async_sampler.py)
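
A minimal rollout of one of the newly registered DetPMP environments can look as follows. This is a sketch closely following `alr_envs/examples/examples_open_ai.py` from the first patch; it assumes `alr_envs` and its `mp_env_api` dependency are installed.

```python
from alr_envs.utils.make_env_helpers import make_env

# One action is a full DetPMP parameter vector; the wrapped env computes the
# resulting trajectory, executes it with its controller, and returns the
# cumulative episode reward.
env = make_env("ContinuousMountainCarDetPMP-v0", seed=1)
obs = env.reset()

rewards = 0
for i in range(10):
    params = env.action_space.sample()
    obs, reward, done, info = env.step(params)
    rewards += reward
    if done:
        print(rewards)
        rewards = 0
        obs = env.reset()
```

Registering further DetPMP variants follows the schema of the `register` calls added to `alr_envs/__init__.py`. A hypothetical example (the id and the `num_basis`/`duration` values are made up for illustration):

```python
from gym.envs.registration import register

from alr_envs.open_ai import continuous_mountain_car

# Hypothetical variant: the same mountain car task, but with a longer trajectory
# and more basis functions; it reuses the MPWrapper shipped in alr_envs.open_ai.
register(
    id='ContinuousMountainCarLongDetPMP-v0',
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        "name": "gym.envs.classic_control:MountainCarContinuous-v0",
        "wrappers": [continuous_mountain_car.MPWrapper],
        "mp_kwargs": {
            "num_dof": 1,
            "num_basis": 8,
            "duration": 4,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "motor",
            "policy_kwargs": {
                "p_gains": 1.,
                "d_gains": 1.
            }
        }
    }
)
```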