From 87eb093c2c86423b755ade920b2429d70f13a0a4 Mon Sep 17 00:00:00 2001
From: Marcel
Date: Thu, 1 Jul 2021 14:55:14 +0200
Subject: [PATCH] Add OpenAI Gym environments

---
 README.md                                             | 11 +++
 alr_envs/__init__.py                                  | 77 +++++++++++++++++++
 alr_envs/examples/examples_open_ai.py                 | 41 ++++++++++
 alr_envs/open_ai/__init__.py                          |  0
 .../continuous_mountain_car/__init__.py               |  2 +
 .../continuous_mountain_car/mp_wrapper.py             | 17 ++++
 .../positional_wrapper.py                             | 13 ++++
 alr_envs/open_ai/fetch/__init__.py                    |  2 +
 alr_envs/open_ai/fetch/mp_wrapper.py                  | 18 +++++
 alr_envs/open_ai/fetch/positional_wrapper.py          | 13 ++++
 alr_envs/open_ai/reacher_v2/__init__.py               |  2 +
 alr_envs/open_ai/reacher_v2/mp_wrapper.py             | 18 +++++
 .../open_ai/reacher_v2/positional_wrapper.py          | 13 ++++
 13 files changed, 227 insertions(+)
 create mode 100644 alr_envs/examples/examples_open_ai.py
 create mode 100644 alr_envs/open_ai/__init__.py
 create mode 100644 alr_envs/open_ai/continuous_mountain_car/__init__.py
 create mode 100644 alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py
 create mode 100644 alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py
 create mode 100644 alr_envs/open_ai/fetch/__init__.py
 create mode 100644 alr_envs/open_ai/fetch/mp_wrapper.py
 create mode 100644 alr_envs/open_ai/fetch/positional_wrapper.py
 create mode 100644 alr_envs/open_ai/reacher_v2/__init__.py
 create mode 100644 alr_envs/open_ai/reacher_v2/mp_wrapper.py
 create mode 100644 alr_envs/open_ai/reacher_v2/positional_wrapper.py

diff --git a/README.md b/README.md
index ce95b8d..2ab4143 100644
--- a/README.md
+++ b/README.md
@@ -48,6 +48,17 @@ All environments provide the full episode reward and additional information abou
 
 [//]: |`HoleReacherDetPMP-v0`|
 
+### OpenAI Gym Environments
+These environments are wrapped versions of their OpenAI Gym counterparts.
+
+|Name| Description|Horizon|Action Dimension|Context Dimension
+|---|---|---|---|---|
+|`ContinuousMountainCarDetPMP-v0`| A DetPMP-wrapped version of the MountainCarContinuous-v0 environment. | 100 | 1
+|`ReacherDetPMP-v2`| A DetPMP-wrapped version of the Reacher-v2 environment. | 50 | 2
+|`FetchSlideDenseDetPMP-v1`| A DetPMP-wrapped version of the FetchSlideDense-v1 environment. | 50 | 4
+|`FetchReachDenseDetPMP-v1`| A DetPMP-wrapped version of the FetchReachDense-v1 environment. | 50 | 4
+
+
 ### Stochastic Search
 |Name| Description|Horizon|Action Dimension|Observation Dimension
 |---|---|---|---|---|
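Note: a minimal usage sketch for the IDs listed above, mirroring the new `alr_envs/examples/examples_open_ai.py` added further down in this patch (`make_env` is the package's own helper; one `env.step()` executes a full DetPMP trajectory in the wrapped environment):

```python
from alr_envs.utils.make_env_helpers import make_env

env = make_env("ReacherDetPMP-v2", 1)  # env id, seed
obs = env.reset()
# A single step rolls out one complete trajectory in the underlying env.
obs, reward, done, info = env.step(env.action_space.sample())
print(reward)
```
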
diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py
index b1056a2..181b627 100644
--- a/alr_envs/__init__.py
+++ b/alr_envs/__init__.py
@@ -7,6 +7,7 @@ from alr_envs.classic_control.viapoint_reacher.viapoint_reacher_mp_wrapper impor
 from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper
 from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_mp_wrapper import BallInACupMPWrapper
 from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_positional_wrapper import BallInACupPositionalWrapper
+from alr_envs.open_ai import reacher_v2, continuous_mountain_car, fetch
 from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock
 
 # Mujoco
@@ -560,6 +561,82 @@ register(
     }
 )
 
+## Open AI
+register(
+    id='ContinuousMountainCarDetPMP-v0',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+    kwargs={
+        "name": "gym.envs.classic_control:MountainCarContinuous-v0",
+        "wrappers": [continuous_mountain_car.PositionalWrapper, continuous_mountain_car.MPWrapper],
+        "mp_kwargs": {
+            "num_dof": 1,
+            "num_basis": 4,
+            "duration": 100,
+            "post_traj_time": 0,
+            "width": 0.02,
+            "policy_type": "motor",
+            "policy_kwargs": {
+                "p_gains": 1.,
+                "d_gains": 1.
+            }
+        }
+    }
+)
+
+register(
+    id='ReacherDetPMP-v2',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+    kwargs={
+        "name": "gym.envs.mujoco:Reacher-v2",
+        "wrappers": [reacher_v2.PositionalWrapper, reacher_v2.MPWrapper],
+        "mp_kwargs": {
+            "num_dof": 2,
+            "num_basis": 6,
+            "duration": 1,
+            "post_traj_time": 0,
+            "width": 0.02,
+            "policy_type": "motor",
+            "policy_kwargs": {
+                "p_gains": .6,
+                "d_gains": .075
+            }
+        }
+    }
+)
+
+register(
+    id='FetchSlideDenseDetPMP-v1',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+    kwargs={
+        "name": "gym.envs.robotics:FetchSlideDense-v1",
+        "wrappers": [fetch.PositionalWrapper, fetch.MPWrapper],
+        "mp_kwargs": {
+            "num_dof": 4,
+            "num_basis": 5,
+            "duration": 1,
+            "post_traj_time": 0,
+            "width": 0.02,
+            "policy_type": "position"
+        }
+    }
+)
+
+register(
+    id='FetchReachDenseDetPMP-v1',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+    kwargs={
+        "name": "gym.envs.robotics:FetchReachDense-v1",
+        "wrappers": [fetch.PositionalWrapper, fetch.MPWrapper],
+        "mp_kwargs": {
+            "num_dof": 4,
+            "num_basis": 5,
+            "duration": 1,
+            "post_traj_time": 0,
+            "width": 0.02,
+            "policy_type": "position"
+        }
+    }
+)
 
 
 # BBO functions
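The four registrations above share one pattern: a gym id, the `make_detpmp_env_helper` entry point, the base environment name, the wrapper chain, and `mp_kwargs` for the DetPMP. A sketch of adding a further variant the same way; the id `ContinuousMountainCarDetPMP-v1` and the changed `num_basis` are hypothetical and not part of this patch:

```python
from gym.envs.registration import register

from alr_envs.open_ai import continuous_mountain_car

register(
    id='ContinuousMountainCarDetPMP-v1',  # hypothetical id, for illustration only
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        "name": "gym.envs.classic_control:MountainCarContinuous-v0",
        "wrappers": [continuous_mountain_car.PositionalWrapper, continuous_mountain_car.MPWrapper],
        "mp_kwargs": {
            "num_dof": 1,
            "num_basis": 8,  # assumption: more basis functions than the registered v0
            "duration": 100,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "motor",
            "policy_kwargs": {"p_gains": 1., "d_gains": 1.}
        }
    }
)
```
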
diff --git a/alr_envs/examples/examples_open_ai.py b/alr_envs/examples/examples_open_ai.py
new file mode 100644
index 0000000..d001bc8
--- /dev/null
+++ b/alr_envs/examples/examples_open_ai.py
@@ -0,0 +1,41 @@
+from alr_envs.utils.make_env_helpers import make_env
+
+
+def example_mp(env_name, seed=1):
+    """
+    Example for running a motion primitive based version of an OpenAI Gym environment, which is already registered.
+    For more information on motion primitive specific stuff, look at the mp examples.
+    Args:
+        env_name: DetPMP env_id
+        seed: seed
+
+    Returns:
+
+    """
+    # While gym.make() would also work in this case, we recommend our custom make_env function.
+    env = make_env(env_name, seed)
+
+    rewards = 0
+    obs = env.reset()
+
+    # number of samples/full trajectories (multiple environment steps)
+    for i in range(10):
+        ac = env.action_space.sample()
+        obs, reward, done, info = env.step(ac)
+        rewards += reward
+
+        if done:
+            print(rewards)
+            rewards = 0
+            obs = env.reset()
+
+if __name__ == '__main__':
+    # DMP - not supported yet
+    # example_mp("ReacherDetPMP-v2")
+
+    # DetProMP
+    example_mp("ContinuousMountainCarDetPMP-v0")
+    example_mp("ReacherDetPMP-v2")
+    example_mp("FetchReachDenseDetPMP-v1")
+    example_mp("FetchSlideDenseDetPMP-v1")
+
diff --git a/alr_envs/open_ai/__init__.py b/alr_envs/open_ai/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/alr_envs/open_ai/continuous_mountain_car/__init__.py b/alr_envs/open_ai/continuous_mountain_car/__init__.py
new file mode 100644
index 0000000..4cff6da
--- /dev/null
+++ b/alr_envs/open_ai/continuous_mountain_car/__init__.py
@@ -0,0 +1,2 @@
+from alr_envs.open_ai.continuous_mountain_car.positional_wrapper import PositionalWrapper
+from alr_envs.open_ai.continuous_mountain_car.mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py
new file mode 100644
index 0000000..960fc0c
--- /dev/null
+++ b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py
@@ -0,0 +1,17 @@
+from typing import Union
+
+from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+    @property
+    def start_pos(self):
+        raise ValueError("Start position is not available")
+
+    @property
+    def goal_pos(self):
+        raise ValueError("Goal position is not available and has to be learned from the environment.")
+
+    @property
+    def dt(self) -> Union[float, int]:
+        return 1.
\ No newline at end of file
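The mountain car files above illustrate the two interfaces every wrapped environment in this patch implements: `MPEnvWrapper` (`start_pos`/`goal_pos`/`dt`) and `PositionalEnvWrapper` (`current_pos`/`current_vel`). As a sketch, the same positional contract for gym's Pendulum, whose `state` attribute is `[theta, theta_dot]`; this wrapper is illustrative only and not part of the patch:

```python
from typing import Union

import numpy as np
from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper


class PendulumPositionalWrapper(PositionalEnvWrapper):
    """Hypothetical positional interface for gym's PendulumEnv."""

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        return np.array([self.state[0]])  # joint angle theta

    @property
    def current_vel(self) -> Union[float, int, np.ndarray]:
        return np.array([self.state[1]])  # angular velocity theta_dot
```
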
diff --git a/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py b/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py
new file mode 100644
index 0000000..5b587fa
--- /dev/null
+++ b/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py
@@ -0,0 +1,13 @@
+from typing import Union
+import numpy as np
+from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper
+
+
+class PositionalWrapper(PositionalEnvWrapper):
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray]:
+        return np.array([self.state[1]])  # car velocity
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray]:
+        return np.array([self.state[0]])  # car position along the track
\ No newline at end of file
diff --git a/alr_envs/open_ai/fetch/__init__.py b/alr_envs/open_ai/fetch/__init__.py
new file mode 100644
index 0000000..4c6d088
--- /dev/null
+++ b/alr_envs/open_ai/fetch/__init__.py
@@ -0,0 +1,2 @@
+from alr_envs.open_ai.fetch.positional_wrapper import PositionalWrapper
+from alr_envs.open_ai.fetch.mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/open_ai/fetch/mp_wrapper.py b/alr_envs/open_ai/fetch/mp_wrapper.py
new file mode 100644
index 0000000..2ac7b59
--- /dev/null
+++ b/alr_envs/open_ai/fetch/mp_wrapper.py
@@ -0,0 +1,18 @@
+from typing import Union
+
+from gym import spaces
+from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+    @property
+    def start_pos(self):
+        return self.initial_gripper_xpos
+
+    @property
+    def goal_pos(self):
+        raise ValueError("Goal position is not available and has to be learned from the environment.")
+
+    @property
+    def dt(self) -> Union[float, int]:
+        return self.env.dt
\ No newline at end of file
diff --git a/alr_envs/open_ai/fetch/positional_wrapper.py b/alr_envs/open_ai/fetch/positional_wrapper.py
new file mode 100644
index 0000000..c113db6
--- /dev/null
+++ b/alr_envs/open_ai/fetch/positional_wrapper.py
@@ -0,0 +1,13 @@
+from typing import Union
+import numpy as np
+from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper
+
+
+class PositionalWrapper(PositionalEnvWrapper):
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray]:
+        return self._get_obs()["observation"][-5:-1]
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray]:
+        return self._get_obs()["observation"][:4]
\ No newline at end of file
diff --git a/alr_envs/open_ai/reacher_v2/__init__.py b/alr_envs/open_ai/reacher_v2/__init__.py
new file mode 100644
index 0000000..a0acbea
--- /dev/null
+++ b/alr_envs/open_ai/reacher_v2/__init__.py
@@ -0,0 +1,2 @@
+from alr_envs.open_ai.reacher_v2.positional_wrapper import PositionalWrapper
+from alr_envs.open_ai.reacher_v2.mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/open_ai/reacher_v2/mp_wrapper.py b/alr_envs/open_ai/reacher_v2/mp_wrapper.py
new file mode 100644
index 0000000..be67a35
--- /dev/null
+++ b/alr_envs/open_ai/reacher_v2/mp_wrapper.py
@@ -0,0 +1,18 @@
+from typing import Union
+
+from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+
+    @property
+    def start_pos(self):
+        raise ValueError("Start position is not available")
+
+    @property
+    def goal_pos(self):
+        return self.goal
+
+    @property
+    def dt(self) -> Union[float, int]:
+        return self.env.dt
\ No newline at end of file
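For the registrations with `policy_type: "motor"`, the `p_gains`/`d_gains` entries suggest a PD controller tracking the DetPMP's desired trajectory, while `"position"` presumably forwards desired positions directly. A sketch of the assumed PD law; the actual controller lives in mp_env_api and may differ:

```python
import numpy as np


def pd_control(q_des, qd_des, q, qd, p_gains=0.6, d_gains=0.075):
    """Assumed tracking law behind policy_type="motor"; default gains mirror ReacherDetPMP-v2."""
    return p_gains * (np.asarray(q_des) - np.asarray(q)) + d_gains * (np.asarray(qd_des) - np.asarray(qd))
```
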
diff --git a/alr_envs/open_ai/reacher_v2/positional_wrapper.py b/alr_envs/open_ai/reacher_v2/positional_wrapper.py
new file mode 100644
index 0000000..0fc622b
--- /dev/null
+++ b/alr_envs/open_ai/reacher_v2/positional_wrapper.py
@@ -0,0 +1,13 @@
+from typing import Union
+import numpy as np
+from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper
+
+
+class PositionalWrapper(PositionalEnvWrapper):
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray]:
+        return self.sim.data.qvel[:2]  # angular velocities of the two arm joints
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray]:
+        return self.sim.data.qpos[:2]  # angles of the two arm joints
\ No newline at end of file
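
A quick smoke test over the four new IDs; this assumes a working MuJoCo install for the Reacher and Fetch tasks, and again each step runs one full trajectory:

```python
from alr_envs.utils.make_env_helpers import make_env

for env_id in ["ContinuousMountainCarDetPMP-v0", "ReacherDetPMP-v2",
               "FetchReachDenseDetPMP-v1", "FetchSlideDenseDetPMP-v1"]:
    env = make_env(env_id, 0)  # env id, seed
    env.reset()
    _, reward, done, _ = env.step(env.action_space.sample())
    print(f"{env_id}: reward={reward:.3f}, done={done}")
```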