Add open ai gym environments

2021-07-01 14:55:14 +02:00 · 2021-07-01 14:55:14 +02:00 · 87eb093c2c
commit 87eb093c2c
parent 6607d9cff9
13 changed files with 227 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -48,6 +48,17 @@ All environments provide the full episode reward and additional information abou

 [//]:  |`HoleReacherDetPMP-v0`|

+### OpenAI Gym Environments
+These environments are wrapped versions of their OpenAI Gym counterparts.
+
+|Name| Description|Horizon|Action Dimension|Context Dimension
+|---|---|---|---|---|
+|`ContinuousMountainCarDetPMP-v0`| A DetPMP wrapped version of the MountainCarContinuous-v0 environment. | 100 | 1
+|`ReacherDetPMP-v2`| A DetPMP wrapped version of the Reacher-v2 environment. | 50 | 2
+|`FetchSlideDenseDetPMP-v1`| A DetPMP wrapped version of the FetchSlideDense-v1 environment. | 50 | 4
+|`FetchReachDenseDetPMP-v1`| A DetPMP wrapped version of the FetchReachDense-v1 environment. | 50 | 4
+
+
 ### Stochastic Search
 |Name| Description|Horizon|Action Dimension|Observation Dimension
 |---|---|---|---|---|
--- a/alr_envs/init.py
+++ b/alr_envs/init.py
@ -7,6 +7,7 @@ from alr_envs.classic_control.viapoint_reacher.viapoint_reacher_mp_wrapper impor
 from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper
 from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_mp_wrapper import BallInACupMPWrapper
 from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_positional_wrapper import BallInACupPositionalWrapper
+from alr_envs.open_ai import reacher_v2, continuous_mountain_car, fetch
 from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock

 # Mujoco
@ -560,6 +561,82 @@ register(
    }
 )

+## Open AI
+# DetPMP variant of gym's MountainCarContinuous-v0. The kwargs below are handed to the
+# make_detpmp_env_helper entry point: the id of the wrapped environment ("name"), the
+# wrapper classes ("wrappers") and the motion primitive settings ("mp_kwargs").
+# NOTE(review): "duration" is presumably expressed in the time unit of the MP wrapper's
+# dt (1.0 for this environment) - confirm against make_detpmp_env_helper.
+register(
+    id='ContinuousMountainCarDetPMP-v0',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+    kwargs={
+        "name": "gym.envs.classic_control:MountainCarContinuous-v0",
+        "wrappers": [continuous_mountain_car.PositionalWrapper, continuous_mountain_car.MPWrapper],
+        "mp_kwargs": {
+            "num_dof": 1,
+            "num_basis": 4,
+            "duration": 100,
+            "post_traj_time": 0,
+            "width": 0.02,
+            "policy_type": "motor",
+            "policy_kwargs": {
+                "p_gains": 1.,
+                "d_gains": 1.
+            }
+        }
+    }
+)
+
+# DetPMP variant of gym's Reacher-v2. The kwargs are handed to the
+# make_detpmp_env_helper entry point: the wrapped environment id, the reacher_v2
+# wrapper classes and the motion primitive settings. The "motor" policy type is
+# configured with the p/d gains given under "policy_kwargs".
+# NOTE(review): the unit of "duration" is not visible here - presumably it relates to
+# the dt reported by the MP wrapper; confirm against make_detpmp_env_helper.
+register(
+    id='ReacherDetPMP-v2',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+    kwargs={
+        "name": "gym.envs.mujoco:Reacher-v2",
+        "wrappers": [reacher_v2.PositionalWrapper, reacher_v2.MPWrapper],
+        "mp_kwargs": {
+            "num_dof": 2,
+            "num_basis": 6,
+            "duration": 1,
+            "post_traj_time": 0,
+            "width": 0.02,
+            "policy_type": "motor",
+            "policy_kwargs": {
+                "p_gains": .6,
+                "d_gains": .075
+            }
+        }
+    }
+)
+
+# DetPMP variant of gym's FetchSlideDense-v1. The kwargs are handed to the
+# make_detpmp_env_helper entry point: the wrapped environment id, the fetch wrapper
+# classes and the motion primitive settings. This registration selects the
+# "position" policy type and passes no "policy_kwargs".
+register(
+    id='FetchSlideDenseDetPMP-v1',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+    kwargs={
+        "name": "gym.envs.robotics:FetchSlideDense-v1",
+        "wrappers": [fetch.PositionalWrapper, fetch.MPWrapper],
+        "mp_kwargs": {
+            "num_dof": 4,
+            "num_basis": 5,
+            "duration": 1,
+            "post_traj_time": 0,
+            "width": 0.02,
+            "policy_type": "position"
+        }
+    }
+)
+
+# DetPMP variant of gym's FetchReachDense-v1. The kwargs are handed to the
+# make_detpmp_env_helper entry point: the wrapped environment id, the fetch wrapper
+# classes and the motion primitive settings. This registration selects the
+# "position" policy type and passes no "policy_kwargs".
+register(
+    id='FetchReachDenseDetPMP-v1',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+    kwargs={
+        "name": "gym.envs.robotics:FetchReachDense-v1",
+        "wrappers": [fetch.PositionalWrapper, fetch.MPWrapper],
+        "mp_kwargs": {
+            "num_dof": 4,
+            "num_basis": 5,
+            "duration": 1,
+            "post_traj_time": 0,
+            "width": 0.02,
+            "policy_type": "position"
+        }
+    }
+)

 # BBO functions

--- a/alr_envs/examples/examples_open_ai.py
+++ b/alr_envs/examples/examples_open_ai.py
@ -0,0 +1,41 @@
+from alr_envs.utils.make_env_helpers import make_env
+
+
+def example_mp(env_name, seed=1):
+    """
+    Run ten randomly sampled actions on an already registered motion primitive environment.
+
+    Whenever a step reports ``done``, the reward accumulated so far is printed, the accumulator is
+    cleared and the environment is reset. For more details on motion primitives, see the mp examples.
+    Args:
+        env_name: id of the registered environment, e.g. a DetPMP env_id
+        seed: seed handed to ``make_env``
+
+    Returns:
+        None
+    """
+    # gym.make() would work here as well, but the custom make_env function is the recommended way.
+    env = make_env(env_name, seed)
+    env.reset()
+
+    # number of samples/full trajectories (multiple environment steps)
+    num_samples = 10
+    accumulated_reward = 0
+    for _ in range(num_samples):
+        action = env.action_space.sample()
+        _, step_reward, done, _ = env.step(action)
+        accumulated_reward += step_reward
+
+        if not done:
+            continue
+
+        print(accumulated_reward)
+        accumulated_reward = 0
+        env.reset()
+
+if __name__ == '__main__':
+    # DMP variants are not supported yet, so only the DetProMP environments are run.
+    detpmp_env_ids = (
+        "ContinuousMountainCarDetPMP-v0",
+        "ReacherDetPMP-v2",
+        "FetchReachDenseDetPMP-v1",
+        "FetchSlideDenseDetPMP-v1",
+    )
+    for env_id in detpmp_env_ids:
+        example_mp(env_id)
+
--- a/alr_envs/open_ai/init.py
+++ b/alr_envs/open_ai/init.py
--- a/alr_envs/open_ai/continuous_mountain_car/init.py
+++ b/alr_envs/open_ai/continuous_mountain_car/init.py
@ -0,0 +1,2 @@
+from alr_envs.open_ai.continuous_mountain_car.positional_wrapper import PositionalWrapper
+from alr_envs.open_ai.continuous_mountain_car.mp_wrapper import MPWrapper
--- a/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py
+++ b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py
@ -0,0 +1,17 @@
+from typing import Union
+
+from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+    """Motion primitive wrapper for the continuous mountain car environment.
+
+    Neither a start nor a goal position is exposed: reading either property
+    raises ``ValueError``.
+    """
+
+    @property
+    def start_pos(self):
+        """Always raise ``ValueError``; no start position is provided."""
+        message = "Start position is not available"
+        raise ValueError(message)
+
+    @property
+    def goal_pos(self):
+        """Always raise ``ValueError``; no goal position is provided."""
+        message = "Goal position is not available and has to be learnt based on the environment."
+        raise ValueError(message)
+
+    @property
+    def dt(self) -> Union[float, int]:
+        """Return the constant step size ``1.0`` reported by this wrapper."""
+        step_size = 1.
+        return step_size
--- a/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py
+++ b/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py
@ -0,0 +1,13 @@
+from typing import Union
+import numpy as np
+from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper
+
+
+class PositionalWrapper(PositionalEnvWrapper):
+    """Expose entries of the ``state`` attribute as current position and velocity."""
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray]:
+        """Return ``state[1]`` wrapped in a one-element array."""
+        velocity = self.state[1]
+        return np.array([velocity])
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray]:
+        """Return ``state[0]`` wrapped in a one-element array."""
+        position = self.state[0]
+        return np.array([position])
--- a/alr_envs/open_ai/fetch/init.py
+++ b/alr_envs/open_ai/fetch/init.py
@ -0,0 +1,2 @@
+from alr_envs.open_ai.fetch.positional_wrapper import PositionalWrapper
+from alr_envs.open_ai.fetch.mp_wrapper import MPWrapper
--- a/alr_envs/open_ai/fetch/mp_wrapper.py
+++ b/alr_envs/open_ai/fetch/mp_wrapper.py
@ -0,0 +1,18 @@
+from typing import Union
+
+from gym import spaces
+from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+    """Motion primitive wrapper for the Fetch environments.
+
+    A start position is available, a goal position is not: reading
+    ``goal_pos`` raises ``ValueError``.
+    """
+
+    @property
+    def start_pos(self):
+        """Return the ``initial_gripper_xpos`` attribute.
+
+        NOTE(review): the Fetch DetPMP registrations use ``num_dof`` 4 -
+        confirm that the returned value has a matching length.
+        """
+        gripper_start = self.initial_gripper_xpos
+        return gripper_start
+
+    @property
+    def goal_pos(self):
+        """Always raise ``ValueError``; no goal position is provided."""
+        message = "Goal position is not available and has to be learnt based on the environment."
+        raise ValueError(message)
+
+    @property
+    def dt(self) -> Union[float, int]:
+        """Return the ``dt`` of the wrapped environment."""
+        wrapped_env = self.env
+        return wrapped_env.dt
--- a/alr_envs/open_ai/fetch/positional_wrapper.py
+++ b/alr_envs/open_ai/fetch/positional_wrapper.py
@ -0,0 +1,13 @@
+from typing import Union
+import numpy as np
+from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper
+
+
+class PositionalWrapper(PositionalEnvWrapper):
+    """Read current position and velocity from slices of the ``"observation"`` vector.
+
+    NOTE(review): which quantities sit at the sliced indices depends on the
+    observation layout of the wrapped Fetch task - confirm for each task.
+    """
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray]:
+        """Return the slice ``[-5:-1]`` of the ``"observation"`` entry of ``_get_obs()``."""
+        observation = self._get_obs()["observation"]
+        return observation[-5:-1]
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray]:
+        """Return the slice ``[:4]`` of the ``"observation"`` entry of ``_get_obs()``."""
+        observation = self._get_obs()["observation"]
+        return observation[:4]
--- a/alr_envs/open_ai/reacher_v2/init.py
+++ b/alr_envs/open_ai/reacher_v2/init.py
@ -0,0 +1,2 @@
+from alr_envs.open_ai.reacher_v2.positional_wrapper import PositionalWrapper
+from alr_envs.open_ai.reacher_v2.mp_wrapper import MPWrapper
--- a/alr_envs/open_ai/reacher_v2/mp_wrapper.py
+++ b/alr_envs/open_ai/reacher_v2/mp_wrapper.py
@ -0,0 +1,18 @@
+from typing import Union
+
+from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+    """Motion primitive wrapper for the Reacher-v2 environment.
+
+    A goal position is available, a start position is not: reading
+    ``start_pos`` raises ``ValueError``.
+    """
+
+    @property
+    def start_pos(self):
+        """Always raise ``ValueError``; no start position is provided."""
+        message = "Start position is not available"
+        raise ValueError(message)
+
+    @property
+    def goal_pos(self):
+        """Return the ``goal`` attribute."""
+        target = self.goal
+        return target
+
+    @property
+    def dt(self) -> Union[float, int]:
+        """Return the ``dt`` of the wrapped environment."""
+        wrapped_env = self.env
+        return wrapped_env.dt
--- a/alr_envs/open_ai/reacher_v2/positional_wrapper.py
+++ b/alr_envs/open_ai/reacher_v2/positional_wrapper.py
@ -0,0 +1,13 @@
+from typing import Union
+import numpy as np
+from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper
+
+
+class PositionalWrapper(PositionalEnvWrapper):
+    """Read current position and velocity from the first two simulator coordinates."""
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray]:
+        """Return the first two entries of ``sim.data.qvel``."""
+        all_velocities = self.sim.data.qvel
+        return all_velocities[:2]
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray]:
+        """Return the first two entries of ``sim.data.qpos``."""
+        all_positions = self.sim.data.qpos
+        return all_positions[:2]