From 87eb093c2c86423b755ade920b2429d70f13a0a4 Mon Sep 17 00:00:00 2001
From: Marcel
Date: Thu, 1 Jul 2021 14:55:14 +0200
Subject: [PATCH] Add OpenAI Gym environments

---
 README.md                                             | 11 +++
 alr_envs/__init__.py                                  | 77 +++++++++++++++++++
 alr_envs/examples/examples_open_ai.py                 | 41 ++++++++++
 alr_envs/open_ai/__init__.py                          |  0
 .../continuous_mountain_car/__init__.py               |  2 +
 .../continuous_mountain_car/mp_wrapper.py             | 17 ++++
 .../positional_wrapper.py                             | 13 ++++
 alr_envs/open_ai/fetch/__init__.py                    |  2 +
 alr_envs/open_ai/fetch/mp_wrapper.py                  | 18 +++++
 alr_envs/open_ai/fetch/positional_wrapper.py          | 13 ++++
 alr_envs/open_ai/reacher_v2/__init__.py               |  2 +
 alr_envs/open_ai/reacher_v2/mp_wrapper.py             | 18 +++++
 .../open_ai/reacher_v2/positional_wrapper.py          | 13 ++++
 13 files changed, 227 insertions(+)
 create mode 100644 alr_envs/examples/examples_open_ai.py
 create mode 100644 alr_envs/open_ai/__init__.py
 create mode 100644 alr_envs/open_ai/continuous_mountain_car/__init__.py
 create mode 100644 alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py
 create mode 100644 alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py
 create mode 100644 alr_envs/open_ai/fetch/__init__.py
 create mode 100644 alr_envs/open_ai/fetch/mp_wrapper.py
 create mode 100644 alr_envs/open_ai/fetch/positional_wrapper.py
 create mode 100644 alr_envs/open_ai/reacher_v2/__init__.py
 create mode 100644 alr_envs/open_ai/reacher_v2/mp_wrapper.py
 create mode 100644 alr_envs/open_ai/reacher_v2/positional_wrapper.py

diff --git a/README.md b/README.md
index ce95b8d..2ab4143 100644
--- a/README.md
+++ b/README.md
@@ -48,6 +48,17 @@ All environments provide the full episode reward and additional information abou
 
 [//]: |`HoleReacherDetPMP-v0`|
 
+### OpenAI Gym Environments
+These environments are wrapped versions of their OpenAI Gym counterparts.
+
+|Name| Description|Horizon|Action Dimension|Context Dimension
+|---|---|---|---|---|
+|`ContinuousMountainCarDetPMP-v0`| A DetPMP-wrapped version of the MountainCarContinuous-v0 environment. | 100 | 1
+|`ReacherDetPMP-v2`| A DetPMP-wrapped version of the Reacher-v2 environment. | 50 | 2
+|`FetchSlideDenseDetPMP-v1`| A DetPMP-wrapped version of the FetchSlideDense-v1 environment. | 50 | 4
+|`FetchReachDenseDetPMP-v1`| A DetPMP-wrapped version of the FetchReachDense-v1 environment. | 50 | 4
+
+
 ### Stochastic Search
 |Name| Description|Horizon|Action Dimension|Observation Dimension
 |---|---|---|---|---|
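Note: a minimal usage sketch for the IDs listed above, mirroring the new `alr_envs/examples/examples_open_ai.py` added further down in this patch (`make_env` is the package's own helper; one `env.step()` executes a full DetPMP trajectory in the wrapped environment):

```python
from alr_envs.utils.make_env_helpers import make_env

env = make_env("ReacherDetPMP-v2", 1)  # env id, seed
obs = env.reset()
# A single step rolls out one complete trajectory in the underlying env.
obs, reward, done, info = env.step(env.action_space.sample())
print(reward)
```
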
diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py
index b1056a2..181b627 100644
--- a/alr_envs/__init__.py
+++ b/alr_envs/__init__.py
@@ -7,6 +7,7 @@ from alr_envs.classic_control.viapoint_reacher.viapoint_reacher_mp_wrapper impor
 from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper
 from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_mp_wrapper import BallInACupMPWrapper
 from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_positional_wrapper import BallInACupPositionalWrapper
+from alr_envs.open_ai import reacher_v2, continuous_mountain_car, fetch
 from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock
 
 # Mujoco
@@ -560,6 +561,82 @@ register(
     }
 )
 
+## Open AI
+register(
+    id='ContinuousMountainCarDetPMP-v0',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+    kwargs={
+        "name": "gym.envs.classic_control:MountainCarContinuous-v0",
+        "wrappers": [continuous_mountain_car.PositionalWrapper, continuous_mountain_car.MPWrapper],
+        "mp_kwargs": {
+            "num_dof": 1,
+            "num_basis": 4,
+            "duration": 100,
+            "post_traj_time": 0,
+            "width": 0.02,
+            "policy_type": "motor",
+            "policy_kwargs": {
+                "p_gains": 1.,
+                "d_gains": 1.
+            }
+        }
+    }
+)
+
+register(
+    id='ReacherDetPMP-v2',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+    kwargs={
+        "name": "gym.envs.mujoco:Reacher-v2",
+        "wrappers": [reacher_v2.PositionalWrapper, reacher_v2.MPWrapper],
+        "mp_kwargs": {
+            "num_dof": 2,
+            "num_basis": 6,
+            "duration": 1,
+            "post_traj_time": 0,
+            "width": 0.02,
+            "policy_type": "motor",
+            "policy_kwargs": {
+                "p_gains": .6,
+                "d_gains": .075
+            }
+        }
+    }
+)
+
+register(
+    id='FetchSlideDenseDetPMP-v1',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+    kwargs={
+        "name": "gym.envs.robotics:FetchSlideDense-v1",
+        "wrappers": [fetch.PositionalWrapper, fetch.MPWrapper],
+        "mp_kwargs": {
+            "num_dof": 4,
+            "num_basis": 5,
+            "duration": 1,
+            "post_traj_time": 0,
+            "width": 0.02,
+            "policy_type": "position"
+        }
+    }
+)
+
+register(
+    id='FetchReachDenseDetPMP-v1',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+    kwargs={
+        "name": "gym.envs.robotics:FetchReachDense-v1",
+        "wrappers": [fetch.PositionalWrapper, fetch.MPWrapper],
+        "mp_kwargs": {
+            "num_dof": 4,
+            "num_basis": 5,
+            "duration": 1,
+            "post_traj_time": 0,
+            "width": 0.02,
+            "policy_type": "position"
+        }
+    }
+)
 
 
 # BBO functions
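The four registrations above share one pattern: a gym id, the `make_detpmp_env_helper` entry point, the base environment name, the wrapper chain, and `mp_kwargs` for the DetPMP. A sketch of adding a further variant the same way; the id `ContinuousMountainCarDetPMP-v1` and the changed `num_basis` are hypothetical and not part of this patch:

```python
from gym.envs.registration import register

from alr_envs.open_ai import continuous_mountain_car

register(
    id='ContinuousMountainCarDetPMP-v1',  # hypothetical id, for illustration only
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        "name": "gym.envs.classic_control:MountainCarContinuous-v0",
        "wrappers": [continuous_mountain_car.PositionalWrapper, continuous_mountain_car.MPWrapper],
        "mp_kwargs": {
            "num_dof": 1,
            "num_basis": 8,  # assumption: more basis functions than the registered v0
            "duration": 100,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "motor",
            "policy_kwargs": {"p_gains": 1., "d_gains": 1.}
        }
    }
)
```
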
diff --git a/alr_envs/examples/examples_open_ai.py b/alr_envs/examples/examples_open_ai.py
new file mode 100644
index 0000000..d001bc8
--- /dev/null
+++ b/alr_envs/examples/examples_open_ai.py
@@ -0,0 +1,41 @@
+from alr_envs.utils.make_env_helpers import make_env
+
+
+def example_mp(env_name, seed=1):
+    """
+    Example for running a motion primitive based version of an OpenAI Gym environment, which is already registered.
+    For more information on motion primitive specific stuff, look at the mp examples.
+    Args:
+        env_name: DetPMP env_id
+        seed: seed
+
+    Returns:
+
+    """
+    # While gym.make() would also work in this case, we recommend our custom make_env function.
+    env = make_env(env_name, seed)
+
+    rewards = 0
+    obs = env.reset()
+
+    # number of samples/full trajectories (multiple environment steps)
+    for i in range(10):
+        ac = env.action_space.sample()
+        obs, reward, done, info = env.step(ac)
+        rewards += reward
+
+        if done:
+            print(rewards)
+            rewards = 0
+            obs = env.reset()
+
+if __name__ == '__main__':
+    # DMP - not supported yet
+    # example_mp("ReacherDetPMP-v2")
+
+    # DetProMP
+    example_mp("ContinuousMountainCarDetPMP-v0")
+    example_mp("ReacherDetPMP-v2")
+    example_mp("FetchReachDenseDetPMP-v1")
+    example_mp("FetchSlideDenseDetPMP-v1")
+
diff --git a/alr_envs/open_ai/__init__.py b/alr_envs/open_ai/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/alr_envs/open_ai/continuous_mountain_car/__init__.py b/alr_envs/open_ai/continuous_mountain_car/__init__.py
new file mode 100644
index 0000000..4cff6da
--- /dev/null
+++ b/alr_envs/open_ai/continuous_mountain_car/__init__.py
@@ -0,0 +1,2 @@
+from alr_envs.open_ai.continuous_mountain_car.positional_wrapper import PositionalWrapper
+from alr_envs.open_ai.continuous_mountain_car.mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py
new file mode 100644
index 0000000..960fc0c
--- /dev/null
+++ b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py
@@ -0,0 +1,17 @@
+from typing import Union
+
+from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+    @property
+    def start_pos(self):
+        raise ValueError("Start position is not available")
+
+    @property
+    def goal_pos(self):
+        raise ValueError("Goal position is not available and has to be learned from the environment.")
+
+    @property
+    def dt(self) -> Union[float, int]:
+        return 1.
\ No newline at end of file
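The mountain car files above illustrate the two interfaces every wrapped environment in this patch implements: `MPEnvWrapper` (`start_pos`/`goal_pos`/`dt`) and `PositionalEnvWrapper` (`current_pos`/`current_vel`). As a sketch, the same positional contract for gym's Pendulum, whose `state` attribute is `[theta, theta_dot]`; this wrapper is illustrative only and not part of the patch:

```python
from typing import Union

import numpy as np
from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper


class PendulumPositionalWrapper(PositionalEnvWrapper):
    """Hypothetical positional interface for gym's PendulumEnv."""

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        return np.array([self.state[0]])  # joint angle theta

    @property
    def current_vel(self) -> Union[float, int, np.ndarray]:
        return np.array([self.state[1]])  # angular velocity theta_dot
```
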
diff --git a/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py b/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py
new file mode 100644
index 0000000..5b587fa
--- /dev/null
+++ b/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py
@@ -0,0 +1,13 @@
+from typing import Union
+import numpy as np
+from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper
+
+
+class PositionalWrapper(PositionalEnvWrapper):
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray]:
+        return np.array([self.state[1]])  # car velocity
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray]:
+        return np.array([self.state[0]])  # car position along the track
\ No newline at end of file
diff --git a/alr_envs/open_ai/fetch/__init__.py b/alr_envs/open_ai/fetch/__init__.py
new file mode 100644
index 0000000..4c6d088
--- /dev/null
+++ b/alr_envs/open_ai/fetch/__init__.py
@@ -0,0 +1,2 @@
+from alr_envs.open_ai.fetch.positional_wrapper import PositionalWrapper
+from alr_envs.open_ai.fetch.mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/open_ai/fetch/mp_wrapper.py b/alr_envs/open_ai/fetch/mp_wrapper.py
new file mode 100644
index 0000000..2ac7b59
--- /dev/null
+++ b/alr_envs/open_ai/fetch/mp_wrapper.py
@@ -0,0 +1,18 @@
+from typing import Union
+
+from gym import spaces
+from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+    @property
+    def start_pos(self):
+        return self.initial_gripper_xpos
+
+    @property
+    def goal_pos(self):
+        raise ValueError("Goal position is not available and has to be learned from the environment.")
+
+    @property
+    def dt(self) -> Union[float, int]:
+        return self.env.dt
\ No newline at end of file
diff --git a/alr_envs/open_ai/fetch/positional_wrapper.py b/alr_envs/open_ai/fetch/positional_wrapper.py
new file mode 100644
index 0000000..c113db6
--- /dev/null
+++ b/alr_envs/open_ai/fetch/positional_wrapper.py
@@ -0,0 +1,13 @@
+from typing import Union
+import numpy as np
+from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper
+
+
+class PositionalWrapper(PositionalEnvWrapper):
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray]:
+        return self._get_obs()["observation"][-5:-1]
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray]:
+        return self._get_obs()["observation"][:4]
\ No newline at end of file
diff --git a/alr_envs/open_ai/reacher_v2/__init__.py b/alr_envs/open_ai/reacher_v2/__init__.py
new file mode 100644
index 0000000..a0acbea
--- /dev/null
+++ b/alr_envs/open_ai/reacher_v2/__init__.py
@@ -0,0 +1,2 @@
+from alr_envs.open_ai.reacher_v2.positional_wrapper import PositionalWrapper
+from alr_envs.open_ai.reacher_v2.mp_wrapper import MPWrapper
\ No newline at end of file
diff --git a/alr_envs/open_ai/reacher_v2/mp_wrapper.py b/alr_envs/open_ai/reacher_v2/mp_wrapper.py
new file mode 100644
index 0000000..be67a35
--- /dev/null
+++ b/alr_envs/open_ai/reacher_v2/mp_wrapper.py
@@ -0,0 +1,18 @@
+from typing import Union
+
+from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+
+    @property
+    def start_pos(self):
+        raise ValueError("Start position is not available")
+
+    @property
+    def goal_pos(self):
+        return self.goal
+
+    @property
+    def dt(self) -> Union[float, int]:
+        return self.env.dt
\ No newline at end of file
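For the registrations with `policy_type: "motor"`, the `p_gains`/`d_gains` entries suggest a PD controller tracking the DetPMP's desired trajectory, while `"position"` presumably forwards desired positions directly. A sketch of the assumed PD law; the actual controller lives in mp_env_api and may differ:

```python
import numpy as np


def pd_control(q_des, qd_des, q, qd, p_gains=0.6, d_gains=0.075):
    """Assumed tracking law behind policy_type="motor"; default gains mirror ReacherDetPMP-v2."""
    return p_gains * (np.asarray(q_des) - np.asarray(q)) + d_gains * (np.asarray(qd_des) - np.asarray(qd))
```
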
diff --git a/alr_envs/open_ai/reacher_v2/positional_wrapper.py b/alr_envs/open_ai/reacher_v2/positional_wrapper.py
new file mode 100644
index 0000000..0fc622b
--- /dev/null
+++ b/alr_envs/open_ai/reacher_v2/positional_wrapper.py
@@ -0,0 +1,13 @@
+from typing import Union
+import numpy as np
+from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper
+
+
+class PositionalWrapper(PositionalEnvWrapper):
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray]:
+        return self.sim.data.qvel[:2]  # angular velocities of the two arm joints
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray]:
+        return self.sim.data.qpos[:2]  # angles of the two arm joints
\ No newline at end of file
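
A quick smoke test over the four new IDs; this assumes a working MuJoCo install for the Reacher and Fetch tasks, and again each step runs one full trajectory:

```python
from alr_envs.utils.make_env_helpers import make_env

for env_id in ["ContinuousMountainCarDetPMP-v0", "ReacherDetPMP-v2",
               "FetchReachDenseDetPMP-v1", "FetchSlideDenseDetPMP-v1"]:
    env = make_env(env_id, 0)  # env id, seed
    env.reset()
    _, reward, done, _ = env.step(env.action_space.sample())
    print(f"{env_id}: reward={reward:.3f}, done={done}")
```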