From 87eb093c2c86423b755ade920b2429d70f13a0a4 Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 1 Jul 2021 14:55:14 +0200 Subject: [PATCH 1/6] Add open ai gym environments --- README.md | 11 +++ alr_envs/__init__.py | 77 +++++++++++++++++++ alr_envs/examples/examples_open_ai.py | 41 ++++++++++ alr_envs/open_ai/__init__.py | 0 .../continuous_mountain_car/__init__.py | 2 + .../continuous_mountain_car/mp_wrapper.py | 17 ++++ .../positional_wrapper.py | 13 ++++ alr_envs/open_ai/fetch/__init__.py | 2 + alr_envs/open_ai/fetch/mp_wrapper.py | 18 +++++ alr_envs/open_ai/fetch/positional_wrapper.py | 13 ++++ alr_envs/open_ai/reacher_v2/__init__.py | 2 + alr_envs/open_ai/reacher_v2/mp_wrapper.py | 18 +++++ .../open_ai/reacher_v2/positional_wrapper.py | 13 ++++ 13 files changed, 227 insertions(+) create mode 100644 alr_envs/examples/examples_open_ai.py create mode 100644 alr_envs/open_ai/__init__.py create mode 100644 alr_envs/open_ai/continuous_mountain_car/__init__.py create mode 100644 alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py create mode 100644 alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py create mode 100644 alr_envs/open_ai/fetch/__init__.py create mode 100644 alr_envs/open_ai/fetch/mp_wrapper.py create mode 100644 alr_envs/open_ai/fetch/positional_wrapper.py create mode 100644 alr_envs/open_ai/reacher_v2/__init__.py create mode 100644 alr_envs/open_ai/reacher_v2/mp_wrapper.py create mode 100644 alr_envs/open_ai/reacher_v2/positional_wrapper.py diff --git a/README.md b/README.md index ce95b8d..2ab4143 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,17 @@ All environments provide the full episode reward and additional information abou [//]: |`HoleReacherDetPMP-v0`| +### OpenAi-gym Environments +These environments are wrapped-versions of their OpenAi-gym counterparts. + +|Name| Description|Horizon|Action Dimension|Context Dimension +|---|---|---|---|---| +|`ContinuousMountainCarDetPMP-v0`| A DetPmP wrapped version of the ContinuousMountainCar-v0 environment. | 100 | 1 +|`ReacherDetPMP-v2`| A DetPmP wrapped version of the Reacher-v2 environment. | 50 | 2 +|`FetchSlideDenseDetPMP-v1`| A DetPmP wrapped version of the FetchSlideDense-v1 environment. | 50 | 4 +|`FetchReachDenseDetPMP-v1`| A DetPmP wrapped version of the FetchReachDense-v1 environment. 
| 50 | 4 + + ### Stochastic Search |Name| Description|Horizon|Action Dimension|Observation Dimension |---|---|---|---|---| diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py index b1056a2..181b627 100644 --- a/alr_envs/__init__.py +++ b/alr_envs/__init__.py @@ -7,6 +7,7 @@ from alr_envs.classic_control.viapoint_reacher.viapoint_reacher_mp_wrapper impor from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_mp_wrapper import BallInACupMPWrapper from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_positional_wrapper import BallInACupPositionalWrapper +from alr_envs.open_ai import reacher_v2, continuous_mountain_car, fetch from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock # Mujoco @@ -560,6 +561,82 @@ register( } ) +## Open AI +register( + id='ContinuousMountainCarDetPMP-v0', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": "gym.envs.classic_control:MountainCarContinuous-v0", + "wrappers": [continuous_mountain_car.PositionalWrapper, continuous_mountain_car.MPWrapper], + "mp_kwargs": { + "num_dof": 1, + "num_basis": 4, + "duration": 100, + "post_traj_time": 0, + "width": 0.02, + "policy_type": "motor", + "policy_kwargs": { + "p_gains": 1., + "d_gains": 1. + } + } + } +) + +register( + id='ReacherDetPMP-v2', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": "gym.envs.mujoco:Reacher-v2", + "wrappers": [reacher_v2.PositionalWrapper, reacher_v2.MPWrapper], + "mp_kwargs": { + "num_dof": 2, + "num_basis": 6, + "duration": 1, + "post_traj_time": 0, + "width": 0.02, + "policy_type": "motor", + "policy_kwargs": { + "p_gains": .6, + "d_gains": .075 + } + } + } +) + +register( + id='FetchSlideDenseDetPMP-v1', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": "gym.envs.robotics:FetchSlideDense-v1", + "wrappers": [fetch.PositionalWrapper, fetch.MPWrapper], + "mp_kwargs": { + "num_dof": 4, + "num_basis": 5, + "duration": 1, + "post_traj_time": 0, + "width": 0.02, + "policy_type": "position" + } + } +) + +register( + id='FetchReachDenseDetPMP-v1', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": "gym.envs.robotics:FetchReachDense-v1", + "wrappers": [fetch.PositionalWrapper, fetch.MPWrapper], + "mp_kwargs": { + "num_dof": 4, + "num_basis": 5, + "duration": 1, + "post_traj_time": 0, + "width": 0.02, + "policy_type": "position" + } + } +) # BBO functions diff --git a/alr_envs/examples/examples_open_ai.py b/alr_envs/examples/examples_open_ai.py new file mode 100644 index 0000000..d001bc8 --- /dev/null +++ b/alr_envs/examples/examples_open_ai.py @@ -0,0 +1,41 @@ +from alr_envs.utils.make_env_helpers import make_env + + +def example_mp(env_name, seed=1): + """ + Example for running a motion primitive based version of a OpenAI-gym environment, which is already registered. + For more information on motion primitive specific stuff, look at the mp examples. + Args: + env_name: DetPMP env_id + seed: seed + + Returns: + + """ + # While in this case gym.make() is possible to use as well, we recommend our custom make env function. 
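+    # (make_env resolves the registered id, here via make_detpmp_env_helper, which
+    # applies the wrappers listed in alr_envs/__init__.py and seeds the base env.)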
+ env = make_env(env_name, seed) + + rewards = 0 + obs = env.reset() + + # number of samples/full trajectories (multiple environment steps) + for i in range(10): + ac = env.action_space.sample() + obs, reward, done, info = env.step(ac) + rewards += reward + + if done: + print(rewards) + rewards = 0 + obs = env.reset() + +if __name__ == '__main__': + # DMP - not supported yet + #example_mp("ReacherDetPMP-v2") + + # DetProMP + example_mp("ContinuousMountainCarDetPMP-v0") + example_mp("ReacherDetPMP-v2") + example_mp("FetchReachDenseDetPMP-v1") + example_mp("FetchSlideDenseDetPMP-v1") + diff --git a/alr_envs/open_ai/__init__.py b/alr_envs/open_ai/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/alr_envs/open_ai/continuous_mountain_car/__init__.py b/alr_envs/open_ai/continuous_mountain_car/__init__.py new file mode 100644 index 0000000..4cff6da --- /dev/null +++ b/alr_envs/open_ai/continuous_mountain_car/__init__.py @@ -0,0 +1,2 @@ +from alr_envs.open_ai.continuous_mountain_car.positional_wrapper import PositionalWrapper +from alr_envs.open_ai.continuous_mountain_car.mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py new file mode 100644 index 0000000..960fc0c --- /dev/null +++ b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py @@ -0,0 +1,17 @@ +from typing import Union + +from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + @property + def start_pos(self): + raise ValueError("Start position is not available") + + @property + def goal_pos(self): + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return 1. 
\ No newline at end of file diff --git a/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py b/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py new file mode 100644 index 0000000..5b587fa --- /dev/null +++ b/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py @@ -0,0 +1,13 @@ +from typing import Union +import numpy as np +from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper + + +class PositionalWrapper(PositionalEnvWrapper): + @property + def current_vel(self) -> Union[float, int, np.ndarray]: + return np.array([self.state[1]]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return np.array([self.state[0]]) \ No newline at end of file diff --git a/alr_envs/open_ai/fetch/__init__.py b/alr_envs/open_ai/fetch/__init__.py new file mode 100644 index 0000000..4c6d088 --- /dev/null +++ b/alr_envs/open_ai/fetch/__init__.py @@ -0,0 +1,2 @@ +from alr_envs.open_ai.fetch.positional_wrapper import PositionalWrapper +from alr_envs.open_ai.fetch.mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/open_ai/fetch/mp_wrapper.py b/alr_envs/open_ai/fetch/mp_wrapper.py new file mode 100644 index 0000000..2ac7b59 --- /dev/null +++ b/alr_envs/open_ai/fetch/mp_wrapper.py @@ -0,0 +1,18 @@ +from typing import Union + +from gym import spaces +from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + @property + def start_pos(self): + return self.initial_gripper_xpos + + @property + def goal_pos(self): + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return self.env.dt \ No newline at end of file diff --git a/alr_envs/open_ai/fetch/positional_wrapper.py b/alr_envs/open_ai/fetch/positional_wrapper.py new file mode 100644 index 0000000..c113db6 --- /dev/null +++ b/alr_envs/open_ai/fetch/positional_wrapper.py @@ -0,0 +1,13 @@ +from typing import Union +import numpy as np +from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper + + +class PositionalWrapper(PositionalEnvWrapper): + @property + def current_vel(self) -> Union[float, int, np.ndarray]: + return self._get_obs()["observation"][-5:-1] + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return self._get_obs()["observation"][:4] \ No newline at end of file diff --git a/alr_envs/open_ai/reacher_v2/__init__.py b/alr_envs/open_ai/reacher_v2/__init__.py new file mode 100644 index 0000000..a0acbea --- /dev/null +++ b/alr_envs/open_ai/reacher_v2/__init__.py @@ -0,0 +1,2 @@ +from alr_envs.open_ai.reacher_v2.positional_wrapper import PositionalWrapper +from alr_envs.open_ai.reacher_v2.mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/open_ai/reacher_v2/mp_wrapper.py b/alr_envs/open_ai/reacher_v2/mp_wrapper.py new file mode 100644 index 0000000..be67a35 --- /dev/null +++ b/alr_envs/open_ai/reacher_v2/mp_wrapper.py @@ -0,0 +1,18 @@ +from typing import Union + +from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + + @property + def start_pos(self): + raise ValueError("Start position is not available") + + @property + def goal_pos(self): + return self.goal + + @property + def dt(self) -> Union[float, int]: + return self.env.dt \ No newline at end of file diff --git a/alr_envs/open_ai/reacher_v2/positional_wrapper.py b/alr_envs/open_ai/reacher_v2/positional_wrapper.py new file mode 100644 index 
0000000..0fc622b --- /dev/null +++ b/alr_envs/open_ai/reacher_v2/positional_wrapper.py @@ -0,0 +1,13 @@ +from typing import Union +import numpy as np +from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper + + +class PositionalWrapper(PositionalEnvWrapper): + @property + def current_vel(self) -> Union[float, int, np.ndarray]: + return self.sim.data.qvel[:2] + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return self.sim.data.qpos[:2] \ No newline at end of file From 585bdc7b47dae56017db19c17a81e337d20a1c50 Mon Sep 17 00:00:00 2001 From: Marcel Date: Fri, 2 Jul 2021 06:12:32 +0200 Subject: [PATCH 2/6] Add dm_control to setup.py --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7170fa6..cc9445c 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,8 @@ setup( 'PyQt5', 'matplotlib', 'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git', - 'mujoco_py' + 'mujoco_py', + 'dm_control' ], url='https://github.com/ALRhub/alr_envs/', From 28d10ef0894dd0f974886f5c03b9ada9646162dd Mon Sep 17 00:00:00 2001 From: Marcel Date: Mon, 5 Jul 2021 08:14:39 +0200 Subject: [PATCH 3/6] Bug fixes for wrapped OpenAi environments --- alr_envs/__init__.py | 6 +++--- alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py | 2 +- alr_envs/open_ai/fetch/positional_wrapper.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py index 181b627..2611701 100644 --- a/alr_envs/__init__.py +++ b/alr_envs/__init__.py @@ -571,7 +571,7 @@ register( "mp_kwargs": { "num_dof": 1, "num_basis": 4, - "duration": 100, + "duration": 2, "post_traj_time": 0, "width": 0.02, "policy_type": "motor", @@ -613,7 +613,7 @@ register( "mp_kwargs": { "num_dof": 4, "num_basis": 5, - "duration": 1, + "duration": 2, "post_traj_time": 0, "width": 0.02, "policy_type": "position" @@ -630,7 +630,7 @@ register( "mp_kwargs": { "num_dof": 4, "num_basis": 5, - "duration": 1, + "duration": 2, "post_traj_time": 0, "width": 0.02, "policy_type": "position" diff --git a/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py index 960fc0c..886b1e1 100644 --- a/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py +++ b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py @@ -14,4 +14,4 @@ class MPWrapper(MPEnvWrapper): @property def dt(self) -> Union[float, int]: - return 1. 
\ No newline at end of file + return 0.02 \ No newline at end of file diff --git a/alr_envs/open_ai/fetch/positional_wrapper.py b/alr_envs/open_ai/fetch/positional_wrapper.py index c113db6..9c6dcf2 100644 --- a/alr_envs/open_ai/fetch/positional_wrapper.py +++ b/alr_envs/open_ai/fetch/positional_wrapper.py @@ -6,8 +6,8 @@ from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper class PositionalWrapper(PositionalEnvWrapper): @property def current_vel(self) -> Union[float, int, np.ndarray]: - return self._get_obs()["observation"][-5:-1] + return self.unwrapped._get_obs()["observation"][-5:-1] @property def current_pos(self) -> Union[float, int, np.ndarray]: - return self._get_obs()["observation"][:4] \ No newline at end of file + return self.unwrapped._get_obs()["observation"][:4] \ No newline at end of file From 0046ade102be85d56c79933ac180004b06b13e7e Mon Sep 17 00:00:00 2001 From: Marcel Date: Mon, 5 Jul 2021 09:16:36 +0200 Subject: [PATCH 4/6] Adopt new interface structure --- alr_envs/__init__.py | 8 ++++---- .../open_ai/continuous_mountain_car/__init__.py | 1 - .../open_ai/continuous_mountain_car/mp_wrapper.py | 9 +++++++-- .../continuous_mountain_car/positional_wrapper.py | 13 ------------- alr_envs/open_ai/fetch/__init__.py | 1 - alr_envs/open_ai/fetch/mp_wrapper.py | 10 +++++++--- alr_envs/open_ai/fetch/positional_wrapper.py | 13 ------------- alr_envs/open_ai/reacher_v2/__init__.py | 1 - alr_envs/open_ai/reacher_v2/mp_wrapper.py | 9 +++++++-- alr_envs/open_ai/reacher_v2/positional_wrapper.py | 13 ------------- 10 files changed, 25 insertions(+), 53 deletions(-) delete mode 100644 alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py delete mode 100644 alr_envs/open_ai/fetch/positional_wrapper.py delete mode 100644 alr_envs/open_ai/reacher_v2/positional_wrapper.py diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py index 144b5f2..0dfc1f5 100644 --- a/alr_envs/__init__.py +++ b/alr_envs/__init__.py @@ -573,7 +573,7 @@ register( entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', kwargs={ "name": "gym.envs.classic_control:MountainCarContinuous-v0", - "wrappers": [continuous_mountain_car.PositionalWrapper, continuous_mountain_car.MPWrapper], + "wrappers": [continuous_mountain_car.MPWrapper], "mp_kwargs": { "num_dof": 1, "num_basis": 4, @@ -594,7 +594,7 @@ register( entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', kwargs={ "name": "gym.envs.mujoco:Reacher-v2", - "wrappers": [reacher_v2.PositionalWrapper, reacher_v2.MPWrapper], + "wrappers": [reacher_v2.MPWrapper], "mp_kwargs": { "num_dof": 2, "num_basis": 6, @@ -615,7 +615,7 @@ register( entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', kwargs={ "name": "gym.envs.robotics:FetchSlideDense-v1", - "wrappers": [fetch.PositionalWrapper, fetch.MPWrapper], + "wrappers": [fetch.MPWrapper], "mp_kwargs": { "num_dof": 4, "num_basis": 5, @@ -632,7 +632,7 @@ register( entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', kwargs={ "name": "gym.envs.robotics:FetchReachDense-v1", - "wrappers": [fetch.PositionalWrapper, fetch.MPWrapper], + "wrappers": [fetch.MPWrapper], "mp_kwargs": { "num_dof": 4, "num_basis": 5, diff --git a/alr_envs/open_ai/continuous_mountain_car/__init__.py b/alr_envs/open_ai/continuous_mountain_car/__init__.py index 4cff6da..36f731d 100644 --- a/alr_envs/open_ai/continuous_mountain_car/__init__.py +++ b/alr_envs/open_ai/continuous_mountain_car/__init__.py @@ -1,2 +1 @@ -from 
alr_envs.open_ai.continuous_mountain_car.positional_wrapper import PositionalWrapper from alr_envs.open_ai.continuous_mountain_car.mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py index 886b1e1..f0bccab 100644 --- a/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py +++ b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py @@ -1,12 +1,17 @@ from typing import Union +import numpy as np from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper class MPWrapper(MPEnvWrapper): @property - def start_pos(self): - raise ValueError("Start position is not available") + def current_vel(self) -> Union[float, int, np.ndarray]: + return np.array([self.state[1]]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return np.array([self.state[0]]) @property def goal_pos(self): diff --git a/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py b/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py deleted file mode 100644 index 5b587fa..0000000 --- a/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py +++ /dev/null @@ -1,13 +0,0 @@ -from typing import Union -import numpy as np -from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper - - -class PositionalWrapper(PositionalEnvWrapper): - @property - def current_vel(self) -> Union[float, int, np.ndarray]: - return np.array([self.state[1]]) - - @property - def current_pos(self) -> Union[float, int, np.ndarray]: - return np.array([self.state[0]]) \ No newline at end of file diff --git a/alr_envs/open_ai/fetch/__init__.py b/alr_envs/open_ai/fetch/__init__.py index 4c6d088..2e68176 100644 --- a/alr_envs/open_ai/fetch/__init__.py +++ b/alr_envs/open_ai/fetch/__init__.py @@ -1,2 +1 @@ -from alr_envs.open_ai.fetch.positional_wrapper import PositionalWrapper from alr_envs.open_ai.fetch.mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/open_ai/fetch/mp_wrapper.py b/alr_envs/open_ai/fetch/mp_wrapper.py index 2ac7b59..acb07a3 100644 --- a/alr_envs/open_ai/fetch/mp_wrapper.py +++ b/alr_envs/open_ai/fetch/mp_wrapper.py @@ -1,13 +1,17 @@ from typing import Union -from gym import spaces +import numpy as np from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper class MPWrapper(MPEnvWrapper): @property - def start_pos(self): - return self.initial_gripper_xpos + def current_vel(self) -> Union[float, int, np.ndarray]: + return self.unwrapped._get_obs()["observation"][-5:-1] + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return self.unwrapped._get_obs()["observation"][:4] @property def goal_pos(self): diff --git a/alr_envs/open_ai/fetch/positional_wrapper.py b/alr_envs/open_ai/fetch/positional_wrapper.py deleted file mode 100644 index 9c6dcf2..0000000 --- a/alr_envs/open_ai/fetch/positional_wrapper.py +++ /dev/null @@ -1,13 +0,0 @@ -from typing import Union -import numpy as np -from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper - - -class PositionalWrapper(PositionalEnvWrapper): - @property - def current_vel(self) -> Union[float, int, np.ndarray]: - return self.unwrapped._get_obs()["observation"][-5:-1] - - @property - def current_pos(self) -> Union[float, int, np.ndarray]: - return self.unwrapped._get_obs()["observation"][:4] \ No newline at end of file diff --git a/alr_envs/open_ai/reacher_v2/__init__.py b/alr_envs/open_ai/reacher_v2/__init__.py index a0acbea..48a5615 
100644 --- a/alr_envs/open_ai/reacher_v2/__init__.py +++ b/alr_envs/open_ai/reacher_v2/__init__.py @@ -1,2 +1 @@ -from alr_envs.open_ai.reacher_v2.positional_wrapper import PositionalWrapper from alr_envs.open_ai.reacher_v2.mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/open_ai/reacher_v2/mp_wrapper.py b/alr_envs/open_ai/reacher_v2/mp_wrapper.py index be67a35..7636f50 100644 --- a/alr_envs/open_ai/reacher_v2/mp_wrapper.py +++ b/alr_envs/open_ai/reacher_v2/mp_wrapper.py @@ -1,13 +1,18 @@ from typing import Union +import numpy as np from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper class MPWrapper(MPEnvWrapper): @property - def start_pos(self): - raise ValueError("Start position is not available") + def current_vel(self) -> Union[float, int, np.ndarray]: + return self.sim.data.qvel[:2] + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return self.sim.data.qpos[:2] @property def goal_pos(self): diff --git a/alr_envs/open_ai/reacher_v2/positional_wrapper.py b/alr_envs/open_ai/reacher_v2/positional_wrapper.py deleted file mode 100644 index 0fc622b..0000000 --- a/alr_envs/open_ai/reacher_v2/positional_wrapper.py +++ /dev/null @@ -1,13 +0,0 @@ -from typing import Union -import numpy as np -from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper - - -class PositionalWrapper(PositionalEnvWrapper): - @property - def current_vel(self) -> Union[float, int, np.ndarray]: - return self.sim.data.qvel[:2] - - @property - def current_pos(self) -> Union[float, int, np.ndarray]: - return self.sim.data.qpos[:2] \ No newline at end of file From 92e6a84d0333232cbeae0474c52bd6dcda1c37ab Mon Sep 17 00:00:00 2001 From: Marcel Date: Mon, 5 Jul 2021 09:52:41 +0200 Subject: [PATCH 5/6] Adopt new interface structure --- README.md | 3 ++- alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py | 2 +- alr_envs/open_ai/fetch/mp_wrapper.py | 2 +- alr_envs/open_ai/reacher_v2/mp_wrapper.py | 6 +----- reacher.egg-info/PKG-INFO | 10 ---------- reacher.egg-info/SOURCES.txt | 7 ------- reacher.egg-info/dependency_links.txt | 1 - reacher.egg-info/requires.txt | 1 - reacher.egg-info/top_level.txt | 1 - setup.py | 4 ++-- 10 files changed, 7 insertions(+), 30 deletions(-) delete mode 100644 reacher.egg-info/PKG-INFO delete mode 100644 reacher.egg-info/SOURCES.txt delete mode 100644 reacher.egg-info/dependency_links.txt delete mode 100644 reacher.egg-info/requires.txt delete mode 100644 reacher.egg-info/top_level.txt diff --git a/README.md b/README.md index 2ab4143..56ec0bf 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,8 @@ This repository collects custom Robotics environments not included in benchmark suites like OpenAI gym, rllab, etc. Creating a custom (Mujoco) gym environment can be done according to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md). For stochastic search problems with gym interface use the `Rosenbrock-v0` reference implementation. -We also support to solve environments with DMPs. When adding new DMP tasks check the `ViaPointReacherDMP-v0` reference implementation. +We also support to solve environments with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (DetPMP, we only consider the mean usually). +When adding new DMP tasks check the `ViaPointReacherDMP-v0` reference implementation. When simply using the tasks, you can also leverage the wrapper class `DmpWrapper` to turn normal gym environments in to DMP tasks. 
## Environments diff --git a/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py index f0bccab..29378ed 100644 --- a/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py +++ b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py @@ -1,7 +1,7 @@ from typing import Union import numpy as np -from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper +from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper class MPWrapper(MPEnvWrapper): diff --git a/alr_envs/open_ai/fetch/mp_wrapper.py b/alr_envs/open_ai/fetch/mp_wrapper.py index acb07a3..6602a18 100644 --- a/alr_envs/open_ai/fetch/mp_wrapper.py +++ b/alr_envs/open_ai/fetch/mp_wrapper.py @@ -1,7 +1,7 @@ from typing import Union import numpy as np -from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper +from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper class MPWrapper(MPEnvWrapper): diff --git a/alr_envs/open_ai/reacher_v2/mp_wrapper.py b/alr_envs/open_ai/reacher_v2/mp_wrapper.py index 7636f50..d3181b5 100644 --- a/alr_envs/open_ai/reacher_v2/mp_wrapper.py +++ b/alr_envs/open_ai/reacher_v2/mp_wrapper.py @@ -1,7 +1,7 @@ from typing import Union import numpy as np -from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper +from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper class MPWrapper(MPEnvWrapper): @@ -14,10 +14,6 @@ class MPWrapper(MPEnvWrapper): def current_pos(self) -> Union[float, int, np.ndarray]: return self.sim.data.qpos[:2] - @property - def goal_pos(self): - return self.goal - @property def dt(self) -> Union[float, int]: return self.env.dt \ No newline at end of file diff --git a/reacher.egg-info/PKG-INFO b/reacher.egg-info/PKG-INFO deleted file mode 100644 index 9ea9f7e..0000000 --- a/reacher.egg-info/PKG-INFO +++ /dev/null @@ -1,10 +0,0 @@ -Metadata-Version: 1.0 -Name: reacher -Version: 0.0.1 -Summary: UNKNOWN -Home-page: UNKNOWN -Author: UNKNOWN -Author-email: UNKNOWN -License: UNKNOWN -Description: UNKNOWN -Platform: UNKNOWN diff --git a/reacher.egg-info/SOURCES.txt b/reacher.egg-info/SOURCES.txt deleted file mode 100644 index b771181..0000000 --- a/reacher.egg-info/SOURCES.txt +++ /dev/null @@ -1,7 +0,0 @@ -README.md -setup.py -reacher.egg-info/PKG-INFO -reacher.egg-info/SOURCES.txt -reacher.egg-info/dependency_links.txt -reacher.egg-info/requires.txt -reacher.egg-info/top_level.txt \ No newline at end of file diff --git a/reacher.egg-info/dependency_links.txt b/reacher.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/reacher.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/reacher.egg-info/requires.txt b/reacher.egg-info/requires.txt deleted file mode 100644 index 1e6c2dd..0000000 --- a/reacher.egg-info/requires.txt +++ /dev/null @@ -1 +0,0 @@ -gym diff --git a/reacher.egg-info/top_level.txt b/reacher.egg-info/top_level.txt deleted file mode 100644 index 8b13789..0000000 --- a/reacher.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/setup.py b/setup.py index cc9445c..703bbee 100644 --- a/setup.py +++ b/setup.py @@ -3,14 +3,14 @@ from setuptools import setup setup( name='alr_envs', version='0.0.1', - packages=['alr_envs', 'alr_envs.classic_control', 'alr_envs.mujoco', 'alr_envs.stochastic_search', + packages=['alr_envs', 'alr_envs.classic_control', 'alr_envs.open_ai', 'alr_envs.mujoco', 'alr_envs.stochastic_search', 'alr_envs.utils'], install_requires=[ 'gym', 'PyQt5', 'matplotlib', 'mp_env_api @ 
git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git',
-        'mujoco_py',
+        'mujoco-py<2.1,>=2.0',
         'dm_control'
     ],

From d2414797c7f2e8156fc76213e61b1431c7be6f7b Mon Sep 17 00:00:00 2001
From: ottofabian
Date: Fri, 23 Jul 2021 15:18:39 +0200
Subject: [PATCH 6/6] Update README.md

---
 README.md | 39 ++++++++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index 56ec0bf..a4ad30f 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,11 @@
-## ALR Environments
+## ALR Robotics Control Environments
 This repository collects custom Robotics environments not included in benchmark suites like OpenAI gym, rllab, etc.
 Creating a custom (Mujoco) gym environment can be done according to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md).
 For stochastic search problems with gym interface use the `Rosenbrock-v0` reference implementation.
 We also support to solve environments with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (DetPMP, we only consider the mean usually).
-When adding new DMP tasks check the `ViaPointReacherDMP-v0` reference implementation.
-When simply using the tasks, you can also leverage the wrapper class `DmpWrapper` to turn normal gym environments in to DMP tasks.
 
-## Environments
+## Step-based Environments
 Currently we have the following environments:
 
 ### Mujoco
@@ -33,11 +31,13 @@ Currently we have the following environments:
 |`ViaPointReacher-v0`| Simple reaching task leveraging a via point, which supports self collision detection. Provides a reward only at 100 and 199 for reaching the viapoint and goal point, respectively.| 200 | 5 | 18
 |`HoleReacher-v0`| 5 link reaching task where the end-effector needs to reach into a narrow hole without collding with itself or walls | 200 | 5 | 18
 
-### DMP Environments
-These environments are closer to stochastic search. They always execute a full trajectory, which is computed by a DMP and executed by a controller, e.g. a PD controller.
-The goal is to learn the parameters of this DMP to generate a suitable trajectory.
-All environments provide the full episode reward and additional information about early terminations, e.g. due to collisions.
+## Motion Primitive Environments (Episodic environments)
+Unlike step-based environments, these motion primitive (MP) environments are closer to stochastic search and to what is commonly found in robotics. They always execute a full trajectory, which is computed by a Dynamic Movement Primitive (DMP) or a Probabilistic Movement Primitive (DetPMP) and translated into individual actions by a controller, e.g. a PD controller. The actual controller, however, depends on the type of environment, i.e. position, velocity, or torque controlled.
+The goal is to learn the parametrization of the motion primitives in order to generate a suitable trajectory.
+This can also be done in a contextual setting, where all changing elements of the task are exposed once in the beginning. This requires finding a new parametrization for each trajectory.
+All environments provide the full cumulative episode reward and additional information about early terminations, e.g. due to collisions.
 
+### Classic Control
 |Name| Description|Horizon|Action Dimension|Context Dimension
 |---|---|---|---|---|
 |`ViaPointReacherDMP-v0`| A DMP provides a trajectory for the `ViaPointReacher-v0` task. | 200 | 25
@@ -49,18 +49,31 @@ All environments provide the full episode reward and additional information abou
 
 [//]: |`HoleReacherDetPMP-v0`|
 
-### OpenAi-gym Environments
-These environments are wrapped-versions of their OpenAi-gym counterparts.
+### OpenAI gym Environments
+These environments are wrapped versions of their OpenAI gym counterparts.
 
-|Name| Description|Horizon|Action Dimension|Context Dimension
+|Name| Description|Trajectory Horizon|Action Dimension|Context Dimension
 |---|---|---|---|---|
 |`ContinuousMountainCarDetPMP-v0`| A DetPmP wrapped version of the ContinuousMountainCar-v0 environment. | 100 | 1
 |`ReacherDetPMP-v2`| A DetPmP wrapped version of the Reacher-v2 environment. | 50 | 2
 |`FetchSlideDenseDetPMP-v1`| A DetPmP wrapped version of the FetchSlideDense-v1 environment. | 50 | 4
 |`FetchReachDenseDetPMP-v1`| A DetPmP wrapped version of the FetchReachDense-v1 environment. | 50 | 4
 
+### DeepMind Control Suite Environments
+These environments are wrapped versions of their DeepMind Control Suite (DMC) counterparts.
+Given that most tasks can be solved with shorter horizons than the original 1000 steps, we often shorten the episodes for these tasks.
+
+|Name| Description|Trajectory Horizon|Action Dimension|Context Dimension
+|---|---|---|---|---|
+|`dmc_ball_in_cup-catch_detpmp-v0`| A DetPmP wrapped version of the "catch" task for the "ball_in_cup" environment. | 50 | 10 | 2
+|`dmc_ball_in_cup-catch_dmp-v0`| A DMP wrapped version of the "catch" task for the "ball_in_cup" environment. | 50 | 10 | 2
+|`dmc_reacher-easy_detpmp-v0`| A DetPmP wrapped version of the "easy" task for the "reacher" environment. | 1000 | 10 | 4
+|`dmc_reacher-easy_dmp-v0`| A DMP wrapped version of the "easy" task for the "reacher" environment. | 1000 | 10 | 4
+|`dmc_reacher-hard_detpmp-v0`| A DetPmP wrapped version of the "hard" task for the "reacher" environment. | 1000 | 10 | 4
+|`dmc_reacher-hard_dmp-v0`| A DMP wrapped version of the "hard" task for the "reacher" environment. | 1000 | 10 | 4
+
+
-### Stochastic Search
+## Stochastic Search
 |Name| Description|Horizon|Action Dimension|Observation Dimension
 |---|---|---|---|---|
 |`Rosenbrock{dim}-v0`| Gym interface for Rosenbrock function. `{dim}` is one of 5, 10, 25, 50 or 100. | 1 | `{dim}` | 0
@@ -96,4 +109,4 @@ for i in range(10000):
 ```
 
-For an example using a DMP wrapped env and asynchronous sampling look at [mp_env_async_sampler.py](./alr_envs/utils/mp_env_async_sampler.py)
\ No newline at end of file
+For an example using a DMP wrapped env and asynchronous sampling look at [mp_env_async_sampler.py](./alr_envs/utils/mp_env_async_sampler.py)
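
A minimal rollout of one of the newly registered DetPMP environments can look as follows. This is a sketch closely following `alr_envs/examples/examples_open_ai.py` from the first patch; it assumes `alr_envs` and its `mp_env_api` dependency are installed.

```python
from alr_envs.utils.make_env_helpers import make_env

# One action is a full DetPMP parameter vector; the wrapped env computes the
# resulting trajectory, executes it with its controller, and returns the
# cumulative episode reward.
env = make_env("ContinuousMountainCarDetPMP-v0", seed=1)
obs = env.reset()

rewards = 0
for i in range(10):
    params = env.action_space.sample()
    obs, reward, done, info = env.step(params)
    rewards += reward
    if done:
        print(rewards)
        rewards = 0
        obs = env.reset()
```

Registering further DetPMP variants follows the schema of the `register` calls added to `alr_envs/__init__.py`. A hypothetical example (the id and the `num_basis`/`duration` values are made up for illustration):

```python
from gym.envs.registration import register

from alr_envs.open_ai import continuous_mountain_car

# Hypothetical variant: the same mountain car task, but with a longer trajectory
# and more basis functions; it reuses the MPWrapper shipped in alr_envs.open_ai.
register(
    id='ContinuousMountainCarLongDetPMP-v0',
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        "name": "gym.envs.classic_control:MountainCarContinuous-v0",
        "wrappers": [continuous_mountain_car.MPWrapper],
        "mp_kwargs": {
            "num_dof": 1,
            "num_basis": 8,
            "duration": 4,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "motor",
            "policy_kwargs": {
                "p_gains": 1.,
                "d_gains": 1.
            }
        }
    }
)
```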