added new mp wrappers to all environments

2022-06-29 10:39:28 +02:00 · 2022-06-29 10:39:28 +02:00 · 6e06e11cfa
commit 6e06e11cfa
parent 02b8a65bab
9 changed files with 180 additions and 5 deletions
--- a/alr_envs/alr/init.py
+++ b/alr_envs/alr/init.py
@ -537,7 +537,7 @@ for _v in _versions:
    register(
        id=_env_id,
        entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
-        kwargs=kwargs_dict_bp_promp_fixed_release
+        kwargs=kwargs_dict_bp_promp
    )
    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 ########################################################################################################################
--- a/alr_envs/alr/classic_control/hole_reacher/new_mp_wrapper.py
+++ b/alr_envs/alr/classic_control/hole_reacher/new_mp_wrapper.py
@ -0,0 +1,31 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+
+
+class NewMPWrapper(RawInterfaceWrapper):
+    """Raw-interface wrapper for the hole reacher environment.
+
+    Forwards ``current_pos``, ``current_vel`` and ``dt`` from the wrapped
+    environment and provides a boolean mask over the observation vector.
+    """
+
+    def context_mask(self):
+        """Return the boolean mask over the observation entries.
+
+        The per-link entries (cos, sin, velocity) are flagged only when
+        ``self.env.random_start`` is truthy and the hole width only when
+        ``self.env.initial_width`` is ``None``. The two target-distance
+        entries are always flagged; the step counter never is.
+
+        NOTE(review): named ``context_mask`` to match the other
+        ``RawInterfaceWrapper`` subclasses; confirm against the base class
+        that this is the hook it looks up.
+        """
+        return np.hstack([
+            [self.env.random_start] * self.env.n_links,  # cos
+            [self.env.random_start] * self.env.n_links,  # sin
+            [self.env.random_start] * self.env.n_links,  # velocity
+            [self.env.initial_width is None],  # hole width
+            # [self.env.hole_depth is None],  # hole depth
+            [True] * 2,  # x-y coordinates of target distance
+            [False]  # env steps
+        ])
+
+    def get_context_mask(self):
+        """Return ``self.context_mask()``; kept so callers of the old name still work."""
+        return self.context_mask()
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        """Return ``current_pos`` of the wrapped environment."""
+        return self.env.current_pos
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        """Return ``current_vel`` of the wrapped environment."""
+        return self.env.current_vel
+
+    @property
+    def dt(self) -> Union[float, int]:
+        """Return ``dt`` of the wrapped environment."""
+        return self.env.dt
--- a/alr_envs/alr/classic_control/simple_reacher/new_mp_wrapper.py
+++ b/alr_envs/alr/classic_control/simple_reacher/new_mp_wrapper.py
@ -0,0 +1,31 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+
+
+class MPWrapper(RawInterfaceWrapper):
+    """Raw-interface wrapper for the simple reacher environment.
+
+    Forwards ``current_pos``, ``current_vel`` and ``dt`` from the wrapped
+    environment and provides a boolean mask over the observation vector.
+    """
+
+    def context_mask(self):
+        """Return the boolean mask over the observation entries.
+
+        Layout: three per-link segments (cos, sin, velocity) that follow
+        ``self.env.random_start``, two always-flagged target-distance
+        entries, and one never-flagged step entry.
+        """
+        per_link = [self.env.random_start] * self.env.n_links
+        segments = [
+            per_link,  # cos
+            per_link,  # sin
+            per_link,  # velocity
+        ]
+        segments.append([True] * 2)  # x-y coordinates of target distance
+        segments.append([False])  # env steps
+        return np.hstack(segments)
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        """Return ``current_pos`` of the wrapped environment."""
+        wrapped = self.env
+        return wrapped.current_pos
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        """Return ``current_vel`` of the wrapped environment."""
+        wrapped = self.env
+        return wrapped.current_vel
+
+    @property
+    def dt(self) -> Union[float, int]:
+        """Return ``dt`` of the wrapped environment."""
+        wrapped = self.env
+        return wrapped.dt
--- a/alr_envs/alr/classic_control/viapoint_reacher/new_mp_wrapper.py
+++ b/alr_envs/alr/classic_control/viapoint_reacher/new_mp_wrapper.py
@ -0,0 +1,32 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+
+
+class MPWrapper(RawInterfaceWrapper):
+    """Raw-interface wrapper for the via-point reacher environment.
+
+    Forwards ``current_pos``, ``current_vel`` and ``dt`` from the wrapped
+    environment and provides a boolean mask over the observation vector.
+    """
+
+    def context_mask(self):
+        """Return the boolean mask over the observation entries.
+
+        Layout: three per-link segments (cos, sin, velocity) that follow
+        ``self.env.random_start``, two via-point entries flagged when
+        ``self.env.initial_via_target`` is ``None``, two always-flagged
+        target-distance entries, and one never-flagged step entry.
+        """
+        per_link = [self.env.random_start] * self.env.n_links
+        via_point = [self.env.initial_via_target is None] * 2
+        segments = [
+            per_link,  # cos
+            per_link,  # sin
+            per_link,  # velocity
+            via_point,  # x-y coordinates of via point distance
+        ]
+        segments.append([True] * 2)  # x-y coordinates of target distance
+        segments.append([False])  # env steps
+        return np.hstack(segments)
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        """Return ``current_pos`` of the wrapped environment."""
+        wrapped = self.env
+        return wrapped.current_pos
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        """Return ``current_vel`` of the wrapped environment."""
+        wrapped = self.env
+        return wrapped.current_vel
+
+    @property
+    def dt(self) -> Union[float, int]:
+        """Return ``dt`` of the wrapped environment."""
+        wrapped = self.env
+        return wrapped.dt
--- a/alr_envs/alr/mujoco/half_cheetah_jump/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/half_cheetah_jump/new_mp_wrapper.py
@ -0,0 +1,24 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+
+
+class MPWrapper(RawInterfaceWrapper):
+    """Raw-interface wrapper for the half cheetah jump environment.
+
+    Reads positions and velocities from the MuJoCo simulation state of the
+    wrapped environment and provides a boolean mask over the observation.
+    """
+
+    def context_mask(self):
+        """Return an 18-entry boolean mask: 17 ``False`` entries, then one ``True``."""
+        return np.hstack([
+            [False] * 17,
+            [True]  # goal height
+        ])
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray]:
+        """Return a copy of ``sim.data.qpos[3:9]`` of the wrapped environment."""
+        return self.env.sim.data.qpos[3:9].copy()
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        """Return a copy of ``sim.data.qvel[3:9]`` of the wrapped environment."""
+        return self.env.sim.data.qvel[3:9].copy()
+
+    @property
+    def dt(self) -> Union[float, int]:
+        """Return ``dt`` of the wrapped environment.
+
+        Added to match the other wrappers that define ``dt`` explicitly.
+        NOTE(review): redundant if ``RawInterfaceWrapper`` already forwards
+        ``env.dt``; confirm against the base class.
+        """
+        return self.env.dt
+
--- a/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
@ -10,7 +10,7 @@ class MPWrapper(MPEnvWrapper):
    def active_obs(self):
        return np.hstack([
            [False] * 17,
-            [True] # goal pos
+            [True]  # goal pos
        ])

    @property
--- a/alr_envs/alr/mujoco/hopper_throw/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/hopper_throw/new_mp_wrapper.py
@ -0,0 +1,27 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+
+
+class MPWrapper(RawInterfaceWrapper):
+    """Raw-interface wrapper for the hopper throw environment.
+
+    Reads positions and velocities from the MuJoCo simulation state of the
+    wrapped environment, forwards its ``dt`` and provides a boolean mask
+    over the observation.
+    """
+
+    def context_mask(self):
+        """Return an 18-entry boolean mask: 17 ``False`` entries, then one ``True``."""
+        masked_out = [False] * 17
+        goal_entry = [True]  # goal pos
+        return np.hstack([masked_out, goal_entry])
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray]:
+        """Return a copy of ``sim.data.qpos[3:6]`` of the wrapped environment."""
+        sim_data = self.env.sim.data
+        return sim_data.qpos[3:6].copy()
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        """Return a copy of ``sim.data.qvel[3:6]`` of the wrapped environment."""
+        sim_data = self.env.sim.data
+        return sim_data.qvel[3:6].copy()
+
+    @property
+    def dt(self) -> Union[float, int]:
+        """Return ``dt`` of the wrapped environment."""
+        wrapped = self.env
+        return wrapped.dt
--- a/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py
@ -2,8 +2,10 @@ from alr_envs.mp.black_box_wrapper import BlackBoxWrapper
 from typing import Union, Tuple
 import numpy as np

+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper

-class MPWrapper(BlackBoxWrapper):
+
+class MPWrapper(RawInterfaceWrapper):

    @property
    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
@ -12,7 +14,7 @@ class MPWrapper(BlackBoxWrapper):
    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
        return self.env.sim.data.qvel.flat[:self.env.n_links]

-    def get_context_mask(self):
+    def context_mask(self):
        return np.concatenate([
            [False] * self.env.n_links,  # cos
            [False] * self.env.n_links,  # sin
@ -21,4 +23,4 @@ class MPWrapper(BlackBoxWrapper):
            [False] * 3,  # goal distance
            # self.get_body_com("target"),  # only return target to make problem harder
            [False],  # step
-        ])
+        ])
--- a/alr_envs/alr/mujoco/walker_2d_jump/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/walker_2d_jump/new_mp_wrapper.py
@ -0,0 +1,28 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+
+
+class MPWrapper(RawInterfaceWrapper):
+    """Raw-interface wrapper for the walker 2d jump environment.
+
+    Reads positions and velocities from the MuJoCo simulation state of the
+    wrapped environment and provides a boolean mask over the observation.
+    """
+
+    def context_mask(self):
+        """Return an 18-entry boolean mask: 17 ``False`` entries, then one ``True``."""
+        return np.hstack([
+            [False] * 17,
+            [True]  # goal pos
+        ])
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray]:
+        """Return a copy of ``sim.data.qpos[3:9]`` of the wrapped environment."""
+        return self.env.sim.data.qpos[3:9].copy()
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        """Return a copy of ``sim.data.qvel[3:9]`` of the wrapped environment."""
+        return self.env.sim.data.qvel[3:9].copy()
+
+    @property
+    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        """Always raise ``ValueError``: this wrapper exposes no goal position."""
+        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+    @property
+    def dt(self) -> Union[float, int]:
+        """Return ``dt`` of the wrapped environment.
+
+        Added to match the other wrappers that define ``dt`` explicitly.
+        NOTE(review): redundant if ``RawInterfaceWrapper`` already forwards
+        ``env.dt``; confirm against the base class.
+        """
+        return self.env.dt
+