From c2db2f8064b5e0735c7b4bf8caca87f84301ee29 Mon Sep 17 00:00:00 2001
From: Maximilian Huettenrauch
Date: Fri, 23 Apr 2021 12:47:55 +0200
Subject: [PATCH] bug fixes

---
 README.md                                 |   3 +-
 alr_envs/__init__.py                      |  16 ++-
 .../mujoco/ball_in_a_cup/ball_in_a_cup.py |  27 ++++-
 alr_envs/utils/make_env_helpers.py        | 108 ------------------
 alr_envs/utils/mp_env_async_sampler.py    |  12 +-
 alr_envs/utils/wrapper/mp_wrapper.py      |   3 +-
 6 files changed, 45 insertions(+), 124 deletions(-)

diff --git a/README.md b/README.md
index 00f9025..41d87f2 100644
--- a/README.md
+++ b/README.md
@@ -19,6 +19,7 @@ Currently we have the following environments:
 |`ALRLongReacher-v0`|Modified (7 links) Mujoco gym's `Reacher-v2` (2 links)| 200 | 7 | 27
 |`ALRLongReacherSparse-v0`|Same as `ALRLongReacher-v0`, but the distance penalty is only provided in the last time step.| 200 | 7 | 27
 |`ALRLongReacherSparseBalanced-v0`|Same as `ALRLongReacherSparse-v0`, but the end-effector has to remain upright.| 200 | 7 | 27
+|`ALRBallInACupSimple-v0`| Ball-in-a-cup task where a robot needs to catch a ball attached to a cup at its end-effector. | 4000 | 3 | wip
 |`ALRBallInACup-v0`| Ball-in-a-cup task where a robot needs to catch a ball attached to a cup at its end-effector | 4000 | 7 | wip
 |`ALRBallInACupGoal-v0`| Similiar to `ALRBallInACupSimple-v0` but the ball needs to be caught at a specified goal position | 4000 | 7 | wip
 
@@ -41,7 +42,7 @@ All environments provide the full episode reward and additional information abou
 |`ViaPointReacherDMP-v0`| A DMP provides a trajectory for the `ViaPointReacher-v0` task. | 200 | 25
 |`HoleReacherFixedGoalDMP-v0`| A DMP provides a trajectory for the `HoleReacher-v0` task with a fixed goal attractor. | 200 | 25
 |`HoleReacherDMP-v0`| A DMP provides a trajectory for the `HoleReacher-v0` task. The goal attractor needs to be learned. | 200 | 30
-|`ALRBallInACupSimpleDMP-v0`| A DMP provides a trajectory for a simplified `ALRBallInACup-v0` task where only 3 joints are actuated. | 4000 | 15
+|`ALRBallInACupSimpleDMP-v0`| A DMP provides a trajectory for the `ALRBallInACupSimple-v0` task where only 3 joints are actuated. | 4000 | 15
 |`ALRBallInACupDMP-v0`| A DMP provides a trajectory for the `ALRBallInACup-v0` task. | 4000 | 35
 |`ALRBallInACupGoalDMP-v0`| A DMP provides a trajectory for the `ALRBallInACupGoal-v0` task. | 4000 | 35 | 3
 
diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py
index 3730249..f814080 100644
--- a/alr_envs/__init__.py
+++ b/alr_envs/__init__.py
@@ -71,12 +71,22 @@ register(
     }
 )
 
+register(
+    id='ALRBallInACupSimple-v0',
+    entry_point='alr_envs.mujoco:ALRBallInACupEnv',
+    max_episode_steps=4000,
+    kwargs={
+        "simplified": True,
+        "reward_type": "no_context"
+    }
+)
+
 register(
     id='ALRBallInACup-v0',
     entry_point='alr_envs.mujoco:ALRBallInACupEnv',
     max_episode_steps=4000,
     kwargs={
-        "reward_type": "simple"
+        "reward_type": "no_context"
     }
 )
 
@@ -209,7 +219,7 @@ register(
     id='ALRBallInACupSimpleDMP-v0',
     entry_point='alr_envs.utils.make_env_helpers:make_dmp_env',
     kwargs={
-        "name": "alr_envs:ALRBallInACup-v0",
+        "name": "alr_envs:ALRBallInACupSimple-v0",
         "num_dof": 3,
         "num_basis": 5,
         "duration": 3.5,
@@ -243,7 +253,7 @@ register(
 
 register(
     id='ALRBallInACupGoalDMP-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_dmp_env',
+    entry_point='alr_envs.utils.make_env_helpers:make_contextual_env',
     kwargs={
         "name": "alr_envs:ALRBallInACupGoal-v0",
         "num_dof": 7,
diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py
index 19c6295..66461c0 100644
--- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py
+++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py
@@ -5,7 +5,8 @@ from alr_envs.mujoco import alr_mujoco_env
 
 class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
-    def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_type: str = None, context: np.ndarray = None):
+    def __init__(self, n_substeps=4, apply_gravity_comp=True, simplified: bool = False,
+                 reward_type: str = None, context: np.ndarray = None):
         self._steps = 0
 
         self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                      "assets",
@@ -31,9 +32,11 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
         self._start_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57])
         self._start_vel = np.zeros(7)
 
+        self.simplified = simplified
+
         self.sim_time = 8  # seconds
         self.sim_steps = int(self.sim_time / self.dt)
-        if reward_type == "simple":
+        if reward_type == "no_context":
             from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward
             reward_function = BallInACupReward
         elif reward_type == "contextual_goal":
@@ -44,6 +47,20 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
         self.reward_function = reward_function(self.sim_steps)
         self.configure(context)
 
+    @property
+    def start_pos(self):
+        if self.simplified:
+            return self._start_pos[1::2]
+        else:
+            return self._start_pos
+
+    @property
+    def start_vel(self):
+        if self.simplified:
+            return self._start_vel[1::2]
+        else:
+            return self._start_vel
+
     @property
     def current_pos(self):
         return self.sim.data.qpos[0:7].copy()
@@ -58,7 +75,7 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
 
     def reset_model(self):
         init_pos_all = self.init_qpos.copy()
-        init_pos_robot = self.start_pos
+        init_pos_robot = self._start_pos
         init_vel = np.zeros_like(init_pos_all)
 
         self._steps = 0
@@ -114,14 +131,14 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
 
     # These functions are for the task with 3 joint actuations
     def extend_des_pos(self, des_pos):
-        des_pos_full = self.start_pos.copy()
+        des_pos_full = self._start_pos.copy()
         des_pos_full[1] = des_pos[0]
         des_pos_full[3] = des_pos[1]
         des_pos_full[5] = des_pos[2]
         return des_pos_full
 
     def extend_des_vel(self, des_vel):
-        des_vel_full = self.start_vel.copy()
+        des_vel_full = self._start_vel.copy()
         des_vel_full[1] = des_vel[0]
         des_vel_full[3] = des_vel[1]
         des_vel_full[5] = des_vel[2]
diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py
index d7cd959..c0e55b4 100644
--- a/alr_envs/utils/make_env_helpers.py
+++ b/alr_envs/utils/make_env_helpers.py
@@ -27,111 +27,3 @@ def make_detpmp_env(**kwargs):
     name = kwargs.pop("name")
     _env = gym.make(name)
     return DetPMPWrapper(_env, **kwargs)
-
-
-# def _worker(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
-#     assert shared_memory is None
-#     env = env_fn()
-#     parent_pipe.close()
-#     try:
-#         while True:
-#             command, data = pipe.recv()
-#             if command == 'reset':
-#                 observation = env.reset()
-#                 pipe.send((observation, True))
-#             elif command == 'configure':
-#                 env.configure(data)
-#                 pipe.send((None, True))
-#             elif command == 'step':
-#                 observation, reward, done, info = env.step(data)
-#                 if done:
-#                     observation = env.reset()
-#                 pipe.send(((observation, reward, done, info), True))
-#             elif command == 'seed':
-#                 env.seed(data)
-#                 pipe.send((None, True))
-#             elif command == 'close':
-#                 pipe.send((None, True))
-#                 break
-#             elif command == '_check_observation_space':
-#                 pipe.send((data == env.observation_space, True))
-#             else:
-#                 raise RuntimeError('Received unknown command `{0}`. Must '
-#                                    'be one of {`reset`, `step`, `seed`, `close`, '
-#                                    '`_check_observation_space`}.'.format(command))
-#     except (KeyboardInterrupt, Exception):
-#         error_queue.put((index,) + sys.exc_info()[:2])
-#         pipe.send((None, False))
-#     finally:
-#         env.close()
-#
-#
-# def _worker_shared_memory(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
-#     assert shared_memory is not None
-#     env = env_fn()
-#     observation_space = env.observation_space
-#     parent_pipe.close()
-#     try:
-#         while True:
-#             command, data = pipe.recv()
-#             if command == 'reset':
-#                 observation = env.reset()
-#                 write_to_shared_memory(index, observation, shared_memory,
-#                                        observation_space)
-#                 pipe.send((None, True))
-#             elif command == 'configure':
-#                 env.configure(data)
-#                 pipe.send((None, True))
-#             elif command == 'step':
-#                 observation, reward, done, info = env.step(data)
-#                 if done:
-#                     observation = env.reset()
-#                 write_to_shared_memory(index, observation, shared_memory,
-#                                        observation_space)
-#                 pipe.send(((None, reward, done, info), True))
-#             elif command == 'seed':
-#                 env.seed(data)
-#                 pipe.send((None, True))
-#             elif command == 'close':
-#                 pipe.send((None, True))
-#                 break
-#             elif command == '_check_observation_space':
-#                 pipe.send((data == observation_space, True))
-#             else:
-#                 raise RuntimeError('Received unknown command `{0}`. Must '
-#                                    'be one of {`reset`, `step`, `seed`, `close`, '
-#                                    '`_check_observation_space`}.'.format(command))
-#     except (KeyboardInterrupt, Exception):
-#         error_queue.put((index,) + sys.exc_info()[:2])
-#         pipe.send((None, False))
-#     finally:
-#         env.close()
-
-
-# def viapoint_dmp(**kwargs):
-#     _env = gym.make("alr_envs:ViaPointReacher-v0")
-#     # _env = ViaPointReacher(**kwargs)
-#     return DmpWrapper(_env, num_dof=5, num_basis=5, duration=2, alpha_phase=2.5, dt=_env.dt,
-#                       start_pos=_env.start_pos, learn_goal=False, policy_type="velocity", weights_scale=50)
-#
-#
-# def holereacher_dmp(**kwargs):
-#     _env = gym.make("alr_envs:HoleReacher-v0")
-#     # _env = HoleReacher(**kwargs)
-#     return DmpWrapper(_env, num_dof=5, num_basis=5, duration=2, dt=_env.dt, learn_goal=True, alpha_phase=2,
-#                       start_pos=_env.start_pos, policy_type="velocity", weights_scale=50, goal_scale=0.1)
-#
-#
-# def holereacher_fix_goal_dmp(**kwargs):
-#     _env = gym.make("alr_envs:HoleReacher-v0")
-#     # _env = HoleReacher(**kwargs)
-#     return DmpWrapper(_env, num_dof=5, num_basis=5, duration=2, dt=_env.dt, learn_goal=False, alpha_phase=2,
-#                       start_pos=_env.start_pos, policy_type="velocity", weights_scale=50, goal_scale=1,
-#                       final_pos=np.array([2.02669572, -1.25966385, -1.51618198, -0.80946476, 0.02012344]))
-#
-#
-# def holereacher_detpmp(**kwargs):
-#     _env = gym.make("alr_envs:HoleReacher-v0")
-#     # _env = HoleReacher(**kwargs)
-#     return DetPMPWrapper(_env, num_dof=5, num_basis=5, width=0.005, policy_type="velocity", start_pos=_env.start_pos,
-#                          duration=2, post_traj_time=0, dt=_env.dt, weights_scale=0.25, zero_start=True, zero_goal=False)
diff --git a/alr_envs/utils/mp_env_async_sampler.py b/alr_envs/utils/mp_env_async_sampler.py
index 68d7e2c..344cb37 100644
--- a/alr_envs/utils/mp_env_async_sampler.py
+++ b/alr_envs/utils/mp_env_async_sampler.py
@@ -23,7 +23,7 @@ def split_array(ary, size):
     split = [k * size for k in range(1, repeat)]
     sub_arys = np.split(ary, split)
 
-    if n_samples % repeat != 0:
+    if n_samples % size != 0:
         tmp = np.zeros_like(sub_arys[0])
         last = sub_arys[-1]
         tmp[0: len(last)] = last
@@ -42,8 +42,8 @@ def _flatten_list(l):
 
 class AlrMpEnvSampler:
     """
-    An asynchronous sampler for MPWrapper environments. A sampler object can be called with a set of parameters and
-    returns the corresponding final obs, rewards, dones and info dicts.
+    An asynchronous sampler for non contextual MPWrapper environments. A sampler object can be called with a set of
+    parameters and returns the corresponding final obs, rewards, dones and info dicts.
     """
     def __init__(self, env_id, num_envs, seed=0):
         self.num_envs = num_envs
@@ -68,10 +68,10 @@ class AlrMpEnvSampler:
 
 
 if __name__ == "__main__":
-    env_name = "alr_envs:HoleReacherDMP-v0"
+    env_name = "alr_envs:ALRBallInACupSimpleDMP-v0"
     n_cpu = 8
-    dim = 30
-    n_samples = 20
+    dim = 15
+    n_samples = 10
 
     sampler = AlrMpEnvSampler(env_name, num_envs=n_cpu)
diff --git a/alr_envs/utils/wrapper/mp_wrapper.py b/alr_envs/utils/wrapper/mp_wrapper.py
index 769a868..e9ab5f1 100644
--- a/alr_envs/utils/wrapper/mp_wrapper.py
+++ b/alr_envs/utils/wrapper/mp_wrapper.py
@@ -66,7 +66,7 @@ class MPWrapper(gym.Wrapper, ABC):
 
         if self.post_traj_steps > 0:
             trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
-            velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.dmp.num_dimensions))])
+            velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.num_dimensions))])
 
         # self._trajectory = trajectory
         # self._velocity = velocity
@@ -76,6 +76,7 @@ class MPWrapper(gym.Wrapper, ABC):
 
         # TODO: @Max Why do we need this configure, states should be part of the model
        # TODO: Ask Onur if the context distribution needs to be outside the environment
+        # TODO: For now create a new env with each context
         # self.env.configure(context)
         obs = self.env.reset()
         info = {}