From d4e844ac4500f29f58830f4ba41225dd43a5b951 Mon Sep 17 00:00:00 2001
From: Hongyi Zhou
Date: Wed, 16 Nov 2022 19:45:58 +0100
Subject: [PATCH] goal switching

---
 fancy_gym/black_box/black_box_wrapper.py      | 11 ++-----
 fancy_gym/envs/__init__.py                    |  2 +-
 .../envs/mujoco/table_tennis/mp_wrapper.py    |  4 +--
 .../mujoco/table_tennis/table_tennis_env.py   | 32 ++++++++++++-------
 .../examples/examples_movement_primitives.py  |  1 +
 5 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py
index d428184..698adce 100644
--- a/fancy_gym/black_box/black_box_wrapper.py
+++ b/fancy_gym/black_box/black_box_wrapper.py
@@ -163,15 +163,10 @@ class BlackBoxWrapper(gym.ObservationWrapper):

     def step(self, action: np.ndarray):
         """ This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
-        # time_valid = self.env.check_time_validity(action)
+        # time_is_valid = self.env.check_time_validity(action)
         #
         # if time_valid:
-        ## tricky part, only use weights basis
-        # basis_weights = action.reshape(7, -1)
-        # goal_weights = np.zeros((7, 1))
-        # action = np.concatenate((basis_weights, goal_weights), axis=1).flatten()
-

         # TODO remove this part, right now only needed for beer pong
         # mp_params, env_spec_params, proceed = self.env.episode_callback(action, self.traj_gen)
         position, velocity = self.get_trajectory(action)
@@ -253,9 +248,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         else:
             obs, trajectory_return, done, infos = self.env.invalid_traj_callback(action, position, velocity)
             return self.observation(obs), trajectory_return, done, infos
-        # else:
-        #     obs, trajectory_return, done, infos = self.env.time_invalid_traj_callback(action)
-        #     return self.observation(obs), trajectory_return, done, infos
+
     def render(self, **kwargs):
         """Only set render options here, such that they can be used during the rollout.
         This only needs to be called once"""
diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py
index ef52785..05cc631 100644
--- a/fancy_gym/envs/__init__.py
+++ b/fancy_gym/envs/__init__.py
@@ -260,7 +260,7 @@ for ctxt_dim in [2, 4]:
             "ctxt_dim": ctxt_dim,
             'frame_skip': 4,
             'enable_wind': False,
-            'enable_switching_goal': False,
+            'enable_switching_goal': True,
         }
     )

diff --git a/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py
index 64185b9..1d6b3da 100644
--- a/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py
+++ b/fancy_gym/envs/mujoco/table_tennis/mp_wrapper.py
@@ -16,7 +16,7 @@ class MPWrapper(RawInterfaceWrapper):
             [False] * 7,  # joints velocity
             [True] * 2,  # position ball x, y
             [False] * 1,  # position ball z
-            [True] * 3,  # velocity ball x, y, z
+            [False] * 3,  # velocity ball x, y, z
             [True] * 2,  # target landing position
             # [True] * 1,  # time
         ])
@@ -33,7 +33,7 @@ class MPWrapper(RawInterfaceWrapper):
         return action[0] <= tau_bound[1] and action[0] >= tau_bound[0] \
                and action[1] <= delay_bound[1] and action[1] >= delay_bound[0]

-    def time_invalid_traj_callback(self, action) \
+    def time_invalid_traj_callback(self, action, pos_traj, vel_traj) \
             -> Tuple[np.ndarray, float, bool, dict]:
         tau_invalid_penalty = 3 * (np.max([0, action[0] - tau_bound[1]]) + np.max([0, tau_bound[0] - action[0]]))
         delay_invalid_penalty = 3 * (np.max([0, action[1] - delay_bound[1]]) + np.max([0, delay_bound[0] - action[1]]))
diff --git a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
index 1e508d4..7de9a9b 100644
--- a/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
+++ b/fancy_gym/envs/mujoco/table_tennis/table_tennis_env.py
@@ -13,6 +13,8 @@ MAX_EPISODE_STEPS_TABLE_TENNIS = 250

 CONTEXT_BOUNDS_2DIMS = np.array([[-1.0, -0.65], [-0.2, 0.65]])
 CONTEXT_BOUNDS_4DIMS = np.array([[-1.0, -0.65, -1.0, -0.65], [-0.2, 0.65, -0.2, 0.65]])
+CONTEXT_BOUNDS_SWICHING = np.array([[-1.0, -0.65, -1.0, 0.1],
+                                    [-0.2, 0.65, -0.2, 0.65]])


 class TableTennisEnv(MujocoEnv, utils.EzPickle):
@@ -20,9 +22,10 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
     7 DoF table tennis environment
     """

-    def __init__(self, ctxt_dim: int = 2, frame_skip: int = 4,
+    def __init__(self, ctxt_dim: int = 4, frame_skip: int = 4,
                  enable_switching_goal: bool = False,
-                 enable_wind: bool = False, enable_magnus: bool = False):
+                 enable_wind: bool = False, enable_magnus: bool = False,
+                 enable_air: bool = False):
         utils.EzPickle.__init__(**locals())
         self._steps = 0

@@ -53,14 +56,18 @@ class TableTennisEnv(MujocoEnv, utils.EzPickle):
             self.context_bounds = CONTEXT_BOUNDS_2DIMS
         elif ctxt_dim == 4:
             self.context_bounds = CONTEXT_BOUNDS_4DIMS
+            if self._enable_goal_switching:
+                self.context_bounds = CONTEXT_BOUNDS_SWICHING
         else:
             raise NotImplementedError

         self.action_space = spaces.Box(low=-1, high=1, shape=(7,), dtype=np.float32)

         # complex dynamics settings
-        # self.model.opt.density = 1.225
-        # self.model.opt.viscosity = 2.27e-5
+        if enable_air:
+            self.model.opt.density = 1.225
+            self.model.opt.viscosity = 2.27e-5
+
         self._enable_wind = enable_wind
         self._enable_magnus = enable_magnus
         self._wind_vel = np.zeros(3)
@@ -244,17 +251,20 @@ def plot_ball_traj_2d(x_traj, y_traj):
     ax.plot(x_traj, y_traj)
     plt.show()

-def plot_single_axis(traj):
+def plot_single_axis(traj, title):
     import matplotlib.pyplot as plt
     fig = plt.figure()
     ax = fig.add_subplot(111)
     ax.plot(traj)
+    ax.set_title(title)
     plt.show()

 if __name__ == "__main__":
-    env = TableTennisEnv(enable_wind=True)
-    for _ in range(5):
-        obs = env.reset()
+    env = TableTennisEnv(enable_air=True)
+    # env_with_air = TableTennisEnv(enable_air=True)
+    for _ in range(1):
+        obs1 = env.reset()
+        # obs2 = env_with_air.reset()
         x_pos = []
         y_pos = []
         z_pos = []
@@ -262,8 +272,8 @@ if __name__ == "__main__":
         y_vel = []
         z_vel = []
         for _ in range(2000):
-            # env.render("human")
             obs, reward, done, info = env.step(np.zeros(7))
+            # _, _, _, _ = env_no_air.step(np.zeros(7))
             x_pos.append(env.data.joint("tar_x").qpos[0])
             y_pos.append(env.data.joint("tar_y").qpos[0])
             z_pos.append(env.data.joint("tar_z").qpos[0])
@@ -272,6 +282,6 @@ if __name__ == "__main__":
             z_vel.append(env.data.joint("tar_z").qvel[0])
             # print(reward)
             if done:
-                plot_ball_traj_2d(x_pos, y_pos)
-                plot_single_axis(x_vel)
+                # plot_ball_traj_2d(x_pos, y_pos)
+                plot_single_axis(x_pos, title="x_vel without air")
                 break
diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py
index 1cda812..e632f54 100644
--- a/fancy_gym/examples/examples_movement_primitives.py
+++ b/fancy_gym/examples/examples_movement_primitives.py
@@ -45,6 +45,7 @@ def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True
         # This executes a full trajectory and gives back the context (obs) of the last step in the trajectory, or the
         # full observation space of the last step, if replanning/sub-trajectory learning is used. The 'reward' is equal
         # to the return of a trajectory. Default is the sum over the step-wise rewards.
+        print(f'target obs: {obs[-3:]}')
         obs, reward, done, info = env.step(ac)
         print(f'steps: {info["num_steps"][-1]}')
         # Aggregated returns
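
The snippet below is a minimal driver sketch and is not part of this patch: it mirrors the patched __main__ block of table_tennis_env.py to exercise the new enable_air and enable_switching_goal flags. The import path, constructor arguments, and joint names are taken from the hunks above; a working MuJoCo / fancy_gym installation is assumed.

# Sketch, not part of the patch: roll out the patched TableTennisEnv with zero
# torques and log the ball's x position, as the patched __main__ block does.
import numpy as np

from fancy_gym.envs.mujoco.table_tennis.table_tennis_env import TableTennisEnv

# ctxt_dim=4 is the new default; enable_switching_goal selects the widened
# CONTEXT_BOUNDS_SWICHING, enable_air turns on air density/viscosity.
env = TableTennisEnv(ctxt_dim=4, enable_switching_goal=True, enable_air=True)

obs = env.reset()
x_pos = []
for _ in range(2000):
    obs, reward, done, info = env.step(np.zeros(7))
    x_pos.append(env.data.joint("tar_x").qpos[0])
    if done:
        break

print(f"episode length: {len(x_pos)}, last reward: {reward}")

Note that the __init__.py hunk above flips 'enable_switching_goal' to True in the registration kwargs, so the registered table tennis tasks should also pass this flag by default; per the ctxt_dim == 4 branch above, it only takes effect for the 4-dimensional context.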