From 7f64c975cd061159db1b1c365b3b513bf0ac8db3 Mon Sep 17 00:00:00 2001
From: Onur
Date: Wed, 27 Apr 2022 16:15:17 +0200
Subject: [PATCH] reduce the number of release steps for beerpong

---
 alr_envs/alr/__init__.py                      | 10 ++--
 alr_envs/alr/mujoco/beerpong/beerpong.py      |  7 ++-
 .../mujoco/beerpong/beerpong_reward_staged.py | 53 +++++++++++++++----
 3 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 8f37f78..ac885fa 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -559,11 +559,13 @@ for _v in _versions:
         "mp_kwargs": {
             "num_dof": 7,
             "num_basis": 2,
-            "duration": 1,
-            "post_traj_time": 2,
+            # "duration": 1,
+            "duration": 0.5,
+            # "post_traj_time": 2,
+            "post_traj_time": 2.5,
             "policy_type": "motor",
-            # "weights_scale": 0.15,
-            "weights_scale": 1,
+            "weights_scale": 0.14,
+            # "weights_scale": 1,
             "zero_start": True,
             "zero_goal": False,
             "policy_kwargs": {
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index d885e78..f5d2bd8 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -41,9 +41,9 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         self.ball_id = 11
 
         # self._release_step = 175  # time step of ball release
-        self._release_step = 130  # time step of ball release
+        # self._release_step = 130  # time step of ball release
+        self._release_step = 100  # time step of ball release
 
-        self.sim_time = 3  # seconds
         self.ep_length = 600  # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt)
 
         self.cup_table_id = 10
@@ -54,6 +54,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
 
         reward_function = BeerPongReward
         self.reward_function = reward_function()
+        self.n_table_bounces_first = 0
 
         MujocoEnv.__init__(self, self.xml_path, frame_skip)
         utils.EzPickle.__init__(self)
@@ -75,6 +76,8 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         return self.sim.data.qvel[0:7].copy()
 
     def reset(self):
+        print(not self.reward_function.ball_ground_contact_first)
+        self.n_table_bounces_first += int(not self.reward_function.ball_ground_contact_first)
         self.reward_function.reset(self.add_noise)
         return super().reset()
 
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
index c9ed451..24c48be 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -50,7 +50,7 @@ class BeerPongReward:
         self.angle_rewards = []
         self.cup_angles = []
         self.cup_z_axes = []
-        self.ball_ground_contact = False
+        self.ball_ground_contact_first = False
         self.ball_table_contact = False
         self.ball_wall_contact = False
         self.ball_cup_contact = False
@@ -150,8 +150,9 @@
         if env._steps == env.ep_length - 1 or self._is_collided:
             min_dist = np.min(self.dists)
             final_dist = self.dists_final[-1]
-
-            # encourage bounce before falling into cup
+            # if self.ball_ground_contact_first:
+            #     min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -6
+            # else:
             if not self.ball_in_cup:
                 if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact:
                     min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -4
@@ -159,17 +160,47 @@
                     min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -2
             else:
                 min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
-
-            reward = rew_offset - min_dist_coeff * min_dist**2 - final_dist_coeff * final_dist**2 - \
-                     1e-4*np.mean(action_cost)
-            # 1e-7*np.mean(action_cost)
+            reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
+                     1e-4 * np.mean(action_cost)
+            # 1e-7*np.mean(action_cost)
             success = self.ball_in_cup
         else:
-            # reward = - 1e-2 * action_cost
-            reward = - 1e-4 * action_cost
+            reward = - 1e-2 * action_cost
+            # reward = - 1e-4 * action_cost
+            # reward = 0
             success = False
         # ################################################################################################################
+        # # # ##################### Reward function which does not force to bounce once on the table (quad dist) ############
+        # self._check_contacts(env.sim)
+        # self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
+        # if env._steps == env.ep_length - 1 or self._is_collided:
+        #     min_dist = np.min(self.dists)
+        #     final_dist = self.dists_final[-1]
+        #
+        #     if not self.ball_in_cup:
+        #         if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact:
+        #             min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -6
+        #         else:
+        #             if self.ball_ground_contact_first:
+        #                 min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -4
+        #             else:
+        #                 min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -2
+        #     else:
+        #         if self.ball_ground_contact_first:
+        #             min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, -1
+        #         else:
+        #             min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
+        #     reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
+        #              1e-7 * np.mean(action_cost)
+        #     # 1e-4*np.mean(action_cost)
+        #     success = self.ball_in_cup
+        # else:
+        #     # reward = - 1e-2 * action_cost
+        #     # reward = - 1e-4 * action_cost
+        #     reward = 0
+        #     success = False
+        # ################################################################################################################
 
         infos = {}
         infos["success"] = success
         infos["is_collided"] = self._is_collided
@@ -193,6 +224,10 @@
         if not self.ball_in_cup:
             self.ball_in_cup = self._check_collision_single_objects(sim, self.ball_collision_id,
                                                                     self.cup_table_collision_id)
+        if not self.ball_ground_contact_first:
+            if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact and not self.ball_in_cup:
+                self.ball_ground_contact_first = self._check_collision_single_objects(sim, self.ball_collision_id,
+                                                                                      self.ground_collision_id)
 
     def _check_collision_single_objects(self, sim, id_1, id_2):
         for coni in range(0, sim.data.ncon):
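
Note on the new timing values (a reviewer's sanity-check sketch, not part of
the patch): the shorter release step lines up with the new mp_kwargs. The
constants below are taken from the diff; the dt of 0.005 s is assumed from
the ep_length comment in beerpong.py.

    # Hypothetical sanity check, not part of the patch.
    DT = 0.005            # simulation timestep, per the ep_length comment
    EP_LENGTH = 600       # "based on 3 seconds with dt = 0.005"

    release_step = 100    # new self._release_step (was 130)
    duration = 0.5        # new mp_kwargs "duration" (was 1)
    post_traj_time = 2.5  # new mp_kwargs "post_traj_time" (was 2)

    # The ball is now released exactly when the 0.5 s motion primitive ends ...
    assert release_step * DT == duration
    # ... and the primitive plus the follow-up rollout still span the full
    # 3-second episode, so ep_length can stay at 600 steps.
    assert duration + post_traj_time == EP_LENGTH * DT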
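
For quick reference while reviewing, a condensed restatement of the terminal
branch that is active after this patch (hypothetical helper; the flag names
mirror BeerPongReward, but this function does not exist in the repo):

    import numpy as np

    def terminal_reward(min_dist, final_dist, action_cost,
                        ball_in_cup, any_contact):
        # any_contact: ball touched the table, cup, or wall at least once
        if ball_in_cup:
            min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
        elif any_contact:
            min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -2
        else:
            # no relevant contact at all: largest negative offset
            min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -4
        return (rew_offset
                - min_dist_coeff * min_dist ** 2
                - final_dist_coeff * final_dist ** 2
                - 1e-4 * np.mean(action_cost))

On non-terminal steps the patch also switches the per-step penalty from
-1e-4 * action_cost to -1e-2 * action_cost; the large commented-out block
above is an alternative that drops the table-bounce requirement and instead
grades on ball_ground_contact_first.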