Reduce the number of time steps before ball release for beerpong

2022-04-27 16:15:17 +02:00 · 2022-04-27 16:15:17 +02:00 · 7f64c975cd
commit 7f64c975cd
parent a9460f15fd
3 changed files with 55 additions and 15 deletions
--- a/alr_envs/alr/init.py
+++ b/alr_envs/alr/init.py
@ -559,11 +559,13 @@ for _v in _versions:
            "mp_kwargs": {
                "num_dof": 7,
                "num_basis": 2,
-                "duration": 1,
-                "post_traj_time": 2,
+                # "duration": 1,
+                "duration": 0.5,
+                # "post_traj_time": 2,
+                "post_traj_time": 2.5,
                "policy_type": "motor",
-                # "weights_scale": 0.15,
-                "weights_scale": 1,
+                "weights_scale": 0.14,
+                # "weights_scale": 1,
                "zero_start": True,
                "zero_goal": False,
                "policy_kwargs": {
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@ -41,9 +41,9 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
        self.ball_id = 11

        # self._release_step = 175  # time step of ball release
-        self._release_step = 130  # time step of ball release
+        # self._release_step = 130  # time step of ball release
+        self._release_step = 100  # time step of ball release

-        self.sim_time = 3  # seconds
        self.ep_length = 600  # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt)
        self.cup_table_id = 10

@ -54,6 +54,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):

        reward_function = BeerPongReward
        self.reward_function = reward_function()
+        self.n_table_bounces_first = 0

        MujocoEnv.__init__(self, self.xml_path, frame_skip)
        utils.EzPickle.__init__(self)
@ -75,6 +76,8 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
        return self.sim.data.qvel[0:7].copy()

    def reset(self):
+        print(not self.reward_function.ball_ground_contact_first)
+        self.n_table_bounces_first += int(not self.reward_function.ball_ground_contact_first)
        self.reward_function.reset(self.add_noise)
        return super().reset()

--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@ -50,7 +50,7 @@ class BeerPongReward:
        self.angle_rewards = []
        self.cup_angles = []
        self.cup_z_axes = []
-        self.ball_ground_contact = False
+        self.ball_ground_contact_first = False
        self.ball_table_contact = False
        self.ball_wall_contact = False
        self.ball_cup_contact = False
@ -150,8 +150,9 @@ class BeerPongReward:
        if env._steps == env.ep_length - 1 or self._is_collided:
            min_dist = np.min(self.dists)
            final_dist = self.dists_final[-1]
-
-            # encourage bounce before falling into cup
+            # if self.ball_ground_contact_first:
+            #     min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -6
+            # else:
            if not self.ball_in_cup:
                if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact:
                    min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -4
@ -159,17 +160,47 @@ class BeerPongReward:
                    min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -2
            else:
                min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
-
-            reward = rew_offset - min_dist_coeff * min_dist**2 - final_dist_coeff * final_dist**2 - \
-                     1e-4*np.mean(action_cost)
+            reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
+                     1e-4 * np.mean(action_cost)
            # 1e-7*np.mean(action_cost)
            success = self.ball_in_cup
        else:
-            # reward = - 1e-2 * action_cost
-            reward = - 1e-4 * action_cost
+            reward = - 1e-2 * action_cost
+            # reward = - 1e-4 * action_cost
+            # reward = 0
            success = False
        # ################################################################################################################

+        # # # ##################### Reward function which does not force to bounce once on the table (quad dist) ############
+        # self._check_contacts(env.sim)
+        # self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
+        # if env._steps == env.ep_length - 1 or self._is_collided:
+        #     min_dist = np.min(self.dists)
+        #     final_dist = self.dists_final[-1]
+        #
+        #     if not self.ball_in_cup:
+        #         if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact:
+        #             min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -6
+        #         else:
+        #             if self.ball_ground_contact_first:
+        #                 min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -4
+        #             else:
+        #                 min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -2
+        #     else:
+        #         if self.ball_ground_contact_first:
+        #             min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, -1
+        #         else:
+        #             min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
+        #     reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
+        #              1e-7 * np.mean(action_cost)
+        #     # 1e-4*np.mean(action_cost)
+        #     success = self.ball_in_cup
+        # else:
+        #     # reward = - 1e-2 * action_cost
+        #     # reward = - 1e-4 * action_cost
+        #     reward = 0
+        #     success = False
+        # ################################################################################################################
        infos = {}
        infos["success"] = success
        infos["is_collided"] = self._is_collided
@ -193,6 +224,10 @@ class BeerPongReward:
        if not self.ball_in_cup:
            self.ball_in_cup = self._check_collision_single_objects(sim, self.ball_collision_id,
                                                                    self.cup_table_collision_id)
+        if not self.ball_ground_contact_first:
+            if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact and not self.ball_in_cup:
+                self.ball_ground_contact_first = self._check_collision_single_objects(sim, self.ball_collision_id,
+                                                                    self.ground_collision_id)

    def _check_collision_single_objects(self, sim, id_1, id_2):
        for coni in range(0, sim.data.ncon):