diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py index 8f37f78..ac885fa 100644 --- a/alr_envs/alr/__init__.py +++ b/alr_envs/alr/__init__.py @@ -559,11 +559,13 @@ for _v in _versions: "mp_kwargs": { "num_dof": 7, "num_basis": 2, - "duration": 1, - "post_traj_time": 2, + # "duration": 1, + "duration": 0.5, + # "post_traj_time": 2, + "post_traj_time": 2.5, "policy_type": "motor", - # "weights_scale": 0.15, - "weights_scale": 1, + "weights_scale": 0.14, + # "weights_scale": 1, "zero_start": True, "zero_goal": False, "policy_kwargs": { diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py index d885e78..f5d2bd8 100644 --- a/alr_envs/alr/mujoco/beerpong/beerpong.py +++ b/alr_envs/alr/mujoco/beerpong/beerpong.py @@ -41,9 +41,9 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): self.ball_id = 11 # self._release_step = 175 # time step of ball release - self._release_step = 130 # time step of ball release + # self._release_step = 130 # time step of ball release + self._release_step = 100 # time step of ball release - self.sim_time = 3 # seconds self.ep_length = 600 # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt) self.cup_table_id = 10 @@ -54,6 +54,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): reward_function = BeerPongReward self.reward_function = reward_function() + self.n_table_bounces_first = 0 MujocoEnv.__init__(self, self.xml_path, frame_skip) utils.EzPickle.__init__(self) @@ -75,6 +76,8 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): return self.sim.data.qvel[0:7].copy() def reset(self): + print(not self.reward_function.ball_ground_contact_first) + self.n_table_bounces_first += int(not self.reward_function.ball_ground_contact_first) self.reward_function.reset(self.add_noise) return super().reset() diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py index c9ed451..24c48be 100644 --- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py +++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py @@ -50,7 +50,7 @@ class BeerPongReward: self.angle_rewards = [] self.cup_angles = [] self.cup_z_axes = [] - self.ball_ground_contact = False + self.ball_ground_contact_first = False self.ball_table_contact = False self.ball_wall_contact = False self.ball_cup_contact = False @@ -150,8 +150,9 @@ class BeerPongReward: if env._steps == env.ep_length - 1 or self._is_collided: min_dist = np.min(self.dists) final_dist = self.dists_final[-1] - - # encourage bounce before falling into cup + # if self.ball_ground_contact_first: + # min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -6 + # else: if not self.ball_in_cup: if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact: min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -4 @@ -159,17 +160,47 @@ class BeerPongReward: min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -2 else: min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0 - - reward = rew_offset - min_dist_coeff * min_dist**2 - final_dist_coeff * final_dist**2 - \ - 1e-4*np.mean(action_cost) - # 1e-7*np.mean(action_cost) + reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \ + 1e-4 * np.mean(action_cost) + # 1e-7*np.mean(action_cost) success = self.ball_in_cup else: - # reward = - 1e-2 * action_cost - reward = - 1e-4 * action_cost + reward = - 1e-2 * action_cost + # reward = - 1e-4 * action_cost + # reward = 0 success = False # ################################################################################################################ + # # # ##################### Reward function which does not force to bounce once on the table (quad dist) ############ + # self._check_contacts(env.sim) + # self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids) + # if env._steps == env.ep_length - 1 or self._is_collided: + # min_dist = np.min(self.dists) + # final_dist = self.dists_final[-1] + # + # if not self.ball_in_cup: + # if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact: + # min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -6 + # else: + # if self.ball_ground_contact_first: + # min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -4 + # else: + # min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -2 + # else: + # if self.ball_ground_contact_first: + # min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, -1 + # else: + # min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0 + # reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \ + # 1e-7 * np.mean(action_cost) + # # 1e-4*np.mean(action_cost) + # success = self.ball_in_cup + # else: + # # reward = - 1e-2 * action_cost + # # reward = - 1e-4 * action_cost + # reward = 0 + # success = False + # ################################################################################################################ infos = {} infos["success"] = success infos["is_collided"] = self._is_collided @@ -193,6 +224,10 @@ class BeerPongReward: if not self.ball_in_cup: self.ball_in_cup = self._check_collision_single_objects(sim, self.ball_collision_id, self.cup_table_collision_id) + if not self.ball_ground_contact_first: + if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact and not self.ball_in_cup: + self.ball_ground_contact_first = self._check_collision_single_objects(sim, self.ball_collision_id, + self.ground_collision_id) def _check_collision_single_objects(self, sim, id_1, id_2): for coni in range(0, sim.data.ncon):