fewer release steps for beerpong

Onur 2022-04-27 16:15:17 +02:00
parent a9460f15fd
commit 7f64c975cd
3 changed files with 55 additions and 15 deletions

View File

@@ -559,11 +559,13 @@ for _v in _versions:
"mp_kwargs": {
"num_dof": 7,
"num_basis": 2,
"duration": 1,
"post_traj_time": 2,
# "duration": 1,
"duration": 0.5,
# "post_traj_time": 2,
"post_traj_time": 2.5,
"policy_type": "motor",
# "weights_scale": 0.15,
"weights_scale": 1,
"weights_scale": 0.14,
# "weights_scale": 1,
"zero_start": True,
"zero_goal": False,
"policy_kwargs": {

View File

@@ -41,9 +41,9 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
self.ball_id = 11
# self._release_step = 175 # time step of ball release
self._release_step = 130 # time step of ball release
# self._release_step = 130 # time step of ball release
self._release_step = 100 # time step of ball release
self.sim_time = 3 # seconds
self.ep_length = 600 # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt)
self.cup_table_id = 10
@@ -54,6 +54,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
reward_function = BeerPongReward
self.reward_function = reward_function()
self.n_table_bounces_first = 0
MujocoEnv.__init__(self, self.xml_path, frame_skip)
utils.EzPickle.__init__(self)
@@ -75,6 +76,8 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
return self.sim.data.qvel[0:7].copy()
def reset(self):
print(not self.reward_function.ball_ground_contact_first)
self.n_table_bounces_first += int(not self.reward_function.ball_ground_contact_first)
self.reward_function.reset(self.add_noise)
return super().reset()
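The two lines added to reset() log and count episodes whose first ball contact was not with the ground. A standalone sketch of that bookkeeping, with BeerPongReward replaced by a hypothetical stub so the negation is easy to follow:

```python
# Illustrative stub; only the ball_ground_contact_first flag from
# BeerPongReward matters for the new counter.
class _RewardStub:
    def __init__(self):
        # Set to True during an episode if the ball's first contact
        # (before table/cup/wall or landing in the cup) is the ground.
        self.ball_ground_contact_first = False

reward_function = _RewardStub()
n_table_bounces_first = 0

# The added reset() bookkeeping: count the episode when the ball did
# NOT hit the ground first (flag still False at reset time).
n_table_bounces_first += int(not reward_function.ball_ground_contact_first)
print(n_table_bounces_first)  # 1 for this stubbed episode
```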

View File

@@ -50,7 +50,7 @@ class BeerPongReward:
self.angle_rewards = []
self.cup_angles = []
self.cup_z_axes = []
self.ball_ground_contact = False
self.ball_ground_contact_first = False
self.ball_table_contact = False
self.ball_wall_contact = False
self.ball_cup_contact = False
@@ -150,8 +150,9 @@ class BeerPongReward:
if env._steps == env.ep_length - 1 or self._is_collided:
min_dist = np.min(self.dists)
final_dist = self.dists_final[-1]
# encourage bounce before falling into cup
# if self.ball_ground_contact_first:
# min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -6
# else:
if not self.ball_in_cup:
if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact:
min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -4
@@ -159,17 +160,47 @@ class BeerPongReward:
min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -2
else:
min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
1e-4 * np.mean(action_cost)
# 1e-7*np.mean(action_cost)
success = self.ball_in_cup
else:
# reward = - 1e-2 * action_cost
reward = - 1e-4 * action_cost
reward = - 1e-2 * action_cost
# reward = - 1e-4 * action_cost
# reward = 0
success = False
# ################################################################################################################
# # # ##################### Reward function which does not force to bounce once on the table (quad dist) ############
# self._check_contacts(env.sim)
# self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
# if env._steps == env.ep_length - 1 or self._is_collided:
# min_dist = np.min(self.dists)
# final_dist = self.dists_final[-1]
#
# if not self.ball_in_cup:
# if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact:
# min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -6
# else:
# if self.ball_ground_contact_first:
# min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -4
# else:
# min_dist_coeff, final_dist_coeff, rew_offset = 1, 0.5, -2
# else:
# if self.ball_ground_contact_first:
# min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, -1
# else:
# min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
# reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
# 1e-7 * np.mean(action_cost)
# # 1e-4*np.mean(action_cost)
# success = self.ball_in_cup
# else:
# # reward = - 1e-2 * action_cost
# # reward = - 1e-4 * action_cost
# reward = 0
# success = False
# ################################################################################################################
infos = {}
infos["success"] = success
infos["is_collided"] = self._is_collided
@@ -193,6 +224,10 @@ class BeerPongReward:
if not self.ball_in_cup:
self.ball_in_cup = self._check_collision_single_objects(sim, self.ball_collision_id,
self.cup_table_collision_id)
if not self.ball_ground_contact_first:
if not self.ball_table_contact and not self.ball_cup_contact and not self.ball_wall_contact and not self.ball_in_cup:
self.ball_ground_contact_first = self._check_collision_single_objects(sim, self.ball_collision_id,
self.ground_collision_id)
def _check_collision_single_objects(self, sim, id_1, id_2):
for coni in range(0, sim.data.ncon):
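The diff truncates _check_collision_single_objects mid-loop. A plausible completion, assuming the standard mujoco_py contact layout (sim.data.contact[i].geom1 / .geom2), not necessarily the file's exact body:

```python
def _check_collision_single_objects(self, sim, id_1, id_2):
    # Scan the currently active MuJoCo contacts and report whether
    # the two geom ids touch, in either order.
    for coni in range(0, sim.data.ncon):
        con = sim.data.contact[coni]
        collision = con.geom1 == id_1 and con.geom2 == id_2
        collision_trans = con.geom1 == id_2 and con.geom2 == id_1
        if collision or collision_trans:
            return True
    return False
```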