Working BeerPong (bp) version; tested with CMORE on a smaller context space with 1 seed
This commit is contained in:
parent
eb7dd3a18f
commit
7ffe94dcfd
@ -391,6 +391,7 @@ for _v in _versions:
|
||||
"duration": 1,
|
||||
"post_traj_time": 2,
|
||||
"policy_type": "motor",
|
||||
# "weights_scale": 0.15,
|
||||
"weights_scale": 1,
|
||||
"zero_start": True,
|
||||
"zero_goal": False,
|
||||
|
@ -10,6 +10,10 @@ from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
|
||||
CUP_POS_MIN = np.array([-0.32, -2.2])
|
||||
CUP_POS_MAX = np.array([0.32, -1.2])
|
||||
|
||||
# Smaller context space -> easier task (alternative cup-position bounds, currently disabled)
|
||||
# CUP_POS_MIN = np.array([-0.16, -2.2])
|
||||
# CUP_POS_MAX = np.array([0.16, -1.7])
|
||||
|
||||
|
||||
class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
|
||||
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
|
||||
@ -36,7 +40,8 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
|
||||
self.ball_site_id = 0
|
||||
self.ball_id = 11
|
||||
|
||||
self._release_step = 175 # time step of ball release
|
||||
# self._release_step = 175 # time step of ball release
|
||||
self._release_step = 130 # time step of ball release
|
||||
|
||||
self.sim_time = 3 # seconds
|
||||
self.ep_length = 600 # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt)
|
||||
|
@ -85,7 +85,7 @@ class BeerPongReward:
|
||||
# if not self.ball_table_contact:
|
||||
# self.ball_table_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
|
||||
# self.table_collision_id)
|
||||
|
||||
#
|
||||
# self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
|
||||
# if env._steps == env.ep_length - 1 or self._is_collided:
|
||||
# min_dist = np.min(self.dists)
|
||||
@ -115,7 +115,7 @@ class BeerPongReward:
|
||||
# crash = False
|
||||
# ################################################################################################################
|
||||
|
||||
# ##################### Reward function which does not force to bounce once on the table (tanh) ################
|
||||
##################### Reward function which does not force the ball to bounce once on the table (tanh) ################
|
||||
# self._check_contacts(env.sim)
|
||||
# self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
|
||||
# if env._steps == env.ep_length - 1 or self._is_collided:
|
||||
@ -142,9 +142,9 @@ class BeerPongReward:
|
||||
# reward = - 1e-2 * action_cost
|
||||
# success = False
|
||||
# crash = False
|
||||
# ################################################################################################################
|
||||
################################################################################################################
|
||||
|
||||
# ##################### Reward function which does not force to bounce once on the table (quad dist) ############
|
||||
# # ##################### Reward function which does not force to bounce once on the table (quad dist) ############
|
||||
self._check_contacts(env.sim)
|
||||
self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
|
||||
if env._steps == env.ep_length - 1 or self._is_collided:
|
||||
@ -162,12 +162,12 @@ class BeerPongReward:
|
||||
|
||||
reward = rew_offset - min_dist_coeff * min_dist**2 - final_dist_coeff * final_dist**2 - \
|
||||
1e-4*np.mean(action_cost)
|
||||
# 1e-7*np.mean(action_cost)
|
||||
success = self.ball_in_cup
|
||||
crash = self._is_collided
|
||||
else:
|
||||
reward = - 1e-2 * action_cost
|
||||
# reward = - 1e-2 * action_cost
|
||||
reward = - 1e-4 * action_cost
|
||||
success = False
|
||||
crash = False
|
||||
# ################################################################################################################
|
||||
|
||||
infos = {}
|
||||
|
Loading…
Reference in New Issue
Block a user