Working beer pong (bp) version, tested with CMORE on a smaller context with 1 seed
This commit is contained in:
parent
eb7dd3a18f
commit
7ffe94dcfd
@ -391,6 +391,7 @@ for _v in _versions:
|
|||||||
"duration": 1,
|
"duration": 1,
|
||||||
"post_traj_time": 2,
|
"post_traj_time": 2,
|
||||||
"policy_type": "motor",
|
"policy_type": "motor",
|
||||||
|
# "weights_scale": 0.15,
|
||||||
"weights_scale": 1,
|
"weights_scale": 1,
|
||||||
"zero_start": True,
|
"zero_start": True,
|
||||||
"zero_goal": False,
|
"zero_goal": False,
|
||||||
|
@ -10,6 +10,10 @@ from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
|
|||||||
CUP_POS_MIN = np.array([-0.32, -2.2])
|
CUP_POS_MIN = np.array([-0.32, -2.2])
|
||||||
CUP_POS_MAX = np.array([0.32, -1.2])
|
CUP_POS_MAX = np.array([0.32, -1.2])
|
||||||
|
|
||||||
|
# Smaller context space -> easier task
|
||||||
|
# CUP_POS_MIN = np.array([-0.16, -2.2])
|
||||||
|
# CUP_POS_MAX = np.array([0.16, -1.7])
|
||||||
|
|
||||||
|
|
||||||
class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
|
class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
|
||||||
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
|
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
|
||||||
@ -36,7 +40,8 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
|
|||||||
self.ball_site_id = 0
|
self.ball_site_id = 0
|
||||||
self.ball_id = 11
|
self.ball_id = 11
|
||||||
|
|
||||||
self._release_step = 175 # time step of ball release
|
# self._release_step = 175 # time step of ball release
|
||||||
|
self._release_step = 130 # time step of ball release
|
||||||
|
|
||||||
self.sim_time = 3 # seconds
|
self.sim_time = 3 # seconds
|
||||||
self.ep_length = 600 # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt)
|
self.ep_length = 600 # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt)
|
||||||
|
@ -85,7 +85,7 @@ class BeerPongReward:
|
|||||||
# if not self.ball_table_contact:
|
# if not self.ball_table_contact:
|
||||||
# self.ball_table_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
|
# self.ball_table_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
|
||||||
# self.table_collision_id)
|
# self.table_collision_id)
|
||||||
|
#
|
||||||
# self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
|
# self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
|
||||||
# if env._steps == env.ep_length - 1 or self._is_collided:
|
# if env._steps == env.ep_length - 1 or self._is_collided:
|
||||||
# min_dist = np.min(self.dists)
|
# min_dist = np.min(self.dists)
|
||||||
@ -115,7 +115,7 @@ class BeerPongReward:
|
|||||||
# crash = False
|
# crash = False
|
||||||
# ################################################################################################################
|
# ################################################################################################################
|
||||||
|
|
||||||
# ##################### Reward function which does not force to bounce once on the table (tanh) ################
|
##################### Reward function that does not force the ball to bounce once on the table (tanh) ################
|
||||||
# self._check_contacts(env.sim)
|
# self._check_contacts(env.sim)
|
||||||
# self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
|
# self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
|
||||||
# if env._steps == env.ep_length - 1 or self._is_collided:
|
# if env._steps == env.ep_length - 1 or self._is_collided:
|
||||||
@ -142,9 +142,9 @@ class BeerPongReward:
|
|||||||
# reward = - 1e-2 * action_cost
|
# reward = - 1e-2 * action_cost
|
||||||
# success = False
|
# success = False
|
||||||
# crash = False
|
# crash = False
|
||||||
# ################################################################################################################
|
################################################################################################################
|
||||||
|
|
||||||
# ##################### Reward function which does not force to bounce once on the table (quad dist) ############
|
# # ##################### Reward function that does not force the ball to bounce once on the table (quad dist) ############
|
||||||
self._check_contacts(env.sim)
|
self._check_contacts(env.sim)
|
||||||
self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
|
self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
|
||||||
if env._steps == env.ep_length - 1 or self._is_collided:
|
if env._steps == env.ep_length - 1 or self._is_collided:
|
||||||
@ -162,12 +162,12 @@ class BeerPongReward:
|
|||||||
|
|
||||||
reward = rew_offset - min_dist_coeff * min_dist**2 - final_dist_coeff * final_dist**2 - \
|
reward = rew_offset - min_dist_coeff * min_dist**2 - final_dist_coeff * final_dist**2 - \
|
||||||
1e-4*np.mean(action_cost)
|
1e-4*np.mean(action_cost)
|
||||||
|
# 1e-7*np.mean(action_cost)
|
||||||
success = self.ball_in_cup
|
success = self.ball_in_cup
|
||||||
crash = self._is_collided
|
|
||||||
else:
|
else:
|
||||||
reward = - 1e-2 * action_cost
|
# reward = - 1e-2 * action_cost
|
||||||
|
reward = - 1e-4 * action_cost
|
||||||
success = False
|
success = False
|
||||||
crash = False
|
|
||||||
# ################################################################################################################
|
# ################################################################################################################
|
||||||
|
|
||||||
infos = {}
|
infos = {}
|
||||||
|
Loading…
Reference in New Issue
Block a user