working bp version, tested with CMORE on a smaller context with 1 seed

Author: Onur
Date: 2022-04-08 17:32:53 +02:00
parent eb7dd3a18f
commit 7ffe94dcfd
3 changed files with 14 additions and 8 deletions

File 1 of 3

@@ -391,6 +391,7 @@ for _v in _versions:
     "duration": 1,
     "post_traj_time": 2,
     "policy_type": "motor",
+    # "weights_scale": 0.15,
     "weights_scale": 1,
     "zero_start": True,
     "zero_goal": False,

File 2 of 3

@@ -10,6 +10,10 @@ from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
 CUP_POS_MIN = np.array([-0.32, -2.2])
 CUP_POS_MAX = np.array([0.32, -1.2])
+# smaller context space -> Easier task
+# CUP_POS_MIN = np.array([-0.16, -2.2])
+# CUP_POS_MAX = np.array([0.16, -1.7])

 class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
     def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
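CUP_POS_MIN and CUP_POS_MAX bound the context space (the cup position on the table); the commented-out values are the narrower "smaller context" from the commit message. A minimal sketch of how such bounds are commonly used to sample a context on reset, assuming uniform sampling; the function below is illustrative, not the env's actual reset code.

import numpy as np

CUP_POS_MIN = np.array([-0.32, -2.2])
CUP_POS_MAX = np.array([0.32, -1.2])

def sample_cup_position(rng=None):
    # Illustrative: draw an (x, y) cup position uniformly from the context box.
    rng = rng if rng is not None else np.random.default_rng()
    return rng.uniform(CUP_POS_MIN, CUP_POS_MAX)

# The commented-out bounds shrink this box, so sampled cup positions vary less
# and the task becomes easier to learn.
print(sample_cup_position())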
@@ -36,7 +40,8 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         self.ball_site_id = 0
         self.ball_id = 11
-        self._release_step = 175  # time step of ball release
+        # self._release_step = 175  # time step of ball release
+        self._release_step = 130  # time step of ball release
         self.sim_time = 3  # seconds
         self.ep_length = 600  # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt)
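With the step size implied by the ep_length comment (dt = 0.005 s, 600 steps for 3 s), moving _release_step from 175 to 130 releases the ball noticeably earlier. The conversion below assumes the release step is counted in those same control steps.

DT = 0.005                 # from the comment above: 3 s / 0.005 s = 600 steps
OLD_RELEASE, NEW_RELEASE = 175, 130

print(OLD_RELEASE * DT)    # 0.875 s into the episode
print(NEW_RELEASE * DT)    # 0.65 s into the episode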

File 3 of 3

@@ -85,7 +85,7 @@ class BeerPongReward:
         # if not self.ball_table_contact:
         #     self.ball_table_contact = self._check_collision_single_objects(env.sim, self.ball_collision_id,
         #                                                                    self.table_collision_id)
         #
         # self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
         # if env._steps == env.ep_length - 1 or self._is_collided:
         #     min_dist = np.min(self.dists)
@@ -115,7 +115,7 @@ class BeerPongReward:
         #     crash = False
         # ################################################################################################################
-        # ##################### Reward function which does not force to bounce once on the table (tanh) ################
+        ##################### Reward function which does not force to bounce once on the table (tanh) ################
         # self._check_contacts(env.sim)
         # self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
         # if env._steps == env.ep_length - 1 or self._is_collided:
@@ -142,9 +142,9 @@ class BeerPongReward:
         #     reward = - 1e-2 * action_cost
         #     success = False
         #     crash = False
-        # ################################################################################################################
-        # ##################### Reward function which does not force to bounce once on the table (quad dist) ############
+        ################################################################################################################
+        # # ##################### Reward function which does not force to bounce once on the table (quad dist) ############
         self._check_contacts(env.sim)
         self._is_collided = self._check_collision_with_itself(env.sim, self.robot_collision_ids)
         if env._steps == env.ep_length - 1 or self._is_collided:
@@ -162,12 +162,12 @@ class BeerPongReward:
             reward = rew_offset - min_dist_coeff * min_dist**2 - final_dist_coeff * final_dist**2 - \
                      1e-4*np.mean(action_cost)
                      # 1e-7*np.mean(action_cost)
             success = self.ball_in_cup
             crash = self._is_collided
         else:
-            reward = - 1e-2 * action_cost
+            # reward = - 1e-2 * action_cost
+            reward = - 1e-4 * action_cost
             success = False
             crash = False
         # ################################################################################################################
         infos = {}
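For reference, the now-active "quad dist" branch boils down to the shape below at episode end (collision or last step). rew_offset, min_dist_coeff and final_dist_coeff are set elsewhere in BeerPongReward, so the defaults here are placeholders, and the function is a sketch rather than the class's actual method.

import numpy as np

def quad_dist_reward(min_dist, final_dist, action_cost, episode_over,
                     rew_offset=0.0, min_dist_coeff=1.0, final_dist_coeff=1.0):
    # Sketch of the active branch: quadratic penalties on the closest and final
    # ball-cup distances at episode end, plus a small mean action cost.
    if episode_over:
        return (rew_offset
                - min_dist_coeff * min_dist ** 2
                - final_dist_coeff * final_dist ** 2
                - 1e-4 * np.mean(action_cost))
    # Per-step shaping before the end: the action-cost coefficient this commit
    # lowers from 1e-2 to 1e-4.
    return -1e-4 * action_cost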