commit a0a9c9c7fb
parent 746d408a76

    wip
@@ -210,7 +210,7 @@ register(
         "hole_width": 0.25,
         "hole_depth": 1,
         "hole_x": 2,
-        "collision_penalty": 100,
+        "collision_penalty": 2,
     }
 )
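The only change above is the collision penalty in the environment's registration kwargs, dropped from 100 to 2, presumably so a single collision no longer dwarfs the shaped distance terms. A minimal sketch of the surrounding call, assuming gym's standard registration API; the id, entry_point, and max_episode_steps below are placeholders, only the kwargs come from this hunk:

from gym.envs.registration import register

register(
    id="HoleReacher-v1",                    # placeholder id
    entry_point="alr_envs:HoleReacherEnv",  # placeholder entry point
    max_episode_steps=200,                  # placeholder
    kwargs={
        "hole_width": 0.25,
        "hole_depth": 1,
        "hole_x": 2,
        "collision_penalty": 2,  # was 100 before this commit
    },
)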
@@ -68,6 +68,10 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
     def current_vel(self):
         return self.sim.data.qvel[0:7].copy()
 
+    def reset(self):
+        self.reward_function.reset(None)
+        return super().reset()
+
     def reset_model(self):
         init_pos_all = self.init_qpos.copy()
         init_pos_robot = self._start_pos
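The new reset() override clears the reward function's per-episode buffers before delegating to the MuJoCo reset; without it, stale distances and angles from the previous episode would leak into the next one. A minimal sketch of the pattern this relies on (names mirror the diff, not the full classes):

class StatefulReward:
    """Reward object that accumulates per-step buffers over an episode."""
    def reset(self, context):
        # context is unused by this reward, hence reset(None) in the env
        self.dists = []
        self.dists_final = []
        self.cup_angles = []
        self.angle_costs = []

class Env:
    def __init__(self):
        self.reward_function = StatefulReward()

    def reset(self):
        self.reward_function.reset(None)  # clear episode buffers first...
        return self._reset_sim_state()    # ...then reset the simulation

    def _reset_sim_state(self):
        return None  # stand-in for the actual MuJoCo state reset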
@@ -37,6 +37,7 @@ class BallInACupReward(alr_reward_fct.AlrReward):
         self.dists_final = []
         self.costs = []
         self.action_costs = []
+        self.angle_costs = []
         self.cup_angles = []
 
     def compute_reward(self, action, env):
@@ -56,8 +57,11 @@ class BallInACupReward(alr_reward_fct.AlrReward):
         self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
         self.ball_traj[env._steps, :] = ball_pos
         cup_quat = np.copy(env.sim.data.body_xquat[env.sim.model._body_name2id["cup"]])
-        self.cup_angles.append(np.arctan2(2 * (cup_quat[0] * cup_quat[1] + cup_quat[2] * cup_quat[3]),
-                                          1 - 2 * (cup_quat[1]**2 + cup_quat[2]**2)))
+        cup_angle = np.arctan2(2 * (cup_quat[0] * cup_quat[1] + cup_quat[2] * cup_quat[3]),
+                               1 - 2 * (cup_quat[1]**2 + cup_quat[2]**2))
+        cost_angle = (cup_angle - np.pi / 2) ** 2
+        self.angle_costs.append(cost_angle)
+        self.cup_angles.append(cup_angle)
 
         action_cost = np.sum(np.square(action))
         self.action_costs.append(action_cost)
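The angle is now computed once into cup_angle, costed against the target of pi/2 every step, and appended to both buffers, which is also why the terminal-branch recomputation gets commented out in the next hunk. MuJoCo stores quaternions as [w, x, y, z], and this arctan2 expression is the roll (rotation about the body x-axis) of the standard quaternion-to-Euler conversion. A self-contained check of that formula:

import numpy as np

def cup_roll(quat):
    # roll of a [w, x, y, z] quaternion; same expression as in the hunk
    w, x, y, z = quat
    return np.arctan2(2 * (w * x + y * z), 1 - 2 * (x ** 2 + y ** 2))

print(cup_roll([1.0, 0.0, 0.0, 0.0]))                          # identity -> 0.0
print(cup_roll([np.cos(np.pi / 4), np.sin(np.pi / 4), 0, 0]))  # 90 deg about x -> pi/2

At roll pi/2 the per-step cost_angle = (cup_angle - np.pi / 2) ** 2 vanishes, so the cup is only penalized for tilting away from that orientation.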
@@ -67,7 +71,8 @@ class BallInACupReward(alr_reward_fct.AlrReward):
         if env._steps == env.sim_steps - 1 or self._is_collided:
             t_min_dist = np.argmin(self.dists)
             angle_min_dist = self.cup_angles[t_min_dist]
-            cost_angle = (angle_min_dist - np.pi / 2)**2
+            # cost_angle = (angle_min_dist - np.pi / 2)**2
 
+
             min_dist = self.dists[t_min_dist]
             dist_final = self.dists_final[-1]
@@ -76,11 +81,11 @@ class BallInACupReward(alr_reward_fct.AlrReward):
             cost = 0.5 * dist_final + 0.05 * cost_angle  # TODO: Increase cost_angle weight  # 0.5 * min_dist +
             # reward = np.exp(-2 * cost) - 1e-2 * action_cost - self.collision_penalty * int(self._is_collided)
             # reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
-            reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
+            reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 5e-4 * action_cost - self.collision_penalty * int(self._is_collided)
             success = dist_final < 0.05 and ball_in_cup and not self._is_collided
             crash = self._is_collided
         else:
-            reward = - 1e-5 * action_cost  # TODO: increase action_cost weight
+            reward = - 5e-4 * action_cost - 1e-4 * cost_angle  # TODO: increase action_cost weight
             success = False
             crash = False
 
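Two reweightings here: the terminal action-cost coefficient goes from 1e-5 to 5e-4 (a 50x increase, matching the TODO), and the non-terminal branch now also charges the per-step cost_angle from the earlier hunk instead of action cost alone. A back-of-the-envelope comparison of the terminal reward; the component magnitudes below are illustrative, not from the source:

dist_final, min_dist_final, cost_angle, action_cost = 0.1, 0.05, 0.2, 50.0
collision_penalty, is_collided = 2, False

old = -dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle \
      - 1e-5 * action_cost - collision_penalty * int(is_collided)
new = -dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle \
      - 5e-4 * action_cost - collision_penalty * int(is_collided)
print(old, new)  # -0.01302 vs. -0.03752: action cost now rivals the distance terms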
@@ -106,10 +106,10 @@ def example_async_contextual_sampler(env_name="alr_envs:SimpleReacherDMP-v1", n_
 
 if __name__ == '__main__':
     # example_mujoco()
-    # example_dmp("alr_envs:SimpleReacherDMP-v1")
+    example_mp("alr_envs:SimpleReacherDMP-v1")
     # example_async("alr_envs:LongSimpleReacherDMP-v0", 4)
     # example_async_contextual_sampler()
     # env = gym.make("alr_envs:HoleReacherDetPMP-v1")
-    env_name = "alr_envs:ALRBallInACupSimpleDetPMP-v0"
+    # env_name = "alr_envs:ALRBallInACupSimpleDetPMP-v0"
     # example_async_sampler(env_name)
-    example_mp(env_name)
+    # example_mp(env_name)
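The __main__ block switches the demo from the ball-in-a-cup DetPMP environment back to the simple reacher DMP. A minimal sketch of the standard gym loop that example_mp presumably wraps (its actual internals are not part of this diff, and the random action stands in for sampled motion-primitive parameters):

import gym

env = gym.make("alr_envs:SimpleReacherDMP-v1")
obs = env.reset()
done = False
while not done:
    # sample a random action; for MP envs this is one parameter vector per episode
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()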