From a0a9c9c7fb27559b5d185d9db561930f479ff952 Mon Sep 17 00:00:00 2001
From: Maximilian Huettenrauch
Date: Tue, 1 Jun 2021 16:52:54 +0200
Subject: [PATCH] wip

---
 alr_envs/__init__.py                               |  2 +-
 alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py     |  4 ++++
 .../ball_in_a_cup/ball_in_a_cup_reward_simple.py   | 15 ++++++++++-----
 example.py                                         |  6 +++---
 4 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py
index 986265c..24353b2 100644
--- a/alr_envs/__init__.py
+++ b/alr_envs/__init__.py
@@ -210,7 +210,7 @@ register(
         "hole_width": 0.25,
         "hole_depth": 1,
         "hole_x": 2,
-        "collision_penalty": 100,
+        "collision_penalty": 2,
     }
 )
 
diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py
index 345b3ce..bfd7940 100644
--- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py
+++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py
@@ -68,6 +68,10 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
     def current_vel(self):
         return self.sim.data.qvel[0:7].copy()
 
+    def reset(self):
+        self.reward_function.reset(None)
+        return super().reset()
+
     def reset_model(self):
         init_pos_all = self.init_qpos.copy()
         init_pos_robot = self._start_pos
diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py
index 79987d6..0bc0fd2 100644
--- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py
+++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py
@@ -37,6 +37,7 @@ class BallInACupReward(alr_reward_fct.AlrReward):
         self.dists_final = []
         self.costs = []
         self.action_costs = []
+        self.angle_costs = []
         self.cup_angles = []
 
     def compute_reward(self, action, env):
@@ -56,8 +57,11 @@ class BallInACupReward(alr_reward_fct.AlrReward):
         self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
         self.ball_traj[env._steps, :] = ball_pos
         cup_quat = np.copy(env.sim.data.body_xquat[env.sim.model._body_name2id["cup"]])
-        self.cup_angles.append(np.arctan2(2 * (cup_quat[0] * cup_quat[1] + cup_quat[2] * cup_quat[3]),
-                                          1 - 2 * (cup_quat[1]**2 + cup_quat[2]**2)))
+        cup_angle = np.arctan2(2 * (cup_quat[0] * cup_quat[1] + cup_quat[2] * cup_quat[3]),
+                               1 - 2 * (cup_quat[1]**2 + cup_quat[2]**2))
+        cost_angle = (cup_angle - np.pi / 2) ** 2
+        self.angle_costs.append(cost_angle)
+        self.cup_angles.append(cup_angle)
 
         action_cost = np.sum(np.square(action))
         self.action_costs.append(action_cost)
@@ -67,7 +71,8 @@
         if env._steps == env.sim_steps - 1 or self._is_collided:
             t_min_dist = np.argmin(self.dists)
             angle_min_dist = self.cup_angles[t_min_dist]
-            cost_angle = (angle_min_dist - np.pi / 2)**2
+            # cost_angle = (angle_min_dist - np.pi / 2)**2
+
             min_dist = self.dists[t_min_dist]
             dist_final = self.dists_final[-1]
 
@@ -76,11 +81,11 @@
             cost = 0.5 * dist_final + 0.05 * cost_angle  # TODO: Increase cost_angle weight  # 0.5 * min_dist +
             # reward = np.exp(-2 * cost) - 1e-2 * action_cost - self.collision_penalty * int(self._is_collided)
             # reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
-            reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
+            reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 5e-4 * action_cost - self.collision_penalty * int(self._is_collided)
             success = dist_final < 0.05 and ball_in_cup and not self._is_collided
             crash = self._is_collided
         else:
-            reward = - 1e-5 * action_cost  # TODO: increase action_cost weight
+            reward = - 5e-4 * action_cost - 1e-4 * cost_angle  # TODO: increase action_cost weight
             success = False
             crash = False
 
diff --git a/example.py b/example.py
index af30138..a8ae649 100644
--- a/example.py
+++ b/example.py
@@ -106,10 +106,10 @@ def example_async_contextual_sampler(env_name="alr_envs:SimpleReacherDMP-v1", n_
 
 if __name__ == '__main__':
     # example_mujoco()
-    # example_dmp("alr_envs:SimpleReacherDMP-v1")
+    example_mp("alr_envs:SimpleReacherDMP-v1")
     # example_async("alr_envs:LongSimpleReacherDMP-v0", 4)
     # example_async_contextual_sampler()
     # env = gym.make("alr_envs:HoleReacherDetPMP-v1")
-    env_name = "alr_envs:ALRBallInACupSimpleDetPMP-v0"
+    # env_name = "alr_envs:ALRBallInACupSimpleDetPMP-v0"
    # example_async_sampler(env_name)
-    example_mp(env_name)
+    # example_mp(env_name)
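For reference, the per-step angle cost that this patch adds to ball_in_a_cup_reward_simple.py can be reproduced in isolation. The sketch below is not part of the patch: it assumes a scalar-first (w, x, y, z) quaternion, which is the layout MuJoCo uses for body_xquat, and the helper name cup_angle_cost is hypothetical.

import numpy as np

def cup_angle_cost(cup_quat, target_angle=np.pi / 2):
    # Extract the cup's tilt angle from a scalar-first (w, x, y, z) quaternion,
    # mirroring the arctan2 expression used in compute_reward.
    w, x, y, z = cup_quat
    cup_angle = np.arctan2(2 * (w * x + y * z), 1 - 2 * (x ** 2 + y ** 2))
    # Quadratic penalty for deviating from the target orientation
    # (pi/2 in the patch).
    return (cup_angle - target_angle) ** 2, cup_angle

# Identity quaternion -> angle 0, so the cost is (pi/2)**2.
cost, angle = cup_angle_cost(np.array([1.0, 0.0, 0.0, 0.0]))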