diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py index 24353b2..a308852 100644 --- a/alr_envs/__init__.py +++ b/alr_envs/__init__.py @@ -178,7 +178,7 @@ register( "hole_width": None, "hole_depth": 1, "hole_x": None, - "collision_penalty": 100, + "collision_penalty": 1000, } ) @@ -210,7 +210,7 @@ register( "hole_width": 0.25, "hole_depth": 1, "hole_x": 2, - "collision_penalty": 2, + "collision_penalty": 1000, } ) diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py index bfd7940..fdd299e 100644 --- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py +++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py @@ -43,7 +43,7 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward reward_function = BallInACupReward else: - raise ValueError("Unknown reward type") + raise ValueError("Unknown reward type: {}".format(reward_type)) self.reward_function = reward_function(self.sim_steps) @property @@ -106,7 +106,7 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): done = success or self._steps == self.sim_steps - 1 or is_collided self._steps += 1 else: - reward = -2 + reward = -2000 success = False is_collided = False done = True @@ -154,6 +154,22 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): des_vel_full[5] = des_vel[2] return des_vel_full + def render(self, render_mode, **render_kwargs): + if render_mode == "plot_trajectory": + if self._steps == 1: + import matplotlib.pyplot as plt + # plt.ion() + self.fig, self.axs = plt.subplots(3, 1) + + if self._steps <= 1750: + for ax, cp in zip(self.axs, self.current_pos[1::2]): + ax.scatter(self._steps, cp, s=2, marker=".") + + # self.fig.show() + + else: + super().render(render_mode, **render_kwargs) + if __name__ == "__main__": env = ALRBallInACupEnv() diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py index 0bc0fd2..8f0c588 100644 --- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py +++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py @@ -22,7 +22,7 @@ class BallInACupReward(alr_reward_fct.AlrReward): self.goal_final_id = None self.collision_ids = None self._is_collided = False - self.collision_penalty = 1 + self.collision_penalty = 1000 self.ball_traj = None self.dists = None @@ -74,11 +74,11 @@ class BallInACupReward(alr_reward_fct.AlrReward): # cost_angle = (angle_min_dist - np.pi / 2)**2 - min_dist = self.dists[t_min_dist] + # min_dist = self.dists[t_min_dist] dist_final = self.dists_final[-1] min_dist_final = np.min(self.dists_final) - cost = 0.5 * dist_final + 0.05 * cost_angle # TODO: Increase cost_angle weight # 0.5 * min_dist + + # cost = 0.5 * dist_final + 0.05 * cost_angle # TODO: Increase cost_angle weight # 0.5 * min_dist + # reward = np.exp(-2 * cost) - 1e-2 * action_cost - self.collision_penalty * int(self._is_collided) # reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided) reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 5e-4 * action_cost - self.collision_penalty * int(self._is_collided) diff --git a/example.py b/example.py index a8ae649..20fd165 100644 --- a/example.py +++ b/example.py @@ -106,10 +106,10 @@ def example_async_contextual_sampler(env_name="alr_envs:SimpleReacherDMP-v1", n_ if __name__ == '__main__': # example_mujoco() - example_mp("alr_envs:SimpleReacherDMP-v1") + # example_mp("alr_envs:SimpleReacherDMP-v1") # example_async("alr_envs:LongSimpleReacherDMP-v0", 4) # example_async_contextual_sampler() # env = gym.make("alr_envs:HoleReacherDetPMP-v1") - # env_name = "alr_envs:ALRBallInACupSimpleDetPMP-v0" - # example_async_sampler(env_name) + env_name = "alr_envs:ALRBallInACupSimpleDetPMP-v0" + example_async_sampler(env_name) # example_mp(env_name)