Maximilian Huettenrauch 2021-06-16 10:29:38 +02:00
parent a0a9c9c7fb
commit 3876478b96
4 changed files with 26 additions and 10 deletions

View File

@@ -178,7 +178,7 @@ register(
         "hole_width": None,
         "hole_depth": 1,
         "hole_x": None,
-        "collision_penalty": 100,
+        "collision_penalty": 1000,
     }
 )
@@ -210,7 +210,7 @@ register(
         "hole_width": 0.25,
         "hole_depth": 1,
         "hole_x": 2,
-        "collision_penalty": 2,
+        "collision_penalty": 1000,
     }
 )
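Note: the two kwargs blocks above sit inside gym registration calls. A minimal sketch of the surrounding call, assuming the standard gym.envs.registration API; the id and entry_point are illustrative placeholders, only the kwargs mirror this diff:

from gym.envs.registration import register

register(
    id="HoleReacherDetPMP-v1",  # hypothetical id, not taken from this diff
    entry_point="alr_envs.classic_control:HoleReacherEnv",  # hypothetical entry point
    kwargs={
        "hole_width": 0.25,
        "hole_depth": 1,
        "hole_x": 2,
        "collision_penalty": 1000,  # raised from 2 by this commit
    },
)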

View File

@@ -43,7 +43,7 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
             from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
             reward_function = BallInACupReward
         else:
-            raise ValueError("Unknown reward type")
+            raise ValueError("Unknown reward type: {}".format(reward_type))
         self.reward_function = reward_function(self.sim_steps)

     @property
@@ -106,7 +106,7 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
             done = success or self._steps == self.sim_steps - 1 or is_collided
             self._steps += 1
         else:
-            reward = -2
+            reward = -2000
             success = False
             is_collided = False
             done = True
@@ -154,6 +154,22 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
         des_vel_full[5] = des_vel[2]
         return des_vel_full

+    def render(self, render_mode, **render_kwargs):
+        if render_mode == "plot_trajectory":
+            if self._steps == 1:
+                import matplotlib.pyplot as plt
+                # plt.ion()
+                self.fig, self.axs = plt.subplots(3, 1)
+
+            if self._steps <= 1750:
+                for ax, cp in zip(self.axs, self.current_pos[1::2]):
+                    ax.scatter(self._steps, cp, s=2, marker=".")
+                # self.fig.show()
+
+        else:
+            super().render(render_mode, **render_kwargs)
+

 if __name__ == "__main__":
     env = ALRBallInACupEnv()
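Note: a minimal usage sketch for the new "plot_trajectory" render mode, assuming the usual gym-style reset/step loop (the random policy is illustrative):

import matplotlib.pyplot as plt

env = ALRBallInACupEnv()
env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())  # illustrative random actions
    env.render("plot_trajectory")  # scatters current_pos[1::2] onto the three axes
plt.show()  # the env only draws; displaying the figure is left to the caller, since plt.ion() stays commented out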

View File

@@ -22,7 +22,7 @@ class BallInACupReward(alr_reward_fct.AlrReward):
         self.goal_final_id = None
         self.collision_ids = None
         self._is_collided = False
-        self.collision_penalty = 1
+        self.collision_penalty = 1000

         self.ball_traj = None
         self.dists = None
@@ -74,11 +74,11 @@ class BallInACupReward(alr_reward_fct.AlrReward):
         # cost_angle = (angle_min_dist - np.pi / 2)**2
-        min_dist = self.dists[t_min_dist]
+        # min_dist = self.dists[t_min_dist]
         dist_final = self.dists_final[-1]
         min_dist_final = np.min(self.dists_final)

-        cost = 0.5 * dist_final + 0.05 * cost_angle  # TODO: Increase cost_angle weight  # 0.5 * min_dist +
+        # cost = 0.5 * dist_final + 0.05 * cost_angle  # TODO: Increase cost_angle weight  # 0.5 * min_dist +
         # reward = np.exp(-2 * cost) - 1e-2 * action_cost - self.collision_penalty * int(self._is_collided)
         # reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
         reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 5e-4 * action_cost - self.collision_penalty * int(self._is_collided)
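Note: with collision_penalty raised to 1000, the collision term dominates the shaping terms by several orders of magnitude. For scale, with illustrative values dist_final = 0.5, min_dist_final = 0.3, cost_angle = 1.0, and action_cost = 100, the shaping part is -0.25 - 0.09 - 0.0001 - 0.05 = -0.39, versus an extra -1000 on any collision. The active reward term, reformatted for readability (names as in the diff; the inputs are computed elsewhere in the class):

reward = (
    - dist_final ** 2                                  # distance to the cup at the last step
    - min_dist_final ** 2                              # best distance over the final phase
    - 1e-4 * cost_angle                                # small angle shaping term
    - 5e-4 * action_cost                               # small action regularizer
    - self.collision_penalty * int(self._is_collided)  # -1000 on any collision
)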

View File

@@ -106,10 +106,10 @@ def example_async_contextual_sampler(env_name="alr_envs:SimpleReacherDMP-v1", n_

 if __name__ == '__main__':
     # example_mujoco()
-    example_mp("alr_envs:SimpleReacherDMP-v1")
+    # example_mp("alr_envs:SimpleReacherDMP-v1")
     # example_async("alr_envs:LongSimpleReacherDMP-v0", 4)
     # example_async_contextual_sampler()
     # env = gym.make("alr_envs:HoleReacherDetPMP-v1")
-    # env_name = "alr_envs:ALRBallInACupSimpleDetPMP-v0"
-    # example_async_sampler(env_name)
+    env_name = "alr_envs:ALRBallInACupSimpleDetPMP-v0"
+    example_async_sampler(env_name)
     # example_mp(env_name)
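Note: a quick way to check that the env id enabled above resolves, assuming alr_envs is importable and registers its ids on import (standard gym API, same make syntax as the commented gym.make call above):

import gym

env = gym.make("alr_envs:ALRBallInACupSimpleDetPMP-v0")
obs = env.reset()
print(env.action_space, env.observation_space)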