updates
This commit is contained in:
parent
a0a9c9c7fb
commit
3876478b96
@ -178,7 +178,7 @@ register(
|
|||||||
"hole_width": None,
|
"hole_width": None,
|
||||||
"hole_depth": 1,
|
"hole_depth": 1,
|
||||||
"hole_x": None,
|
"hole_x": None,
|
||||||
"collision_penalty": 100,
|
"collision_penalty": 1000,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -210,7 +210,7 @@ register(
|
|||||||
"hole_width": 0.25,
|
"hole_width": 0.25,
|
||||||
"hole_depth": 1,
|
"hole_depth": 1,
|
||||||
"hole_x": 2,
|
"hole_x": 2,
|
||||||
"collision_penalty": 2,
|
"collision_penalty": 1000,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -43,7 +43,7 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
|
|||||||
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
|
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
|
||||||
reward_function = BallInACupReward
|
reward_function = BallInACupReward
|
||||||
else:
|
else:
|
||||||
raise ValueError("Unknown reward type")
|
raise ValueError("Unknown reward type: {}".format(reward_type))
|
||||||
self.reward_function = reward_function(self.sim_steps)
|
self.reward_function = reward_function(self.sim_steps)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -106,7 +106,7 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
|
|||||||
done = success or self._steps == self.sim_steps - 1 or is_collided
|
done = success or self._steps == self.sim_steps - 1 or is_collided
|
||||||
self._steps += 1
|
self._steps += 1
|
||||||
else:
|
else:
|
||||||
reward = -2
|
reward = -2000
|
||||||
success = False
|
success = False
|
||||||
is_collided = False
|
is_collided = False
|
||||||
done = True
|
done = True
|
||||||
@ -154,6 +154,22 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
|
|||||||
des_vel_full[5] = des_vel[2]
|
des_vel_full[5] = des_vel[2]
|
||||||
return des_vel_full
|
return des_vel_full
|
||||||
|
|
||||||
|
def render(self, render_mode, **render_kwargs):
|
||||||
|
if render_mode == "plot_trajectory":
|
||||||
|
if self._steps == 1:
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
# plt.ion()
|
||||||
|
self.fig, self.axs = plt.subplots(3, 1)
|
||||||
|
|
||||||
|
if self._steps <= 1750:
|
||||||
|
for ax, cp in zip(self.axs, self.current_pos[1::2]):
|
||||||
|
ax.scatter(self._steps, cp, s=2, marker=".")
|
||||||
|
|
||||||
|
# self.fig.show()
|
||||||
|
|
||||||
|
else:
|
||||||
|
super().render(render_mode, **render_kwargs)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
env = ALRBallInACupEnv()
|
env = ALRBallInACupEnv()
|
||||||
|
@ -22,7 +22,7 @@ class BallInACupReward(alr_reward_fct.AlrReward):
|
|||||||
self.goal_final_id = None
|
self.goal_final_id = None
|
||||||
self.collision_ids = None
|
self.collision_ids = None
|
||||||
self._is_collided = False
|
self._is_collided = False
|
||||||
self.collision_penalty = 1
|
self.collision_penalty = 1000
|
||||||
|
|
||||||
self.ball_traj = None
|
self.ball_traj = None
|
||||||
self.dists = None
|
self.dists = None
|
||||||
@ -74,11 +74,11 @@ class BallInACupReward(alr_reward_fct.AlrReward):
|
|||||||
# cost_angle = (angle_min_dist - np.pi / 2)**2
|
# cost_angle = (angle_min_dist - np.pi / 2)**2
|
||||||
|
|
||||||
|
|
||||||
min_dist = self.dists[t_min_dist]
|
# min_dist = self.dists[t_min_dist]
|
||||||
dist_final = self.dists_final[-1]
|
dist_final = self.dists_final[-1]
|
||||||
min_dist_final = np.min(self.dists_final)
|
min_dist_final = np.min(self.dists_final)
|
||||||
|
|
||||||
cost = 0.5 * dist_final + 0.05 * cost_angle # TODO: Increase cost_angle weight # 0.5 * min_dist +
|
# cost = 0.5 * dist_final + 0.05 * cost_angle # TODO: Increase cost_angle weight # 0.5 * min_dist +
|
||||||
# reward = np.exp(-2 * cost) - 1e-2 * action_cost - self.collision_penalty * int(self._is_collided)
|
# reward = np.exp(-2 * cost) - 1e-2 * action_cost - self.collision_penalty * int(self._is_collided)
|
||||||
# reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
|
# reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
|
||||||
reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 5e-4 * action_cost - self.collision_penalty * int(self._is_collided)
|
reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 5e-4 * action_cost - self.collision_penalty * int(self._is_collided)
|
||||||
|
@ -106,10 +106,10 @@ def example_async_contextual_sampler(env_name="alr_envs:SimpleReacherDMP-v1", n_
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# example_mujoco()
|
# example_mujoco()
|
||||||
example_mp("alr_envs:SimpleReacherDMP-v1")
|
# example_mp("alr_envs:SimpleReacherDMP-v1")
|
||||||
# example_async("alr_envs:LongSimpleReacherDMP-v0", 4)
|
# example_async("alr_envs:LongSimpleReacherDMP-v0", 4)
|
||||||
# example_async_contextual_sampler()
|
# example_async_contextual_sampler()
|
||||||
# env = gym.make("alr_envs:HoleReacherDetPMP-v1")
|
# env = gym.make("alr_envs:HoleReacherDetPMP-v1")
|
||||||
# env_name = "alr_envs:ALRBallInACupSimpleDetPMP-v0"
|
env_name = "alr_envs:ALRBallInACupSimpleDetPMP-v0"
|
||||||
# example_async_sampler(env_name)
|
example_async_sampler(env_name)
|
||||||
# example_mp(env_name)
|
# example_mp(env_name)
|
||||||
|
Loading…
Reference in New Issue
Block a user