updates
Commit 3876478b96 (parent a0a9c9c7fb)

@@ -178,7 +178,7 @@ register(
         "hole_width": None,
         "hole_depth": 1,
         "hole_x": None,
-        "collision_penalty": 100,
+        "collision_penalty": 1000,
     }
 )

@@ -210,7 +210,7 @@ register(
         "hole_width": 0.25,
         "hole_depth": 1,
         "hole_x": 2,
-        "collision_penalty": 2,
+        "collision_penalty": 1000,
     }
 )
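
Both registrations now share collision_penalty=1000 (previously 100 and 2). For orientation, a sketch of the full call these hunks sit in, assuming the standard gym registry; the id and entry_point are illustrative, only the kwargs are the post-change values:

from gym.envs.registration import register

register(
    id='HoleReacher-v2',                                 # illustrative id
    entry_point='alr_envs.classic_control:HoleReacher',  # illustrative path
    kwargs={
        "hole_width": 0.25,
        "hole_depth": 1,
        "hole_x": 2,
        "collision_penalty": 1000,  # raised from 2 in this commit
    }
)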

@@ -43,7 +43,7 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
             from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
             reward_function = BallInACupReward
         else:
-            raise ValueError("Unknown reward type")
+            raise ValueError("Unknown reward type: {}".format(reward_type))
         self.reward_function = reward_function(self.sim_steps)
 
     @property

@@ -106,7 +106,7 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
             done = success or self._steps == self.sim_steps - 1 or is_collided
             self._steps += 1
         else:
-            reward = -2
+            reward = -2000
             success = False
             is_collided = False
             done = True
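
For scale: this failure branch now costs -2000, twice the per-collision penalty of 1000 introduced below; the old value of -2 made crashing the simulation far cheaper than colliding. A worked comparison under the new numbers, with the small distance and action terms of the reward omitted for clarity:

# Hypothetical terminal rewards, ignoring the small distance/action terms:
collision_rollout = -1000   # collision_penalty * int(is_collided)
failed_rollout    = -2000   # flat penalty from this hunk
# failure < collision, so instability is never the preferred outcome.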

@@ -154,6 +154,22 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
         des_vel_full[5] = des_vel[2]
         return des_vel_full
 
+    def render(self, render_mode, **render_kwargs):
+        if render_mode == "plot_trajectory":
+            if self._steps == 1:
+                import matplotlib.pyplot as plt
+                # plt.ion()
+                self.fig, self.axs = plt.subplots(3, 1)
+
+            if self._steps <= 1750:
+                for ax, cp in zip(self.axs, self.current_pos[1::2]):
+                    ax.scatter(self._steps, cp, s=2, marker=".")
+
+            # self.fig.show()
+
+        else:
+            super().render(render_mode, **render_kwargs)
+
 
 if __name__ == "__main__":
     env = ALRBallInACupEnv()
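
The new "plot_trajectory" mode scatters every other entry of current_pos into three subplots (one point per step, up to step 1750) instead of opening the MuJoCo viewer. A minimal usage sketch, assuming the usual gym rollout API (the random policy is illustrative):

import matplotlib.pyplot as plt

env = ALRBallInACupEnv()
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())  # any policy
    env.render("plot_trajectory")  # accumulates one scatter point per subplot

plt.show()  # the mode never calls fig.show() itself, so display afterwards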

@@ -22,7 +22,7 @@ class BallInACupReward(alr_reward_fct.AlrReward):
         self.goal_final_id = None
         self.collision_ids = None
         self._is_collided = False
-        self.collision_penalty = 1
+        self.collision_penalty = 1000
 
         self.ball_traj = None
         self.dists = None

@@ -74,11 +74,11 @@ class BallInACupReward(alr_reward_fct.AlrReward):
         # cost_angle = (angle_min_dist - np.pi / 2)**2
 
 
-        min_dist = self.dists[t_min_dist]
+        # min_dist = self.dists[t_min_dist]
         dist_final = self.dists_final[-1]
         min_dist_final = np.min(self.dists_final)
 
-        cost = 0.5 * dist_final + 0.05 * cost_angle  # TODO: Increase cost_angle weight  # 0.5 * min_dist +
+        # cost = 0.5 * dist_final + 0.05 * cost_angle  # TODO: Increase cost_angle weight  # 0.5 * min_dist +
         # reward = np.exp(-2 * cost) - 1e-2 * action_cost - self.collision_penalty * int(self._is_collided)
-        # reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
+        reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 5e-4 * action_cost - self.collision_penalty * int(self._is_collided)
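
The reward thus moves from the exponentially shaped cost to a direct quadratic penalty on both the final ball-to-goal distance and the best distance reached during the episode, with a heavier action cost (5e-4 vs. 1e-5) and the collision penalty now at 1000. A standalone restatement, assuming dists_final is the per-step ball-to-goal distance trace the class accumulates:

import numpy as np

def reward_sketch(dists_final, cost_angle, action_cost, is_collided,
                  collision_penalty=1000):
    # Hypothetical free-function version of the new reward, for clarity.
    dist_final = dists_final[-1]          # distance at the last step
    min_dist_final = np.min(dists_final)  # best distance over the rollout
    return (- dist_final**2 - min_dist_final**2
            - 1e-4 * cost_angle - 5e-4 * action_cost
            - collision_penalty * int(is_collided))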

@@ -106,10 +106,10 @@ def example_async_contextual_sampler(env_name="alr_envs:SimpleReacherDMP-v1", n_
 
 if __name__ == '__main__':
     # example_mujoco()
-    example_mp("alr_envs:SimpleReacherDMP-v1")
+    # example_mp("alr_envs:SimpleReacherDMP-v1")
     # example_async("alr_envs:LongSimpleReacherDMP-v0", 4)
     # example_async_contextual_sampler()
     # env = gym.make("alr_envs:HoleReacherDetPMP-v1")
-    # env_name = "alr_envs:ALRBallInACupSimpleDetPMP-v0"
-    # example_async_sampler(env_name)
+    env_name = "alr_envs:ALRBallInACupSimpleDetPMP-v0"
+    example_async_sampler(env_name)
     # example_mp(env_name)
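
The examples' entry point now runs the async sampler on the ball-in-a-cup DetPMP task instead of example_mp on the simple reacher. example_async_sampler's body is outside this diff; as an illustrative single-env stand-in for what it exercises, assuming it builds environments via gym.make:

import gym

env = gym.make("alr_envs:ALRBallInACupSimpleDetPMP-v0")  # id from the diff
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())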