updates and bugfix in detpmp_wrapper
This commit is contained in:
parent
f5f12c846f
commit
4aa31a004a
@ -280,7 +280,7 @@ for v in versions:
|
|||||||
"num_dof": 5,
|
"num_dof": 5,
|
||||||
"num_basis": 5,
|
"num_basis": 5,
|
||||||
"duration": 2,
|
"duration": 2,
|
||||||
"width": 0.01,
|
"width": 0.025,
|
||||||
"policy_type": "velocity",
|
"policy_type": "velocity",
|
||||||
"weights_scale": 0.2,
|
"weights_scale": 0.2,
|
||||||
"zero_start": True
|
"zero_start": True
|
||||||
@ -352,7 +352,8 @@ register(
|
|||||||
"num_basis": 5,
|
"num_basis": 5,
|
||||||
"duration": 3.5,
|
"duration": 3.5,
|
||||||
"post_traj_time": 4.5,
|
"post_traj_time": 4.5,
|
||||||
"width": 0.005,
|
"width": 0.0035,
|
||||||
|
# "off": -0.05,
|
||||||
"policy_type": "motor",
|
"policy_type": "motor",
|
||||||
"weights_scale": 0.2,
|
"weights_scale": 0.2,
|
||||||
"zero_start": True,
|
"zero_start": True,
|
||||||
|
@ -73,12 +73,13 @@ class HoleReacherEnv(AlrEnv):
|
|||||||
|
|
||||||
acc = (action - self._angle_velocity) / self.dt
|
acc = (action - self._angle_velocity) / self.dt
|
||||||
self._angle_velocity = action
|
self._angle_velocity = action
|
||||||
self._joint_angles = self._joint_angles + self.dt * self._angle_velocity
|
self._joint_angles = self._joint_angles + self.dt * self._angle_velocity # + 0.001 * np.random.randn(5)
|
||||||
self._update_joints()
|
self._update_joints()
|
||||||
|
|
||||||
reward, info = self._get_reward(acc)
|
reward, info = self._get_reward(acc)
|
||||||
|
|
||||||
info.update({"is_collided": self._is_collided})
|
info.update({"is_collided": self._is_collided})
|
||||||
|
self.end_effector_traj.append(np.copy(self.end_effector))
|
||||||
|
|
||||||
self._steps += 1
|
self._steps += 1
|
||||||
done = self._is_collided
|
done = self._is_collided
|
||||||
@ -101,6 +102,7 @@ class HoleReacherEnv(AlrEnv):
|
|||||||
self._joints = np.zeros((self.n_links + 1, 2))
|
self._joints = np.zeros((self.n_links + 1, 2))
|
||||||
self._update_joints()
|
self._update_joints()
|
||||||
self._steps = 0
|
self._steps = 0
|
||||||
|
self.end_effector_traj = []
|
||||||
|
|
||||||
return self._get_obs().copy()
|
return self._get_obs().copy()
|
||||||
|
|
||||||
|
@ -126,6 +126,7 @@
|
|||||||
<geom name="cup_base" pos="0 -0.035 0.1165" euler="-1.57 0 0" type="cylinder" size="0.038 0.0045" solref="-10000 -100"/>
|
<geom name="cup_base" pos="0 -0.035 0.1165" euler="-1.57 0 0" type="cylinder" size="0.038 0.0045" solref="-10000 -100"/>
|
||||||
<!-- <geom name="cup_base_contact" pos="0 -0.025 0.1165" euler="-1.57 0 0" type="cylinder" size="0.03 0.0005" solref="-10000 -100" rgba="0 0 255 1"/>-->
|
<!-- <geom name="cup_base_contact" pos="0 -0.025 0.1165" euler="-1.57 0 0" type="cylinder" size="0.03 0.0005" solref="-10000 -100" rgba="0 0 255 1"/>-->
|
||||||
<geom name="cup_base_contact" pos="0 -0.005 0.1165" euler="-1.57 0 0" type="cylinder" size="0.02 0.0005" solref="-10000 -100" rgba="0 0 255 1"/>
|
<geom name="cup_base_contact" pos="0 -0.005 0.1165" euler="-1.57 0 0" type="cylinder" size="0.02 0.0005" solref="-10000 -100" rgba="0 0 255 1"/>
|
||||||
|
<geom name="cup_base_contact_below" pos="0 -0.04 0.1165" euler="-1.57 0 0" type="cylinder" size="0.035 0.001" solref="-10000 -100" rgba="255 0 255 1"/>
|
||||||
<!-- <geom name="cup_geom11" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup11" />-->
|
<!-- <geom name="cup_geom11" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup11" />-->
|
||||||
<!-- <geom name="cup_geom12" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup12" />-->
|
<!-- <geom name="cup_geom12" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup12" />-->
|
||||||
<!-- <geom name="cup_geom13" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup13" />-->
|
<!-- <geom name="cup_geom13" pos="0 0.05 0.055" euler="-1.57 0 0" solref="-10000 -100" type="mesh" mesh="cup13" />-->
|
||||||
|
@ -90,26 +90,27 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
|
|||||||
reward_ctrl = - np.square(a).sum()
|
reward_ctrl = - np.square(a).sum()
|
||||||
|
|
||||||
crash = self.do_simulation(a)
|
crash = self.do_simulation(a)
|
||||||
joint_cons_viol = self.check_traj_in_joint_limits()
|
# joint_cons_viol = self.check_traj_in_joint_limits()
|
||||||
|
|
||||||
self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
|
self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
|
||||||
self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
|
self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
|
||||||
|
|
||||||
ob = self._get_obs()
|
ob = self._get_obs()
|
||||||
|
|
||||||
if not crash and not joint_cons_viol:
|
if not crash:
|
||||||
reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps)
|
reward, success, is_collided = self.reward_function.compute_reward(a, self)
|
||||||
done = success or self._steps == self.sim_steps - 1 or stop_sim
|
done = success or self._steps == self.sim_steps - 1 or is_collided
|
||||||
self._steps += 1
|
self._steps += 1
|
||||||
else:
|
else:
|
||||||
reward = -1000
|
reward = -2
|
||||||
success = False
|
success = False
|
||||||
|
is_collided = False
|
||||||
done = True
|
done = True
|
||||||
return ob, reward, done, dict(reward_dist=reward_dist,
|
return ob, reward, done, dict(reward_dist=reward_dist,
|
||||||
reward_ctrl=reward_ctrl,
|
reward_ctrl=reward_ctrl,
|
||||||
velocity=angular_vel,
|
velocity=angular_vel,
|
||||||
traj=self._q_pos, is_success=success,
|
traj=self._q_pos, is_success=success,
|
||||||
is_collided=crash or joint_cons_viol)
|
is_collided=is_collided, sim_crash=crash)
|
||||||
|
|
||||||
def check_traj_in_joint_limits(self):
|
def check_traj_in_joint_limits(self):
|
||||||
return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
|
return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
|
||||||
|
@ -6,7 +6,8 @@ class BallInACupReward(alr_reward_fct.AlrReward):
|
|||||||
def __init__(self, sim_time):
|
def __init__(self, sim_time):
|
||||||
self.sim_time = sim_time
|
self.sim_time = sim_time
|
||||||
|
|
||||||
self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
|
self.collision_objects = ["cup_geom1", "cup_geom2", "cup_base_contact_below",
|
||||||
|
"wrist_palm_link_convex_geom",
|
||||||
"wrist_pitch_link_convex_decomposition_p1_geom",
|
"wrist_pitch_link_convex_decomposition_p1_geom",
|
||||||
"wrist_pitch_link_convex_decomposition_p2_geom",
|
"wrist_pitch_link_convex_decomposition_p2_geom",
|
||||||
"wrist_pitch_link_convex_decomposition_p3_geom",
|
"wrist_pitch_link_convex_decomposition_p3_geom",
|
||||||
@ -20,6 +21,8 @@ class BallInACupReward(alr_reward_fct.AlrReward):
|
|||||||
self.goal_id = None
|
self.goal_id = None
|
||||||
self.goal_final_id = None
|
self.goal_final_id = None
|
||||||
self.collision_ids = None
|
self.collision_ids = None
|
||||||
|
self._is_collided = False
|
||||||
|
self.collision_penalty = 1
|
||||||
|
|
||||||
self.ball_traj = None
|
self.ball_traj = None
|
||||||
self.dists = None
|
self.dists = None
|
||||||
@ -36,49 +39,52 @@ class BallInACupReward(alr_reward_fct.AlrReward):
|
|||||||
self.action_costs = []
|
self.action_costs = []
|
||||||
self.cup_angles = []
|
self.cup_angles = []
|
||||||
|
|
||||||
def compute_reward(self, action, sim, step, context=None):
|
def compute_reward(self, action, env):
|
||||||
self.ball_id = sim.model._body_name2id["ball"]
|
self.ball_id = env.sim.model._body_name2id["ball"]
|
||||||
self.ball_collision_id = sim.model._geom_name2id["ball_geom"]
|
self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
|
||||||
self.goal_id = sim.model._site_name2id["cup_goal"]
|
self.goal_id = env.sim.model._site_name2id["cup_goal"]
|
||||||
self.goal_final_id = sim.model._site_name2id["cup_goal_final"]
|
self.goal_final_id = env.sim.model._site_name2id["cup_goal_final"]
|
||||||
self.collision_ids = [sim.model._geom_name2id[name] for name in self.collision_objects]
|
self.collision_ids = [env.sim.model._geom_name2id[name] for name in self.collision_objects]
|
||||||
|
|
||||||
ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
|
ball_in_cup = self.check_ball_in_cup(env.sim, self.ball_collision_id)
|
||||||
|
|
||||||
# Compute the current distance from the ball to the inner part of the cup
|
# Compute the current distance from the ball to the inner part of the cup
|
||||||
goal_pos = sim.data.site_xpos[self.goal_id]
|
goal_pos = env.sim.data.site_xpos[self.goal_id]
|
||||||
ball_pos = sim.data.body_xpos[self.ball_id]
|
ball_pos = env.sim.data.body_xpos[self.ball_id]
|
||||||
goal_final_pos = sim.data.site_xpos[self.goal_final_id]
|
goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
|
||||||
self.dists.append(np.linalg.norm(goal_pos - ball_pos))
|
self.dists.append(np.linalg.norm(goal_pos - ball_pos))
|
||||||
self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
|
self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
|
||||||
self.ball_traj[step, :] = ball_pos
|
self.ball_traj[env._steps, :] = ball_pos
|
||||||
cup_quat = np.copy(sim.data.body_xquat[sim.model._body_name2id["cup"]])
|
cup_quat = np.copy(env.sim.data.body_xquat[env.sim.model._body_name2id["cup"]])
|
||||||
self.cup_angles.append(np.arctan2(2 * (cup_quat[0] * cup_quat[1] + cup_quat[2] * cup_quat[3]),
|
self.cup_angles.append(np.arctan2(2 * (cup_quat[0] * cup_quat[1] + cup_quat[2] * cup_quat[3]),
|
||||||
1 - 2 * (cup_quat[1]**2 + cup_quat[2]**2)))
|
1 - 2 * (cup_quat[1]**2 + cup_quat[2]**2)))
|
||||||
|
|
||||||
action_cost = np.sum(np.square(action))
|
action_cost = np.sum(np.square(action))
|
||||||
self.action_costs.append(action_cost)
|
self.action_costs.append(action_cost)
|
||||||
|
|
||||||
if self.check_collision(sim):
|
self._is_collided = self.check_collision(env.sim) or env.check_traj_in_joint_limits()
|
||||||
reward = - 1000
|
|
||||||
return reward, False, True
|
|
||||||
|
|
||||||
if step == self.sim_time - 1:
|
if env._steps == env.sim_steps - 1 or self._is_collided:
|
||||||
t_min_dist = np.argmin(self.dists)
|
t_min_dist = np.argmin(self.dists)
|
||||||
angle_min_dist = self.cup_angles[t_min_dist]
|
angle_min_dist = self.cup_angles[t_min_dist]
|
||||||
cost_angle = (angle_min_dist - np.pi / 2)**2
|
cost_angle = (angle_min_dist - np.pi / 2)**2
|
||||||
|
|
||||||
min_dist = self.dists[t_min_dist]
|
min_dist = self.dists[t_min_dist]
|
||||||
dist_final = self.dists_final[-1]
|
dist_final = self.dists_final[-1]
|
||||||
|
min_dist_final = np.min(self.dists_final)
|
||||||
|
|
||||||
cost = 0.5 * min_dist + 0.5 * dist_final + 0.01 * cost_angle
|
cost = 0.5 * dist_final + 0.05 * cost_angle # TODO: Increase cost_angle weight # 0.5 * min_dist +
|
||||||
reward = np.exp(-2 * cost) - 1e-3 * action_cost
|
# reward = np.exp(-2 * cost) - 1e-2 * action_cost - self.collision_penalty * int(self._is_collided)
|
||||||
success = dist_final < 0.05 and ball_in_cup
|
# reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
|
||||||
|
reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
|
||||||
|
success = dist_final < 0.05 and ball_in_cup and not self._is_collided
|
||||||
|
crash = self._is_collided
|
||||||
else:
|
else:
|
||||||
reward = - 1e-3 * action_cost
|
reward = - 1e-5 * action_cost # TODO: increase action_cost weight
|
||||||
success = False
|
success = False
|
||||||
|
crash = False
|
||||||
|
|
||||||
return reward, success, False
|
return reward, success, crash
|
||||||
|
|
||||||
def check_ball_in_cup(self, sim, ball_collision_id):
|
def check_ball_in_cup(self, sim, ball_collision_id):
|
||||||
cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
|
cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
|
||||||
|
@ -12,16 +12,16 @@ class DetPMPWrapper(MPWrapper):
|
|||||||
zero_start: bool = False, zero_goal: bool = False, **mp_kwargs):
|
zero_start: bool = False, zero_goal: bool = False, **mp_kwargs):
|
||||||
self.duration = duration # seconds
|
self.duration = duration # seconds
|
||||||
|
|
||||||
|
dt = env.dt if hasattr(env, "dt") else dt
|
||||||
|
assert dt is not None
|
||||||
|
self.dt = dt
|
||||||
|
|
||||||
super().__init__(env, num_dof, dt, duration, post_traj_time, policy_type, weights_scale, num_basis=num_basis,
|
super().__init__(env, num_dof, dt, duration, post_traj_time, policy_type, weights_scale, num_basis=num_basis,
|
||||||
width=width, zero_start=zero_start, zero_goal=zero_goal, **mp_kwargs)
|
width=width, zero_start=zero_start, zero_goal=zero_goal, **mp_kwargs)
|
||||||
|
|
||||||
self.dt = env.dt if hasattr(env, "dt") else dt
|
|
||||||
assert self.dt is not None
|
|
||||||
|
|
||||||
action_bounds = np.inf * np.ones((self.mp.n_basis * self.mp.n_dof))
|
action_bounds = np.inf * np.ones((self.mp.n_basis * self.mp.n_dof))
|
||||||
self.action_space = gym.spaces.Box(low=-action_bounds, high=action_bounds, dtype=np.float32)
|
self.action_space = gym.spaces.Box(low=-action_bounds, high=action_bounds, dtype=np.float32)
|
||||||
|
|
||||||
|
|
||||||
def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5, width: float = None,
|
def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5, width: float = None,
|
||||||
off: float = 0.01, zero_start: bool = False, zero_goal: bool = False):
|
off: float = 0.01, zero_start: bool = False, zero_goal: bool = False):
|
||||||
pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=off,
|
pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=off,
|
||||||
|
Loading…
Reference in New Issue
Block a user