reacher adjustments

parent d313795cec
commit 1881c14a48
@@ -97,6 +97,7 @@ register(
         "hole_depth": 1,
         "hole_x": None,
         "collision_penalty": 100,
+        "rew_fct": "unbounded"
     }
 )
 
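The hunk above adds a "rew_fct": "unbounded" entry to the registered kwargs. A minimal usage sketch, assuming an illustrative id of "HoleReacher-v0" (not taken from the diff) and the classic 4-tuple gym step API; because gym.make forwards keyword arguments to the registered entry point, the reward function can also be overridden per instance:

import gym
import alr_envs  # noqa: F401 -- importing the package runs the register() calls

# "HoleReacher-v0" is an assumed id for illustration only.
env = gym.make("HoleReacher-v0", rew_fct="unbounded")

obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())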
@@ -354,7 +355,7 @@ for _v in _versions:
             "wrappers": [classic_control.hole_reacher.MPWrapper],
             "mp_kwargs": {
                 "num_dof": 5,
-                "num_basis": 5,
+                "num_basis": 3,
                 "duration": 2,
                 "policy_type": "velocity",
                 "weights_scale": 5,
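For the episode-based hole reacher variants, lowering num_basis from 5 to 3 mainly shrinks the parameter vector the motion-primitive policy has to produce. A rough sketch, assuming the usual parameterization of one weight per (DoF, basis function) pair and ignoring any extra goal or scale parameters; num_mp_parameters is a hypothetical helper, not part of the repo:

def num_mp_parameters(num_dof: int, num_basis: int) -> int:
    # one weight per (DoF, basis function) pair
    return num_dof * num_basis

print(num_mp_parameters(num_dof=5, num_basis=5))  # 25 weights before this change
print(num_mp_parameters(num_dof=5, num_basis=3))  # 15 weights after this change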
@@ -402,7 +403,7 @@ for _v in _versions:
             "wrappers": [mujoco.reacher.MPWrapper],
             "mp_kwargs": {
                 "num_dof": 5 if "long" not in _v.lower() else 7,
-                "num_basis": 1,
+                "num_basis": 2,
                 "duration": 4,
                 "policy_type": "motor",
                 "weights_scale": 5,
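The MuJoCo reacher registration uses policy_type "motor" rather than the "velocity" type used for the hole reacher. As a generic sketch of what that distinction typically means (placeholder gains and signature, not the controller actually implemented in the repo): a motor policy lets the motion primitive output desired joint positions and velocities, which a PD controller converts to torques each simulation step, whereas a velocity policy applies the trajectory's velocities as actions directly.

import numpy as np

def pd_torques(q_des, qd_des, q, qd, p_gain=1.0, d_gain=0.1):
    # Placeholder PD tracking controller: torque from position and velocity error.
    # Gains and signature are illustrative assumptions, not the repo's implementation.
    return p_gain * (q_des - q) + d_gain * (qd_des - qd)

# e.g. for the 5-DoF (or 7-DoF "long") reacher, q_des/qd_des would come from the
# sampled motion-primitive trajectory at the current timestep:
tau = pd_torques(np.zeros(5), np.zeros(5), np.zeros(5), np.zeros(5))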
@@ -39,14 +39,18 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
         reward_dist = 0.0
         angular_vel = 0.0
         reward_balance = 0.0
+        is_delayed = self.steps_before_reward > 0
+        reward_ctrl = - np.square(a).sum()
         if self._steps >= self.steps_before_reward:
             vec = self.get_body_com("fingertip") - self.get_body_com("target")
             reward_dist -= self.reward_weight * np.linalg.norm(vec)
-            if self.steps_before_reward > 0:
+            if is_delayed:
                 # avoid giving this penalty for normal step based case
                 # angular_vel -= 10 * np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
                 angular_vel -= 10 * np.square(self.sim.data.qvel.flat[:self.n_links]).sum()
-        reward_ctrl = - 10 * np.square(a).sum()
+        if is_delayed:
+            # Higher control penalty for sparse reward per timestep
+            reward_ctrl *= 10
 
         if self.balance:
             reward_balance -= self.balance_weight * np.abs(
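The last hunk reworks the control-cost term: the penalty is now the plain squared-action cost by default and is only scaled by 10 in the delayed/sparse-reward configuration (steps_before_reward > 0). Pulled out of the class as a self-contained sketch; the function signature is illustrative, vec is fingertip minus target as in the diff, and the balance term and the final summation inside step() are omitted:

import numpy as np

def reacher_reward(a, vec, qvel, step, steps_before_reward, reward_weight):
    reward_dist = 0.0
    angular_vel = 0.0
    is_delayed = steps_before_reward > 0
    reward_ctrl = -np.square(a).sum()
    if step >= steps_before_reward:
        reward_dist -= reward_weight * np.linalg.norm(vec)
        if is_delayed:
            # velocity penalty only applies in the delayed-reward case
            angular_vel -= 10 * np.square(qvel).sum()
    if is_delayed:
        # higher control penalty when the distance reward is sparse per timestep
        reward_ctrl *= 10
    return reward_dist + reward_ctrl + angular_vel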