viapoint reacher reward bug fix
This commit is contained in:
parent
1e3f036478
commit
60e1673ee1
@ -23,11 +23,12 @@ def make_viapointreacher_env(rank, seed=0):
|
|||||||
num_dof=5,
|
num_dof=5,
|
||||||
num_basis=5,
|
num_basis=5,
|
||||||
duration=2,
|
duration=2,
|
||||||
alpha_phase=2,
|
alpha_phase=2.5,
|
||||||
dt=_env.dt,
|
dt=_env.dt,
|
||||||
start_pos=_env.start_pos,
|
start_pos=_env.start_pos,
|
||||||
learn_goal=False,
|
learn_goal=False,
|
||||||
policy_type="velocity")
|
policy_type="velocity",
|
||||||
|
weights_scale=50)
|
||||||
_env.seed(seed + rank)
|
_env.seed(seed + rank)
|
||||||
return _env
|
return _env
|
||||||
|
|
||||||
|
@ -83,7 +83,7 @@ class ViaPointReacher(gym.Env):
|
|||||||
if not self._is_collided:
|
if not self._is_collided:
|
||||||
if self._steps == 100:
|
if self._steps == 100:
|
||||||
dist_reward = np.linalg.norm(self.end_effector - self.via_point)
|
dist_reward = np.linalg.norm(self.end_effector - self.via_point)
|
||||||
if self._steps == 200:
|
if self._steps == 199:
|
||||||
dist_reward = np.linalg.norm(self.end_effector - self.goal_point)
|
dist_reward = np.linalg.norm(self.end_effector - self.goal_point)
|
||||||
|
|
||||||
reward = - dist_reward ** 2
|
reward = - dist_reward ** 2
|
||||||
|
@ -14,10 +14,19 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
test_env = make_viapointreacher_env(0)()
|
test_env = make_viapointreacher_env(0)()
|
||||||
|
|
||||||
params = np.random.randn(n_samples, dim)
|
# params = np.random.randn(n_samples, dim)
|
||||||
|
params = np.array([ 217.54494933, -1.85169983, 24.08414447, 42.23816868,
|
||||||
|
23.32071702, 7.60780651, -31.74777741, 265.50634253,
|
||||||
|
463.43822562, 245.93948374, -272.64003621, -45.24999553,
|
||||||
|
503.21185823, 809.17742517, 393.12387021, -196.54196471,
|
||||||
|
6.79327307, 374.82429078, 552.4119579 , 197.3963343 ,
|
||||||
|
243.87357056, -39.56041541, -616.93957463, -710.0772516 ,
|
||||||
|
-414.21769789])
|
||||||
|
|
||||||
# params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
|
# params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
|
||||||
|
|
||||||
test_env.rollout(params, render=True)
|
rew, info = test_env.rollout(params, render=True)
|
||||||
|
print(rew)
|
||||||
|
|
||||||
# out = env(params)
|
# out = env(params)
|
||||||
# print(out)
|
# print(out)
|
||||||
|
Loading…
Reference in New Issue
Block a user