holereach025

This commit is contained in:
Maximilian Huettenrauch 2021-04-10 13:25:08 +02:00
parent 1fd44616bf
commit f6cef69225
4 changed files with 30 additions and 22 deletions

View File

@@ -99,10 +99,10 @@ class HoleReacher(gym.Env):
if self._steps == 199: if self._steps == 199:
reward = - np.linalg.norm(self.end_effector - self.bottom_center_of_hole) ** 2 reward = - np.linalg.norm(self.end_effector - self.bottom_center_of_hole) ** 2
else: else:
if self.collision_penalty != 0: # if self.collision_penalty != 0:
reward = -self.collision_penalty # reward = -self.collision_penalty
else: # else:
reward = - np.linalg.norm(self.end_effector - self.bottom_center_of_hole) ** 2 reward = - np.linalg.norm(self.end_effector - self.bottom_center_of_hole) ** 2 - self.collision_penalty
reward -= 5e-8 * np.sum(acc ** 2) reward -= 5e-8 * np.sum(acc ** 2)

View File

@@ -52,21 +52,22 @@ def make_holereacher_env(rank, seed=0):
_env = HoleReacher(num_links=5, _env = HoleReacher(num_links=5,
allow_self_collision=False, allow_self_collision=False,
allow_wall_collision=False, allow_wall_collision=False,
hole_width=0.15, hole_width=0.25,
hole_depth=1, hole_depth=1,
hole_x=1, hole_x=2,
collision_penalty=100) collision_penalty=100)
_env = DmpEnvWrapper(_env, _env = DmpEnvWrapper(_env,
num_dof=5, num_dof=5,
num_basis=5, num_basis=5,
duration=2, duration=2,
bandwidth_factor=2,
dt=_env.dt, dt=_env.dt,
learn_goal=True, learn_goal=True,
alpha_phase=3.5, alpha_phase=2,
start_pos=_env.start_pos, start_pos=_env.start_pos,
policy_type="velocity", policy_type="velocity",
weights_scale=100, weights_scale=50,
goal_scale=0.1 goal_scale=0.1
) )
@@ -103,7 +104,7 @@ def make_holereacher_fix_goal_env(rank, seed=0):
dt=_env.dt, dt=_env.dt,
learn_goal=False, learn_goal=False,
final_pos=np.array([2.02669572, -1.25966385, -1.51618198, -0.80946476, 0.02012344]), final_pos=np.array([2.02669572, -1.25966385, -1.51618198, -0.80946476, 0.02012344]),
alpha_phase=3.5, alpha_phase=3,
start_pos=_env.start_pos, start_pos=_env.start_pos,
policy_type="velocity", policy_type="velocity",
weights_scale=50, weights_scale=50,
@@ -139,14 +140,15 @@ def make_holereacher_env_pmp(rank, seed=0):
_env = DetPMPEnvWrapper(_env, _env = DetPMPEnvWrapper(_env,
num_dof=5, num_dof=5,
num_basis=5, num_basis=5,
width=0.025, width=0.02,
off=0.,
policy_type="velocity", policy_type="velocity",
start_pos=_env.start_pos, start_pos=_env.start_pos,
duration=2, duration=2,
post_traj_time=0, post_traj_time=0,
dt=_env.dt, dt=_env.dt,
weights_scale=0.2, weights_scale=0.2,
zero_start=True, zero_start=False,
zero_goal=False zero_goal=False
) )
_env.seed(seed + rank) _env.seed(seed + rank)

View File

@@ -14,8 +14,9 @@ class DmpEnvWrapper(gym.Wrapper):
start_pos=None, start_pos=None,
final_pos=None, final_pos=None,
duration=1, duration=1,
alpha_phase=2,
dt=0.01, dt=0.01,
alpha_phase=2,
bandwidth_factor=3,
learn_goal=False, learn_goal=False,
post_traj_time=0., post_traj_time=0.,
policy_type=None, policy_type=None,
@@ -35,7 +36,10 @@ class DmpEnvWrapper(gym.Wrapper):
self.post_traj_steps = int(post_traj_time / dt) self.post_traj_steps = int(post_traj_time / dt)
phase_generator = ExpDecayPhaseGenerator(alpha_phase=alpha_phase, duration=duration) phase_generator = ExpDecayPhaseGenerator(alpha_phase=alpha_phase, duration=duration)
basis_generator = DMPBasisGenerator(phase_generator, duration=duration, num_basis=self.num_basis) basis_generator = DMPBasisGenerator(phase_generator,
duration=duration,
num_basis=self.num_basis,
basis_bandwidth_factor=bandwidth_factor)
self.dmp = dmps.DMP(num_dof=num_dof, self.dmp = dmps.DMP(num_dof=num_dof,
basis_generator=basis_generator, basis_generator=basis_generator,

View File

@@ -8,21 +8,23 @@ if __name__ == "__main__":
n_samples = 1 n_samples = 1
n_cpus = 4 n_cpus = 4
dim = 15 dim = 30
# env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)], # env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
# n_samples=n_samples) # n_samples=n_samples)
test_env = make_holereacher_env_pmp(0)() test_env = make_holereacher_env(0)()
# params = 1 * np.random.randn(dim) # params = 1 * np.random.randn(dim)
params = np.array([[ -0.13106822, -0.66268577, -1.37025136, -1.34813613, params = np.array([ -1.09434772, 7.09294269, 0.98756352, 1.61950682,
-0.34040336, -1.41684643, 2.81882318, -1.93383471, 2.66567135, 1.71267901, 8.20010847, 2.50496653,
-5.84213385, -3.8623558 , -1.31946267, 3.19346678, -0.34886972, 2.07807773, 8.68615904, 3.66578556,
-9.6581148 , -8.27402906, -0.42374776, -2.06852054, 5.24572097, -3.21506848, -0.28593896, 17.03756855,
7.21224904, -6.81061422, -9.54973119, -6.18636867, -5.88445032, 6.02197609, -3.73457261, -4.24772663,
-6.82998929, 13.00398992, -18.28106949, -6.06678165, 8.69382861, -10.99939646, 5.31356886, 8.57420996,
2.79744735]]) 1.05616879, 19.79831628, -23.53288774, -3.32974082,
-5.86463784, -9.68133089])
# params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])]) # params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])