diff --git a/alr_envs/classic_control/hole_reacher.py b/alr_envs/classic_control/hole_reacher.py index 96c64f7..a5a153c 100644 --- a/alr_envs/classic_control/hole_reacher.py +++ b/alr_envs/classic_control/hole_reacher.py @@ -99,10 +99,10 @@ class HoleReacher(gym.Env): if self._steps == 199: reward = - np.linalg.norm(self.end_effector - self.bottom_center_of_hole) ** 2 else: - if self.collision_penalty != 0: - reward = -self.collision_penalty - else: - reward = - np.linalg.norm(self.end_effector - self.bottom_center_of_hole) ** 2 + # if self.collision_penalty != 0: + # reward = -self.collision_penalty + # else: + reward = - np.linalg.norm(self.end_effector - self.bottom_center_of_hole) ** 2 - self.collision_penalty reward -= 5e-8 * np.sum(acc ** 2) diff --git a/alr_envs/classic_control/utils.py b/alr_envs/classic_control/utils.py index 1aaafb1..89061cb 100644 --- a/alr_envs/classic_control/utils.py +++ b/alr_envs/classic_control/utils.py @@ -52,21 +52,22 @@ def make_holereacher_env(rank, seed=0): _env = HoleReacher(num_links=5, allow_self_collision=False, allow_wall_collision=False, - hole_width=0.15, + hole_width=0.25, hole_depth=1, - hole_x=1, + hole_x=2, collision_penalty=100) _env = DmpEnvWrapper(_env, num_dof=5, num_basis=5, duration=2, + bandwidth_factor=2, dt=_env.dt, learn_goal=True, - alpha_phase=3.5, + alpha_phase=2, start_pos=_env.start_pos, policy_type="velocity", - weights_scale=100, + weights_scale=50, goal_scale=0.1 ) @@ -103,7 +104,7 @@ def make_holereacher_fix_goal_env(rank, seed=0): dt=_env.dt, learn_goal=False, final_pos=np.array([2.02669572, -1.25966385, -1.51618198, -0.80946476, 0.02012344]), - alpha_phase=3.5, + alpha_phase=3, start_pos=_env.start_pos, policy_type="velocity", weights_scale=50, @@ -139,14 +140,15 @@ def make_holereacher_env_pmp(rank, seed=0): _env = DetPMPEnvWrapper(_env, num_dof=5, num_basis=5, - width=0.025, + width=0.02, + off=0., policy_type="velocity", start_pos=_env.start_pos, duration=2, post_traj_time=0, dt=_env.dt, weights_scale=0.2, - zero_start=True, + zero_start=False, zero_goal=False ) _env.seed(seed + rank) diff --git a/alr_envs/utils/dmp_env_wrapper.py b/alr_envs/utils/dmp_env_wrapper.py index 5a2f27b..6835d80 100644 --- a/alr_envs/utils/dmp_env_wrapper.py +++ b/alr_envs/utils/dmp_env_wrapper.py @@ -14,8 +14,9 @@ class DmpEnvWrapper(gym.Wrapper): start_pos=None, final_pos=None, duration=1, - alpha_phase=2, dt=0.01, + alpha_phase=2, + bandwidth_factor=3, learn_goal=False, post_traj_time=0., policy_type=None, @@ -35,7 +36,10 @@ class DmpEnvWrapper(gym.Wrapper): self.post_traj_steps = int(post_traj_time / dt) phase_generator = ExpDecayPhaseGenerator(alpha_phase=alpha_phase, duration=duration) - basis_generator = DMPBasisGenerator(phase_generator, duration=duration, num_basis=self.num_basis) + basis_generator = DMPBasisGenerator(phase_generator, + duration=duration, + num_basis=self.num_basis, + basis_bandwidth_factor=bandwidth_factor) self.dmp = dmps.DMP(num_dof=num_dof, basis_generator=basis_generator, diff --git a/dmp_env_wrapper_example.py b/dmp_env_wrapper_example.py index 2e526e5..bb886c6 100644 --- a/dmp_env_wrapper_example.py +++ b/dmp_env_wrapper_example.py @@ -8,21 +8,23 @@ if __name__ == "__main__": n_samples = 1 n_cpus = 4 - dim = 15 + dim = 30 # env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)], # n_samples=n_samples) - test_env = make_holereacher_env_pmp(0)() + test_env = make_holereacher_env(0)() # params = 1 * np.random.randn(dim) - params = np.array([[ -0.13106822, -0.66268577, -1.37025136, -1.34813613, - -0.34040336, -1.41684643, 2.81882318, -1.93383471, - -5.84213385, -3.8623558 , -1.31946267, 3.19346678, - -9.6581148 , -8.27402906, -0.42374776, -2.06852054, - 7.21224904, -6.81061422, -9.54973119, -6.18636867, - -6.82998929, 13.00398992, -18.28106949, -6.06678165, - 2.79744735]]) + params = np.array([ -1.09434772, 7.09294269, 0.98756352, 1.61950682, + 2.66567135, 1.71267901, 8.20010847, 2.50496653, + -0.34886972, 2.07807773, 8.68615904, 3.66578556, + 5.24572097, -3.21506848, -0.28593896, 17.03756855, + -5.88445032, 6.02197609, -3.73457261, -4.24772663, + 8.69382861, -10.99939646, 5.31356886, 8.57420996, + 1.05616879, 19.79831628, -23.53288774, -3.32974082, + -5.86463784, -9.68133089]) + # params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])