updates
This commit is contained in:
		
							parent
							
								
									95250af31c
								
							
						
					
					
						commit
						77d0cbd00a
					
				| @ -1,8 +1,9 @@ | ||||
| from alr_envs.classic_control.hole_reacher import HoleReacher | ||||
| from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapperVel | ||||
| from alr_envs.classic_control.viapoint_reacher import ViaPointReacher | ||||
| from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper | ||||
| 
 | ||||
| 
 | ||||
| def make_env(rank, seed=0): | ||||
| def make_viapointreacher_env(rank, seed=0): | ||||
|     """ | ||||
|     Utility function for multiprocessed env. | ||||
| 
 | ||||
| @ -14,21 +15,56 @@ def make_env(rank, seed=0): | ||||
|     """ | ||||
| 
 | ||||
|     def _init(): | ||||
|         env = HoleReacher(num_links=5, | ||||
|                           allow_self_collision=False, | ||||
|                           allow_wall_collision=False, | ||||
|                           hole_width=0.15, | ||||
|                           hole_depth=1, | ||||
|                           hole_x=1, | ||||
|                           collision_penalty=100000) | ||||
|         _env = ViaPointReacher(num_links=5, | ||||
|                                allow_self_collision=False, | ||||
|                                collision_penalty=1000) | ||||
| 
 | ||||
|         env = DmpEnvWrapperVel(env, | ||||
|                                num_dof=5, | ||||
|                                num_basis=5, | ||||
|                                duration=2, | ||||
|                                dt=env.dt, | ||||
|                                learn_goal=True) | ||||
|         env.seed(seed + rank) | ||||
|         return env | ||||
|         _env = DmpEnvWrapper(_env, | ||||
|                              num_dof=5, | ||||
|                              num_basis=5, | ||||
|                              duration=2, | ||||
|                              alpha_phase=2, | ||||
|                              dt=_env.dt, | ||||
|                              start_pos=_env.start_pos, | ||||
|                              learn_goal=False, | ||||
|                              policy_type="velocity") | ||||
|         _env.seed(seed + rank) | ||||
|         return _env | ||||
| 
 | ||||
|     return _init | ||||
| 
 | ||||
| 
 | ||||
| def make_holereacher_env(rank, seed=0): | ||||
|     """ | ||||
|     Utility function for multiprocessed env. | ||||
| 
 | ||||
|     :param env_id: (str) the environment ID | ||||
|     :param num_env: (int) the number of environments you wish to have in subprocesses | ||||
|     :param seed: (int) the initial seed for RNG | ||||
|     :param rank: (int) index of the subprocess | ||||
|     :returns a function that generates an environment | ||||
|     """ | ||||
| 
 | ||||
|     def _init(): | ||||
|         _env = HoleReacher(num_links=5, | ||||
|                            allow_self_collision=False, | ||||
|                            allow_wall_collision=False, | ||||
|                            hole_width=0.15, | ||||
|                            hole_depth=1, | ||||
|                            hole_x=1, | ||||
|                            collision_penalty=100000) | ||||
| 
 | ||||
|         _env = DmpEnvWrapper(_env, | ||||
|                              num_dof=5, | ||||
|                              num_basis=5, | ||||
|                              duration=2, | ||||
|                              dt=_env.dt, | ||||
|                              learn_goal=True, | ||||
|                              alpha_phase=2, | ||||
|                              start_pos=_env.start_pos, | ||||
|                              policy_type="velocity" | ||||
|                              ) | ||||
|         _env.seed(seed + rank) | ||||
|         return _env | ||||
| 
 | ||||
|     return _init | ||||
|  | ||||
| @ -13,29 +13,24 @@ def intersect(A, B, C, D): | ||||
|     return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D) | ||||
| 
 | ||||
| 
 | ||||
| class HoleReacher(gym.Env): | ||||
| class ViaPointReacher(gym.Env): | ||||
| 
 | ||||
|     def __init__(self, num_links, hole_x, hole_width, hole_depth, allow_self_collision=False, | ||||
|                  allow_wall_collision=False, collision_penalty=1000): | ||||
|         self.hole_x = hole_x  # x-position of center of hole | ||||
|         self.hole_width = hole_width  # width of hole | ||||
|         self.hole_depth = hole_depth  # depth of hole | ||||
|     def __init__(self, num_links, allow_self_collision=False, | ||||
|                  collision_penalty=1000): | ||||
|         self.num_links = num_links | ||||
|         self.link_lengths = np.ones((num_links, 1)) | ||||
|         self.bottom_center_of_hole = np.hstack([hole_x, -hole_depth]) | ||||
|         self.top_center_of_hole = np.hstack([hole_x, 0]) | ||||
|         self.left_wall_edge = np.hstack([hole_x - self.hole_width/2, 0]) | ||||
|         self.right_wall_edge = np.hstack([hole_x + self.hole_width / 2, 0]) | ||||
|         self.allow_self_collision = allow_self_collision | ||||
|         self.allow_wall_collision = allow_wall_collision | ||||
|         self.collision_penalty = collision_penalty | ||||
| 
 | ||||
|         self.via_point = np.ones(2) | ||||
|         self.goal_point = np.array((num_links, 0)) | ||||
| 
 | ||||
|         self._joints = None | ||||
|         self._joint_angles = None | ||||
|         self._angle_velocity = None | ||||
|         self.start_pos = np.hstack([[np.pi/2], np.zeros(self.num_links - 1)]) | ||||
|         self.start_vel = np.zeros(self.num_links) | ||||
|         self.weight_matrix_scale = 50  # for the holereacher, the dmp weights become quite large compared to the values of the goal attractor. this scaling is to ensure they are on similar scale for the optimizer | ||||
|         self.weight_matrix_scale = 1 | ||||
| 
 | ||||
|         self.dt = 0.01 | ||||
|         self.time_limit = 2 | ||||
| @ -52,22 +47,14 @@ class HoleReacher(gym.Env): | ||||
|         self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) | ||||
| 
 | ||||
|         self.fig = None | ||||
|         rect_1 = patches.Rectangle((-self.num_links, -1), | ||||
|                                    self.num_links + self.hole_x - self.hole_width / 2, 1, | ||||
|                                    fill=True, edgecolor='k', facecolor='k') | ||||
|         rect_2 = patches.Rectangle((self.hole_x + self.hole_width / 2, -1), | ||||
|                                    self.num_links - self.hole_x + self.hole_width / 2, 1, | ||||
|                                    fill=True, edgecolor='k', facecolor='k') | ||||
|         rect_3 = patches.Rectangle((self.hole_x - self.hole_width / 2, -1), self.hole_width, | ||||
|                                    1 - self.hole_depth, | ||||
|                                    fill=True, edgecolor='k', facecolor='k') | ||||
| 
 | ||||
|         self.patches = [rect_1, rect_2, rect_3] | ||||
| 
 | ||||
|     @property | ||||
|     def end_effector(self): | ||||
|         return self._joints[self.num_links].T | ||||
| 
 | ||||
|     def configure(self, context): | ||||
|         pass | ||||
| 
 | ||||
|     def reset(self): | ||||
|         self._joint_angles = self.start_pos | ||||
|         self._angle_velocity = self.start_vel | ||||
| @ -94,16 +81,16 @@ class HoleReacher(gym.Env): | ||||
| 
 | ||||
|         dist_reward = 0 | ||||
|         if not self._is_collided: | ||||
|             if self._steps == 180: | ||||
|                 dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole) | ||||
|         else: | ||||
|             dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole) | ||||
|             if self._steps == 100: | ||||
|                 dist_reward = np.linalg.norm(self.end_effector - self.via_point) | ||||
|             if self._steps == 200: | ||||
|                 dist_reward = np.linalg.norm(self.end_effector - self.goal_point) | ||||
| 
 | ||||
|         reward = - dist_reward ** 2 | ||||
| 
 | ||||
|         reward -= 1e-6 * np.sum(acc**2) | ||||
| 
 | ||||
|         if self._steps == 180: | ||||
|         if self._steps == 200: | ||||
|             reward -= 0.1 * np.sum(vel**2) ** 2 | ||||
| 
 | ||||
|         if self._is_collided: | ||||
| @ -129,17 +116,13 @@ class HoleReacher(gym.Env): | ||||
|         self._joints[1:, 1] = self._joints[0, 1] + line_points_in_taskspace[:, -1, 1] | ||||
| 
 | ||||
|         self_collision = False | ||||
|         wall_collision = False | ||||
| 
 | ||||
|         if not self.allow_self_collision: | ||||
|             self_collision = self.check_self_collision(line_points_in_taskspace) | ||||
|             if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision: | ||||
|                 self_collision = True | ||||
| 
 | ||||
|         if not self.allow_wall_collision: | ||||
|             wall_collision = self.check_wall_collision(line_points_in_taskspace) | ||||
| 
 | ||||
|         self._is_collided = self_collision or wall_collision | ||||
|         self._is_collided = self_collision | ||||
| 
 | ||||
|     def _get_obs(self): | ||||
|         theta = self._joint_angles | ||||
| @ -147,7 +130,8 @@ class HoleReacher(gym.Env): | ||||
|             np.cos(theta), | ||||
|             np.sin(theta), | ||||
|             self._angle_velocity, | ||||
|             self.end_effector - self.bottom_center_of_hole, | ||||
|             self.end_effector - self.via_point, | ||||
|             self.end_effector - self.goal_point, | ||||
|             self._steps | ||||
|         ]) | ||||
| 
 | ||||
| @ -237,17 +221,14 @@ class HoleReacher(gym.Env): | ||||
| 
 | ||||
|         if mode == "human": | ||||
|             plt.cla() | ||||
|             plt.title(f"Iteration: {self._steps}, distance: {self.end_effector - self.bottom_center_of_hole}") | ||||
|             plt.title(f"Iteration: {self._steps}") | ||||
| 
 | ||||
|             # Arm | ||||
|             plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k') | ||||
| 
 | ||||
|             # Add the patch to the Axes | ||||
|             [plt.gca().add_patch(rect) for rect in self.patches] | ||||
| 
 | ||||
|             lim = np.sum(self.link_lengths) + 0.5 | ||||
|             plt.xlim([-lim, lim]) | ||||
|             plt.ylim([-1.1, lim]) | ||||
|             plt.ylim([-lim, lim]) | ||||
|             # plt.draw() | ||||
|             plt.pause(1e-4) #  pushes window to foreground, which is annoying. | ||||
|             # self.fig.canvas.flush_events() | ||||
| @ -293,14 +274,14 @@ class HoleReacher(gym.Env): | ||||
| if __name__ == '__main__': | ||||
|     nl = 5 | ||||
|     render_mode = "human"  # "human" or "partial" or "final" | ||||
|     env = HoleReacher(num_links=nl, allow_self_collision=False, allow_wall_collision=False, hole_width=0.15, hole_depth=1, hole_x=1) | ||||
|     env = ViaPointReacher(num_links=nl, allow_self_collision=False) | ||||
|     env.reset() | ||||
|     # env.render(mode=render_mode) | ||||
|     env.render(mode=render_mode) | ||||
| 
 | ||||
|     for i in range(200): | ||||
|     for i in range(300): | ||||
|         # objective.load_result("/tmp/cma") | ||||
|         # test with random actions | ||||
|         ac = 2 * env.action_space.sample() | ||||
|         ac = env.action_space.sample() | ||||
|         # ac[0] += np.pi/2 | ||||
|         obs, rew, d, info = env.step(ac) | ||||
|         env.render(mode=render_mode) | ||||
|  | ||||
| @ -1,52 +1,19 @@ | ||||
| from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper | ||||
| from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker | ||||
| from alr_envs.classic_control.hole_reacher import HoleReacher | ||||
| from alr_envs.classic_control.utils import make_viapointreacher_env | ||||
| from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv | ||||
| import numpy as np | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
| 
 | ||||
|     def make_env(rank, seed=0): | ||||
|         """ | ||||
|         Utility function for multiprocessed env. | ||||
|     n_samples = 10 | ||||
|     n_cpus = 4 | ||||
|     dim = 25 | ||||
| 
 | ||||
|         :param env_id: (str) the environment ID | ||||
|         :param num_env: (int) the number of environments you wish to have in subprocesses | ||||
|         :param seed: (int) the inital seed for RNG | ||||
|         :param rank: (int) index of the subprocess | ||||
|         """ | ||||
|         def _init(): | ||||
|             _env = HoleReacher(num_links=5, | ||||
|                                allow_self_collision=False, | ||||
|                                allow_wall_collision=False, | ||||
|                                hole_width=0.15, | ||||
|                                hole_depth=1, | ||||
|                                hole_x=1) | ||||
|     env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)], | ||||
|                             n_samples=n_samples) | ||||
| 
 | ||||
|             _env = DmpEnvWrapper(_env, | ||||
|                                  num_dof=5, | ||||
|                                  num_basis=5, | ||||
|                                  duration=2, | ||||
|                                  dt=_env.dt, | ||||
|                                  learn_goal=True, | ||||
|                                  alpha_phase=2, | ||||
|                                  start_pos=_env.start_pos, | ||||
|                                  policy_type="velocity" | ||||
|                                  ) | ||||
|             _env.seed(seed + rank) | ||||
|             return _env | ||||
|         return _init | ||||
| 
 | ||||
|     n_samples = 4 | ||||
| 
 | ||||
|     env = DmpAsyncVectorEnv([make_env(i) for i in range(4)], | ||||
|                             n_samples=n_samples, | ||||
|                             context="spawn", | ||||
|                             shared_memory=False, | ||||
|                             worker=_worker) | ||||
| 
 | ||||
|     # params = np.random.randn(4, 25) | ||||
|     params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])]) | ||||
|     params = np.random.randn(n_samples, dim) | ||||
|     # params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])]) | ||||
| 
 | ||||
|     # env.reset() | ||||
|     out = env(params) | ||||
|  | ||||
| @ -1,55 +1,32 @@ | ||||
| from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper | ||||
| from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker | ||||
| from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_simple import ALRBallInACupEnv | ||||
| from alr_envs.mujoco.ball_in_a_cup.utils import make_simple_env | ||||
| from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv | ||||
| import numpy as np | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
| 
 | ||||
|     def make_env(rank, seed=0): | ||||
|         """ | ||||
|         Utility function for multiprocessed env. | ||||
| 
 | ||||
|         :param env_id: (str) the environment ID | ||||
|         :param num_env: (int) the number of environments you wish to have in subprocesses | ||||
|         :param seed: (int) the inital seed for RNG | ||||
|         :param rank: (int) index of the subprocess | ||||
|         """ | ||||
|         def _init(): | ||||
|             _env = ALRBallInACupEnv() | ||||
| 
 | ||||
|             _env = DmpEnvWrapper(_env, | ||||
|                                  num_dof=3, | ||||
|                                  num_basis=8, | ||||
|                                  duration=3.5, | ||||
|                                  alpha_phase=3, | ||||
|                                  dt=_env.dt, | ||||
|                                  learn_goal=False, | ||||
|                                  start_pos=_env.start_pos[1::2], | ||||
|                                  final_pos=_env.start_pos[1::2], | ||||
|                                  policy_type="motor" | ||||
|                                  ) | ||||
|             _env.seed(seed + rank) | ||||
|             return _env | ||||
|         return _init | ||||
| 
 | ||||
|     dim = 24 | ||||
|     n_cpus = 4 | ||||
| 
 | ||||
|     n_samples = 10 | ||||
| 
 | ||||
|     vec_env = DmpAsyncVectorEnv([make_env(i) for i in range(4)], | ||||
|                                 n_samples=n_samples, | ||||
|                                 context="spawn", | ||||
|                                 shared_memory=False, | ||||
|                                 worker=_worker) | ||||
|     vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)], | ||||
|                                 n_samples=n_samples) | ||||
| 
 | ||||
|     params = 10 * np.random.randn(n_samples, dim) | ||||
|     # params = 10 * np.random.randn(n_samples, dim) | ||||
|     params = np.array([[ -4.51280364,  24.43701373,  15.73282129, -12.13020392, | ||||
|          -8.57305795,   2.79806606,  -6.38613201,   5.99309385, | ||||
|          -2.05631886,  24.71684748,  14.05989949, -14.60456967, | ||||
|          10.51933419,  -2.43715355,  -6.0767578 ,  13.06498129, | ||||
|           6.18038374,  11.4153859 ,   1.40753639,   5.57082387, | ||||
|           9.81989309,   3.60558787,  -9.66996754,  14.28519904]]) | ||||
| 
 | ||||
|     out = vec_env(params) | ||||
| 
 | ||||
|     non_vec_env = make_env(0, 0)() | ||||
| 
 | ||||
|     params = 10 * np.random.randn(dim) | ||||
|     print(out) | ||||
|     # | ||||
|     non_vec_env = make_simple_env(0, 0)() | ||||
|     # | ||||
|     # params = 10 * np.random.randn(dim) | ||||
| 
 | ||||
|     out2 = non_vec_env.rollout(params, render=True) | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user