updates

2021-02-15 09:03:19 +01:00 · 2021-02-15 09:03:19 +01:00 · 77d0cbd00a
commit 77d0cbd00a
parent 95250af31c
4 changed files with 103 additions and 142 deletions
--- a/alr_envs/classic_control/utils.py
+++ b/alr_envs/classic_control/utils.py
@ -1,8 +1,9 @@
 from alr_envs.classic_control.hole_reacher import HoleReacher
-from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapperVel
+from alr_envs.classic_control.viapoint_reacher import ViaPointReacher
 from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
-def make_env(rank, seed=0):
+def make_viapointreacher_env(rank, seed=0):
    """
    Utility function for multiprocessed env.
@ -14,7 +15,38 @@ def make_env(rank, seed=0):
    """
    def _init():
-        env = HoleReacher(num_links=5,
+        _env = ViaPointReacher(num_links=5,
                               allow_self_collision=False,
                               collision_penalty=1000)
        _env = DmpEnvWrapper(_env,
                             num_dof=5,
                             num_basis=5,
                             duration=2,
                             alpha_phase=2,
                             dt=_env.dt,
                             start_pos=_env.start_pos,
                             learn_goal=False,
                             policy_type="velocity")
        _env.seed(seed + rank)
        return _env
    return _init
 def make_holereacher_env(rank, seed=0):
    """
    Utility function for multiprocessed env.
    :param env_id: (str) the environment ID
    :param num_env: (int) the number of environments you wish to have in subprocesses
    :param seed: (int) the initial seed for RNG
    :param rank: (int) index of the subprocess
    :returns a function that generates an environment
    """
    def _init():
        _env = HoleReacher(num_links=5,
                           allow_self_collision=False,
                           allow_wall_collision=False,
                           hole_width=0.15,
@ -22,13 +54,17 @@ def make_env(rank, seed=0):
                           hole_x=1,
                           collision_penalty=100000)
-        env = DmpEnvWrapperVel(env,
+        _env = DmpEnvWrapper(_env,
                             num_dof=5,
                             num_basis=5,
                             duration=2,
-                               dt=env.dt,
+                             dt=_env.dt,
-                               learn_goal=True)
+                             learn_goal=True,
-        env.seed(seed + rank)
+                             alpha_phase=2,
-        return env
+                             start_pos=_env.start_pos,
                             policy_type="velocity"
                             )
        _env.seed(seed + rank)
        return _env
    return _init
--- a/alr_envs/classic_control/viapoint_reacher.py
+++ b/alr_envs/classic_control/viapoint_reacher.py
@ -13,29 +13,24 @@ def intersect(A, B, C, D):
    return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)
-class HoleReacher(gym.Env):
+class ViaPointReacher(gym.Env):
-    def __init__(self, num_links, hole_x, hole_width, hole_depth, allow_self_collision=False,
+    def __init__(self, num_links, allow_self_collision=False,
-                 allow_wall_collision=False, collision_penalty=1000):
+                 collision_penalty=1000):
        self.hole_x = hole_x  # x-position of center of hole
        self.hole_width = hole_width  # width of hole
        self.hole_depth = hole_depth  # depth of hole
        self.num_links = num_links
        self.link_lengths = np.ones((num_links, 1))
        self.bottom_center_of_hole = np.hstack([hole_x, -hole_depth])
        self.top_center_of_hole = np.hstack([hole_x, 0])
        self.left_wall_edge = np.hstack([hole_x - self.hole_width/2, 0])
        self.right_wall_edge = np.hstack([hole_x + self.hole_width / 2, 0])
        self.allow_self_collision = allow_self_collision
        self.allow_wall_collision = allow_wall_collision
        self.collision_penalty = collision_penalty
        self.via_point = np.ones(2)
        self.goal_point = np.array((num_links, 0))
        self._joints = None
        self._joint_angles = None
        self._angle_velocity = None
        self.start_pos = np.hstack([[np.pi/2], np.zeros(self.num_links - 1)])
        self.start_vel = np.zeros(self.num_links)
-        self.weight_matrix_scale = 50  # for the holereacher, the dmp weights become quite large compared to the values of the goal attractor. this scaling is to ensure they are on similar scale for the optimizer
+        self.weight_matrix_scale = 1
        self.dt = 0.01
        self.time_limit = 2
@ -52,22 +47,14 @@ class HoleReacher(gym.Env):
        self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
        self.fig = None
        rect_1 = patches.Rectangle((-self.num_links, -1),
                                   self.num_links + self.hole_x - self.hole_width / 2, 1,
                                   fill=True, edgecolor='k', facecolor='k')
        rect_2 = patches.Rectangle((self.hole_x + self.hole_width / 2, -1),
                                   self.num_links - self.hole_x + self.hole_width / 2, 1,
                                   fill=True, edgecolor='k', facecolor='k')
        rect_3 = patches.Rectangle((self.hole_x - self.hole_width / 2, -1), self.hole_width,
                                   1 - self.hole_depth,
                                   fill=True, edgecolor='k', facecolor='k')
        self.patches = [rect_1, rect_2, rect_3]
    @property
    def end_effector(self):
        return self._joints[self.num_links].T
    def configure(self, context):
        pass
    def reset(self):
        self._joint_angles = self.start_pos
        self._angle_velocity = self.start_vel
@ -94,16 +81,16 @@ class HoleReacher(gym.Env):
        dist_reward = 0
        if not self._is_collided:
-            if self._steps == 180:
+            if self._steps == 100:
-                dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole)
+                dist_reward = np.linalg.norm(self.end_effector - self.via_point)
-        else:
+            if self._steps == 200:
-            dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole)
+                dist_reward = np.linalg.norm(self.end_effector - self.goal_point)
        reward = - dist_reward ** 2
        reward -= 1e-6 * np.sum(acc**2)
-        if self._steps == 180:
+        if self._steps == 200:
            reward -= 0.1 * np.sum(vel**2) ** 2
        if self._is_collided:
@ -129,17 +116,13 @@ class HoleReacher(gym.Env):
        self._joints[1:, 1] = self._joints[0, 1] + line_points_in_taskspace[:, -1, 1]
        self_collision = False
        wall_collision = False
        if not self.allow_self_collision:
            self_collision = self.check_self_collision(line_points_in_taskspace)
            if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision:
                self_collision = True
-        if not self.allow_wall_collision:
+        self._is_collided = self_collision
            wall_collision = self.check_wall_collision(line_points_in_taskspace)
        self._is_collided = self_collision or wall_collision
    def _get_obs(self):
        theta = self._joint_angles
@ -147,7 +130,8 @@ class HoleReacher(gym.Env):
            np.cos(theta),
            np.sin(theta),
            self._angle_velocity,
-            self.end_effector - self.bottom_center_of_hole,
+            self.end_effector - self.via_point,
            self.end_effector - self.goal_point,
            self._steps
        ])
@ -237,17 +221,14 @@ class HoleReacher(gym.Env):
        if mode == "human":
            plt.cla()
-            plt.title(f"Iteration: {self._steps}, distance: {self.end_effector - self.bottom_center_of_hole}")
+            plt.title(f"Iteration: {self._steps}")
            # Arm
            plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
            # Add the patch to the Axes
            [plt.gca().add_patch(rect) for rect in self.patches]
            lim = np.sum(self.link_lengths) + 0.5
            plt.xlim([-lim, lim])
-            plt.ylim([-1.1, lim])
+            plt.ylim([-lim, lim])
            # plt.draw()
            plt.pause(1e-4) #  pushes window to foreground, which is annoying.
            # self.fig.canvas.flush_events()
@ -293,14 +274,14 @@ class HoleReacher(gym.Env):
 if __name__ == '__main__':
    nl = 5
    render_mode = "human"  # "human" or "partial" or "final"
-    env = HoleReacher(num_links=nl, allow_self_collision=False, allow_wall_collision=False, hole_width=0.15, hole_depth=1, hole_x=1)
+    env = ViaPointReacher(num_links=nl, allow_self_collision=False)
    env.reset()
-    # env.render(mode=render_mode)
+    env.render(mode=render_mode)
-    for i in range(200):
+    for i in range(300):
        # objective.load_result("/tmp/cma")
        # test with random actions
-        ac = 2 * env.action_space.sample()
+        ac = env.action_space.sample()
        # ac[0] += np.pi/2
        obs, rew, d, info = env.step(ac)
        env.render(mode=render_mode)
--- a/dmp_env_wrapper_example.py
+++ b/dmp_env_wrapper_example.py
@ -1,52 +1,19 @@
-from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
+from alr_envs.classic_control.utils import make_viapointreacher_env
-from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker
+from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv
 from alr_envs.classic_control.hole_reacher import HoleReacher
 import numpy as np
 if __name__ == "__main__":
-    def make_env(rank, seed=0):
+    n_samples = 10
-        """
+    n_cpus = 4
-        Utility function for multiprocessed env.
+    dim = 25
-        :param env_id: (str) the environment ID
+    env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
-        :param num_env: (int) the number of environments you wish to have in subprocesses
+                            n_samples=n_samples)
        :param seed: (int) the inital seed for RNG
        :param rank: (int) index of the subprocess
        """
        def _init():
            _env = HoleReacher(num_links=5,
                               allow_self_collision=False,
                               allow_wall_collision=False,
                               hole_width=0.15,
                               hole_depth=1,
                               hole_x=1)
-            _env = DmpEnvWrapper(_env,
+    params = np.random.randn(n_samples, dim)
-                                 num_dof=5,
+    # params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
                                 num_basis=5,
                                 duration=2,
                                 dt=_env.dt,
                                 learn_goal=True,
                                 alpha_phase=2,
                                 start_pos=_env.start_pos,
                                 policy_type="velocity"
                                 )
            _env.seed(seed + rank)
            return _env
        return _init
    n_samples = 4
    env = DmpAsyncVectorEnv([make_env(i) for i in range(4)],
                            n_samples=n_samples,
                            context="spawn",
                            shared_memory=False,
                            worker=_worker)
    # params = np.random.randn(4, 25)
    params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
    # env.reset()
    out = env(params)
--- a/dmp_pd_control_example.py
+++ b/dmp_pd_control_example.py
@ -1,55 +1,32 @@
-from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
+from alr_envs.mujoco.ball_in_a_cup.utils import make_simple_env
-from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker
+from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv
 from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_simple import ALRBallInACupEnv
 import numpy as np
 if __name__ == "__main__":
    def make_env(rank, seed=0):
        """
        Utility function for multiprocessed env.
        :param env_id: (str) the environment ID
        :param num_env: (int) the number of environments you wish to have in subprocesses
        :param seed: (int) the inital seed for RNG
        :param rank: (int) index of the subprocess
        """
        def _init():
            _env = ALRBallInACupEnv()
            _env = DmpEnvWrapper(_env,
                                 num_dof=3,
                                 num_basis=8,
                                 duration=3.5,
                                 alpha_phase=3,
                                 dt=_env.dt,
                                 learn_goal=False,
                                 start_pos=_env.start_pos[1::2],
                                 final_pos=_env.start_pos[1::2],
                                 policy_type="motor"
                                 )
            _env.seed(seed + rank)
            return _env
        return _init
    dim = 24
    n_cpus = 4
    n_samples = 10
-    vec_env = DmpAsyncVectorEnv([make_env(i) for i in range(4)],
+    vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
-                                n_samples=n_samples,
+                                n_samples=n_samples)
                                context="spawn",
                                shared_memory=False,
                                worker=_worker)
-    params = 10 * np.random.randn(n_samples, dim)
+    # params = 10 * np.random.randn(n_samples, dim)
    params = np.array([[ -4.51280364,  24.43701373,  15.73282129, -12.13020392,
         -8.57305795,   2.79806606,  -6.38613201,   5.99309385,
         -2.05631886,  24.71684748,  14.05989949, -14.60456967,
         10.51933419,  -2.43715355,  -6.0767578 ,  13.06498129,
          6.18038374,  11.4153859 ,   1.40753639,   5.57082387,
          9.81989309,   3.60558787,  -9.66996754,  14.28519904]])
    out = vec_env(params)
-
+    print(out)
-    non_vec_env = make_env(0, 0)()
+    #
-
+    non_vec_env = make_simple_env(0, 0)()
-    params = 10 * np.random.randn(dim)
+    #
    # params = 10 * np.random.randn(dim)
    out2 = non_vec_env.rollout(params, render=True)