updates

2021-02-15 09:03:19 +01:00 · 2021-02-15 09:03:19 +01:00 · 77d0cbd00a
commit 77d0cbd00a
parent 95250af31c
4 changed files with 103 additions and 142 deletions
--- a/alr_envs/classic_control/utils.py
+++ b/alr_envs/classic_control/utils.py
@@ -1,8 +1,9 @@
 from alr_envs.classic_control.hole_reacher import HoleReacher
-from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapperVel
+from alr_envs.classic_control.viapoint_reacher import ViaPointReacher
+from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper


-def make_env(rank, seed=0):
+def make_viapointreacher_env(rank, seed=0):
    """
    Utility function for multiprocessed env.

@@ -14,21 +15,56 @@ def make_env(rank, seed=0):
    """

    def _init():
-        env = HoleReacher(num_links=5,
-                          allow_self_collision=False,
-                          allow_wall_collision=False,
-                          hole_width=0.15,
-                          hole_depth=1,
-                          hole_x=1,
-                          collision_penalty=100000)
+        _env = ViaPointReacher(num_links=5,
+                               allow_self_collision=False,
+                               collision_penalty=1000)

-        env = DmpEnvWrapperVel(env,
-                               num_dof=5,
-                               num_basis=5,
-                               duration=2,
-                               dt=env.dt,
-                               learn_goal=True)
-        env.seed(seed + rank)
-        return env
+        _env = DmpEnvWrapper(_env,
+                             num_dof=5,
+                             num_basis=5,
+                             duration=2,
+                             alpha_phase=2,
+                             dt=_env.dt,
+                             start_pos=_env.start_pos,
+                             learn_goal=False,
+                             policy_type="velocity")
+        _env.seed(seed + rank)
+        return _env
+
+    return _init
+
+
+def make_holereacher_env(rank, seed=0):
+    """
+    Utility function for multiprocessed env.
+
+    :param env_id: (str) the environment ID
+    :param num_env: (int) the number of environments you wish to have in subprocesses
+    :param seed: (int) the initial seed for RNG
+    :param rank: (int) index of the subprocess
+    :returns a function that generates an environment
+    """
+
+    def _init():
+        _env = HoleReacher(num_links=5,
+                           allow_self_collision=False,
+                           allow_wall_collision=False,
+                           hole_width=0.15,
+                           hole_depth=1,
+                           hole_x=1,
+                           collision_penalty=100000)
+
+        _env = DmpEnvWrapper(_env,
+                             num_dof=5,
+                             num_basis=5,
+                             duration=2,
+                             dt=_env.dt,
+                             learn_goal=True,
+                             alpha_phase=2,
+                             start_pos=_env.start_pos,
+                             policy_type="velocity"
+                             )
+        _env.seed(seed + rank)
+        return _env

    return _init
--- a/alr_envs/classic_control/viapoint_reacher.py
+++ b/alr_envs/classic_control/viapoint_reacher.py
@@ -13,29 +13,24 @@ def intersect(A, B, C, D):
    return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)


-class HoleReacher(gym.Env):
+class ViaPointReacher(gym.Env):

-    def __init__(self, num_links, hole_x, hole_width, hole_depth, allow_self_collision=False,
-                 allow_wall_collision=False, collision_penalty=1000):
-        self.hole_x = hole_x  # x-position of center of hole
-        self.hole_width = hole_width  # width of hole
-        self.hole_depth = hole_depth  # depth of hole
+    def __init__(self, num_links, allow_self_collision=False,
+                 collision_penalty=1000):
        self.num_links = num_links
        self.link_lengths = np.ones((num_links, 1))
-        self.bottom_center_of_hole = np.hstack([hole_x, -hole_depth])
-        self.top_center_of_hole = np.hstack([hole_x, 0])
-        self.left_wall_edge = np.hstack([hole_x - self.hole_width/2, 0])
-        self.right_wall_edge = np.hstack([hole_x + self.hole_width / 2, 0])
        self.allow_self_collision = allow_self_collision
-        self.allow_wall_collision = allow_wall_collision
        self.collision_penalty = collision_penalty

+        self.via_point = np.ones(2)
+        self.goal_point = np.array((num_links, 0))
+
        self._joints = None
        self._joint_angles = None
        self._angle_velocity = None
        self.start_pos = np.hstack([[np.pi/2], np.zeros(self.num_links - 1)])
        self.start_vel = np.zeros(self.num_links)
-        self.weight_matrix_scale = 50  # for the holereacher, the dmp weights become quite large compared to the values of the goal attractor. this scaling is to ensure they are on similar scale for the optimizer
+        self.weight_matrix_scale = 1

        self.dt = 0.01
        self.time_limit = 2
@@ -52,22 +47,14 @@ class HoleReacher(gym.Env):
        self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)

        self.fig = None
-        rect_1 = patches.Rectangle((-self.num_links, -1),
-                                   self.num_links + self.hole_x - self.hole_width / 2, 1,
-                                   fill=True, edgecolor='k', facecolor='k')
-        rect_2 = patches.Rectangle((self.hole_x + self.hole_width / 2, -1),
-                                   self.num_links - self.hole_x + self.hole_width / 2, 1,
-                                   fill=True, edgecolor='k', facecolor='k')
-        rect_3 = patches.Rectangle((self.hole_x - self.hole_width / 2, -1), self.hole_width,
-                                   1 - self.hole_depth,
-                                   fill=True, edgecolor='k', facecolor='k')
-
-        self.patches = [rect_1, rect_2, rect_3]

    @property
    def end_effector(self):
        return self._joints[self.num_links].T

+    def configure(self, context):
+        pass
+
    def reset(self):
        self._joint_angles = self.start_pos
        self._angle_velocity = self.start_vel
@@ -94,16 +81,16 @@ class HoleReacher(gym.Env):

        dist_reward = 0
        if not self._is_collided:
-            if self._steps == 180:
-                dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole)
-        else:
-            dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole)
+            if self._steps == 100:
+                dist_reward = np.linalg.norm(self.end_effector - self.via_point)
+            if self._steps == 200:
+                dist_reward = np.linalg.norm(self.end_effector - self.goal_point)

        reward = - dist_reward ** 2

        reward -= 1e-6 * np.sum(acc**2)

-        if self._steps == 180:
+        if self._steps == 200:
            reward -= 0.1 * np.sum(vel**2) ** 2

        if self._is_collided:
@@ -129,17 +116,13 @@ class HoleReacher(gym.Env):
        self._joints[1:, 1] = self._joints[0, 1] + line_points_in_taskspace[:, -1, 1]

        self_collision = False
-        wall_collision = False

        if not self.allow_self_collision:
            self_collision = self.check_self_collision(line_points_in_taskspace)
            if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision:
                self_collision = True

-        if not self.allow_wall_collision:
-            wall_collision = self.check_wall_collision(line_points_in_taskspace)
-
-        self._is_collided = self_collision or wall_collision
+        self._is_collided = self_collision

    def _get_obs(self):
        theta = self._joint_angles
@@ -147,7 +130,8 @@ class HoleReacher(gym.Env):
            np.cos(theta),
            np.sin(theta),
            self._angle_velocity,
-            self.end_effector - self.bottom_center_of_hole,
+            self.end_effector - self.via_point,
+            self.end_effector - self.goal_point,
            self._steps
        ])

@@ -237,17 +221,14 @@ class HoleReacher(gym.Env):

        if mode == "human":
            plt.cla()
-            plt.title(f"Iteration: {self._steps}, distance: {self.end_effector - self.bottom_center_of_hole}")
+            plt.title(f"Iteration: {self._steps}")

            # Arm
            plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')

-            # Add the patch to the Axes
-            [plt.gca().add_patch(rect) for rect in self.patches]
-
            lim = np.sum(self.link_lengths) + 0.5
            plt.xlim([-lim, lim])
-            plt.ylim([-1.1, lim])
+            plt.ylim([-lim, lim])
            # plt.draw()
            plt.pause(1e-4) #  pushes window to foreground, which is annoying.
            # self.fig.canvas.flush_events()
@@ -293,14 +274,14 @@ class HoleReacher(gym.Env):
 if __name__ == '__main__':
    nl = 5
    render_mode = "human"  # "human" or "partial" or "final"
-    env = HoleReacher(num_links=nl, allow_self_collision=False, allow_wall_collision=False, hole_width=0.15, hole_depth=1, hole_x=1)
+    env = ViaPointReacher(num_links=nl, allow_self_collision=False)
    env.reset()
-    # env.render(mode=render_mode)
+    env.render(mode=render_mode)

-    for i in range(200):
+    for i in range(300):
        # objective.load_result("/tmp/cma")
        # test with random actions
-        ac = 2 * env.action_space.sample()
+        ac = env.action_space.sample()
        # ac[0] += np.pi/2
        obs, rew, d, info = env.step(ac)
        env.render(mode=render_mode)
--- a/dmp_env_wrapper_example.py
+++ b/dmp_env_wrapper_example.py
@@ -1,52 +1,19 @@
-from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
-from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker
-from alr_envs.classic_control.hole_reacher import HoleReacher
+from alr_envs.classic_control.utils import make_viapointreacher_env
+from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv
 import numpy as np


 if __name__ == "__main__":

-    def make_env(rank, seed=0):
-        """
-        Utility function for multiprocessed env.
+    n_samples = 10
+    n_cpus = 4
+    dim = 25

-        :param env_id: (str) the environment ID
-        :param num_env: (int) the number of environments you wish to have in subprocesses
-        :param seed: (int) the inital seed for RNG
-        :param rank: (int) index of the subprocess
-        """
-        def _init():
-            _env = HoleReacher(num_links=5,
-                               allow_self_collision=False,
-                               allow_wall_collision=False,
-                               hole_width=0.15,
-                               hole_depth=1,
-                               hole_x=1)
+    env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
+                            n_samples=n_samples)

-            _env = DmpEnvWrapper(_env,
-                                 num_dof=5,
-                                 num_basis=5,
-                                 duration=2,
-                                 dt=_env.dt,
-                                 learn_goal=True,
-                                 alpha_phase=2,
-                                 start_pos=_env.start_pos,
-                                 policy_type="velocity"
-                                 )
-            _env.seed(seed + rank)
-            return _env
-        return _init
-
-    n_samples = 4
-
-    env = DmpAsyncVectorEnv([make_env(i) for i in range(4)],
-                            n_samples=n_samples,
-                            context="spawn",
-                            shared_memory=False,
-                            worker=_worker)
-
-    # params = np.random.randn(4, 25)
-    params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
+    params = np.random.randn(n_samples, dim)
+    # params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])

    # env.reset()
    out = env(params)
--- a/dmp_pd_control_example.py
+++ b/dmp_pd_control_example.py
@@ -1,55 +1,32 @@
-from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
-from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker
-from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_simple import ALRBallInACupEnv
+from alr_envs.mujoco.ball_in_a_cup.utils import make_simple_env
+from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv
 import numpy as np


 if __name__ == "__main__":

-    def make_env(rank, seed=0):
-        """
-        Utility function for multiprocessed env.
-
-        :param env_id: (str) the environment ID
-        :param num_env: (int) the number of environments you wish to have in subprocesses
-        :param seed: (int) the inital seed for RNG
-        :param rank: (int) index of the subprocess
-        """
-        def _init():
-            _env = ALRBallInACupEnv()
-
-            _env = DmpEnvWrapper(_env,
-                                 num_dof=3,
-                                 num_basis=8,
-                                 duration=3.5,
-                                 alpha_phase=3,
-                                 dt=_env.dt,
-                                 learn_goal=False,
-                                 start_pos=_env.start_pos[1::2],
-                                 final_pos=_env.start_pos[1::2],
-                                 policy_type="motor"
-                                 )
-            _env.seed(seed + rank)
-            return _env
-        return _init
-
    dim = 24
+    n_cpus = 4

    n_samples = 10

-    vec_env = DmpAsyncVectorEnv([make_env(i) for i in range(4)],
-                                n_samples=n_samples,
-                                context="spawn",
-                                shared_memory=False,
-                                worker=_worker)
+    vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
+                                n_samples=n_samples)

-    params = 10 * np.random.randn(n_samples, dim)
+    # params = 10 * np.random.randn(n_samples, dim)
+    params = np.array([[ -4.51280364,  24.43701373,  15.73282129, -12.13020392,
+         -8.57305795,   2.79806606,  -6.38613201,   5.99309385,
+         -2.05631886,  24.71684748,  14.05989949, -14.60456967,
+         10.51933419,  -2.43715355,  -6.0767578 ,  13.06498129,
+          6.18038374,  11.4153859 ,   1.40753639,   5.57082387,
+          9.81989309,   3.60558787,  -9.66996754,  14.28519904]])

    out = vec_env(params)
-
-    non_vec_env = make_env(0, 0)()
-
-    params = 10 * np.random.randn(dim)
+    print(out)
+    #
+    non_vec_env = make_simple_env(0, 0)()
+    #
+    # params = 10 * np.random.randn(dim)

    out2 = non_vec_env.rollout(params, render=True)