wip

2021-07-08 13:48:08 +02:00 · 2021-07-08 13:48:08 +02:00 · 7a725077e2
commit 7a725077e2
parent 80933eba09
5 changed files with 288 additions and 4 deletions
--- a/alr_envs/classic_control/base_reacher/init.py
+++ b/alr_envs/classic_control/base_reacher/init.py
--- a/alr_envs/classic_control/base_reacher/base_reacher_direct.py
+++ b/alr_envs/classic_control/base_reacher/base_reacher_direct.py
@ -0,0 +1,142 @@
 from typing import Iterable, Union
 from abc import ABCMeta, abstractmethod
 import gym
 import matplotlib.pyplot as plt
 import numpy as np
 from gym import spaces
 from gym.utils import seeding
 from alr_envs.classic_control.utils import check_self_collision
 class BaseReacherEnv(gym.Env):
    """
    Simple Reaching Task without any physics simulation.
    Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions
    towards the end of the trajectory.
    """
    def __init__(self, n_links: int, random_start: bool = True,
                 allow_self_collision: bool = False, collision_penalty: float = 1000):
        super().__init__()
        self.link_lengths = np.ones(n_links)
        self.n_links = n_links
        self._dt = 0.01
        self.random_start = random_start
        self._joints = None
        self._joint_angles = None
        self._angle_velocity = None
        self._is_collided = False
        self.allow_self_collision = allow_self_collision
        self.collision_penalty = collision_penalty
        self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)])
        self._start_vel = np.zeros(self.n_links)
        self.max_torque = 1
        self.steps_before_reward = 199
        action_bound = np.ones((self.n_links,)) * self.max_torque
        state_bound = np.hstack([
            [np.pi] * self.n_links,  # cos
            [np.pi] * self.n_links,  # sin
            [np.inf] * self.n_links,  # velocity
            [np.inf] * 2,  # x-y coordinates of target distance
            [np.inf]  # env steps, because reward start after n steps TODO: Maybe
        ])
        self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape)
        self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
        # containers for plotting
        self.metadata = {'render.modes': ["human"]}
        self.fig = None
        self._steps = 0
        self.seed()
    @property
    def dt(self) -> Union[float, int]:
        return self._dt
    @property
    def current_pos(self):
        return self._joint_angles.copy()
    @property
    def current_vel(self):
        return self._angle_velocity.copy()
    def reset(self):
        # Sample only orientation of first link, i.e. the arm is always straight.
        if self.random_start:
            first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4)
            self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)])
            self._start_pos = self._joint_angles.copy()
        else:
            self._joint_angles = self._start_pos
        self._angle_velocity = self._start_vel
        self._joints = np.zeros((self.n_links + 1, 2))
        self._update_joints()
        self._steps = 0
        return self._get_obs().copy()
    def step(self, action: np.ndarray):
        """
        A single step with action in torque space
        """
        # action = self._add_action_noise(action)
        ac = np.clip(action, -self.max_torque, self.max_torque)
        self._angle_velocity = self._angle_velocity + self.dt * ac
        self._joint_angles = self._joint_angles + self.dt * self._angle_velocity
        self._update_joints()
        if not self.allow_self_collision:
            self_collision = check_self_collision(line_points_in_taskspace)
            if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision:
                self_collision = True
        self._is_collided = self._check_collisions()
        reward, info = self._get_reward(action)
        self._steps += 1
        done = False
        return self._get_obs().copy(), reward, done, info
    def _update_joints(self):
        """
        update joints to get new end-effector position. The other links are only required for rendering.
        Returns:
        """
        angles = np.cumsum(self._joint_angles)
        x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)])
        self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0)
    @abstractmethod
    def _get_reward(self, action: np.ndarray) -> (float, dict):
        pass
    @abstractmethod
    def _get_obs(self) -> np.ndarray:
        pass
    @abstractmethod
    def _check_collisions(self) -> bool:
        pass
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
    def close(self):
        del self.fig
    @property
    def end_effector(self):
        return self._joints[self.n_links].T
--- a/alr_envs/classic_control/base_reacher/base_reacher_torque.py
+++ b/alr_envs/classic_control/base_reacher/base_reacher_torque.py
@ -0,0 +1,142 @@
 from typing import Iterable, Union
 from abc import ABCMeta, abstractmethod
 import gym
 import matplotlib.pyplot as plt
 import numpy as np
 from gym import spaces
 from gym.utils import seeding
 from alr_envs.classic_control.utils import check_self_collision
 class BaseReacherEnv(gym.Env):
    """
    Simple Reaching Task without any physics simulation.
    Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions
    towards the end of the trajectory.
    """
    def __init__(self, n_links: int, random_start: bool = True,
                 allow_self_collision: bool = False, collision_penalty: float = 1000):
        super().__init__()
        self.link_lengths = np.ones(n_links)
        self.n_links = n_links
        self._dt = 0.01
        self.random_start = random_start
        self._joints = None
        self._joint_angles = None
        self._angle_velocity = None
        self._is_collided = False
        self.allow_self_collision = allow_self_collision
        self.collision_penalty = collision_penalty
        self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)])
        self._start_vel = np.zeros(self.n_links)
        self.max_torque = 1
        self.steps_before_reward = 199
        action_bound = np.ones((self.n_links,)) * self.max_torque
        state_bound = np.hstack([
            [np.pi] * self.n_links,  # cos
            [np.pi] * self.n_links,  # sin
            [np.inf] * self.n_links,  # velocity
            [np.inf] * 2,  # x-y coordinates of target distance
            [np.inf]  # env steps, because reward start after n steps TODO: Maybe
        ])
        self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape)
        self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
        # containers for plotting
        self.metadata = {'render.modes': ["human"]}
        self.fig = None
        self._steps = 0
        self.seed()
    @property
    def dt(self) -> Union[float, int]:
        return self._dt
    @property
    def current_pos(self):
        return self._joint_angles.copy()
    @property
    def current_vel(self):
        return self._angle_velocity.copy()
    def reset(self):
        # Sample only orientation of first link, i.e. the arm is always straight.
        if self.random_start:
            first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4)
            self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)])
            self._start_pos = self._joint_angles.copy()
        else:
            self._joint_angles = self._start_pos
        self._angle_velocity = self._start_vel
        self._joints = np.zeros((self.n_links + 1, 2))
        self._update_joints()
        self._steps = 0
        return self._get_obs().copy()
    def step(self, action: np.ndarray):
        """
        A single step with action in torque space
        """
        # action = self._add_action_noise(action)
        ac = np.clip(action, -self.max_torque, self.max_torque)
        self._angle_velocity = self._angle_velocity + self.dt * ac
        self._joint_angles = self._joint_angles + self.dt * self._angle_velocity
        self._update_joints()
        if not self.allow_self_collision:
            self_collision = check_self_collision(line_points_in_taskspace)
            if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision:
                self_collision = True
        self._is_collided = self._check_collisions()
        reward, info = self._get_reward(action)
        self._steps += 1
        done = False
        return self._get_obs().copy(), reward, done, info
    def _update_joints(self):
        """
        update joints to get new end-effector position. The other links are only required for rendering.
        Returns:
        """
        angles = np.cumsum(self._joint_angles)
        x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)])
        self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0)
    @abstractmethod
    def _get_reward(self, action: np.ndarray) -> (float, dict):
        pass
    @abstractmethod
    def _get_obs(self) -> np.ndarray:
        pass
    @abstractmethod
    def _check_collisions(self) -> bool:
        pass
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
    def close(self):
        del self.fig
    @property
    def end_effector(self):
        return self._joints[self.n_links].T
--- a/alr_envs/classic_control/utils.py
+++ b/alr_envs/classic_control/utils.py
@ -10,7 +10,7 @@ def intersect(A, B, C, D):
 def check_self_collision(line_points):
-    """Checks whether line segments and intersect"""
+    """Checks whether line segments intersect"""
    for i, line1 in enumerate(line_points):
        for line2 in line_points[i + 2:, :, :]:
            if intersect(line1[0], line1[-1], line2[0], line2[-1]):
--- a/alr_envs/examples/examples_general.py
+++ b/alr_envs/examples/examples_general.py
@ -89,8 +89,8 @@ if __name__ == '__main__':
    # Basic gym task
    # example_general("Pendulum-v0", seed=10, iterations=200, render=True)
    #
-    # # Basis task from framework
+    # Basis task from framework
-    # example_general("alr_envs:HoleReacher-v0", seed=10, iterations=200, render=True)
+    example_general("alr_envs:HoleReacher-v0", seed=10, iterations=200, render=True)
    #
    # # OpenAI Mujoco task
    # example_general("HalfCheetah-v2", seed=10, render=True)
@ -99,4 +99,4 @@ if __name__ == '__main__':
    # example_general("alr_envs:ALRReacher-v0", seed=10, iterations=200, render=True)
    # Vectorized multiprocessing environments
-    example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)
+    # example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)