From 7a725077e24e4d5117afcb2155c600937f66e01d Mon Sep 17 00:00:00 2001 From: Maximilian Huettenrauch Date: Thu, 8 Jul 2021 13:48:08 +0200 Subject: [PATCH 01/15] wip --- .../classic_control/base_reacher/__init__.py | 0 .../base_reacher/base_reacher_direct.py | 142 ++++++++++++++++++ .../base_reacher/base_reacher_torque.py | 142 ++++++++++++++++++ alr_envs/classic_control/utils.py | 2 +- alr_envs/examples/examples_general.py | 6 +- 5 files changed, 288 insertions(+), 4 deletions(-) create mode 100644 alr_envs/classic_control/base_reacher/__init__.py create mode 100644 alr_envs/classic_control/base_reacher/base_reacher_direct.py create mode 100644 alr_envs/classic_control/base_reacher/base_reacher_torque.py diff --git a/alr_envs/classic_control/base_reacher/__init__.py b/alr_envs/classic_control/base_reacher/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/alr_envs/classic_control/base_reacher/base_reacher_direct.py b/alr_envs/classic_control/base_reacher/base_reacher_direct.py new file mode 100644 index 0000000..12e5830 --- /dev/null +++ b/alr_envs/classic_control/base_reacher/base_reacher_direct.py @@ -0,0 +1,142 @@ +from typing import Iterable, Union +from abc import ABCMeta, abstractmethod +import gym +import matplotlib.pyplot as plt +import numpy as np +from gym import spaces +from gym.utils import seeding +from alr_envs.classic_control.utils import check_self_collision + + +class BaseReacherEnv(gym.Env): + """ + Simple Reaching Task without any physics simulation. + Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions + towards the end of the trajectory. + """ + + def __init__(self, n_links: int, random_start: bool = True, + allow_self_collision: bool = False, collision_penalty: float = 1000): + super().__init__() + self.link_lengths = np.ones(n_links) + self.n_links = n_links + self._dt = 0.01 + + self.random_start = random_start + + self._joints = None + self._joint_angles = None + self._angle_velocity = None + self._is_collided = False + self.allow_self_collision = allow_self_collision + self.collision_penalty = collision_penalty + self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)]) + self._start_vel = np.zeros(self.n_links) + + self.max_torque = 1 + self.steps_before_reward = 199 + + action_bound = np.ones((self.n_links,)) * self.max_torque + state_bound = np.hstack([ + [np.pi] * self.n_links, # cos + [np.pi] * self.n_links, # sin + [np.inf] * self.n_links, # velocity + [np.inf] * 2, # x-y coordinates of target distance + [np.inf] # env steps, because reward start after n steps TODO: Maybe + ]) + self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) + self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) + + # containers for plotting + self.metadata = {'render.modes': ["human"]} + self.fig = None + + self._steps = 0 + self.seed() + + @property + def dt(self) -> Union[float, int]: + return self._dt + + @property + def current_pos(self): + return self._joint_angles.copy() + + @property + def current_vel(self): + return self._angle_velocity.copy() + + def reset(self): + # Sample only orientation of first link, i.e. the arm is always straight. 
+ if self.random_start: + first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) + self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) + self._start_pos = self._joint_angles.copy() + else: + self._joint_angles = self._start_pos + + self._angle_velocity = self._start_vel + self._joints = np.zeros((self.n_links + 1, 2)) + self._update_joints() + self._steps = 0 + + return self._get_obs().copy() + + def step(self, action: np.ndarray): + """ + A single step with action in torque space + """ + + # action = self._add_action_noise(action) + ac = np.clip(action, -self.max_torque, self.max_torque) + + self._angle_velocity = self._angle_velocity + self.dt * ac + self._joint_angles = self._joint_angles + self.dt * self._angle_velocity + self._update_joints() + + if not self.allow_self_collision: + self_collision = check_self_collision(line_points_in_taskspace) + if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision: + self_collision = True + + self._is_collided = self._check_collisions() + + reward, info = self._get_reward(action) + + self._steps += 1 + done = False + + return self._get_obs().copy(), reward, done, info + + def _update_joints(self): + """ + update joints to get new end-effector position. The other links are only required for rendering. + Returns: + + """ + angles = np.cumsum(self._joint_angles) + x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)]) + self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0) + + @abstractmethod + def _get_reward(self, action: np.ndarray) -> (float, dict): + pass + + @abstractmethod + def _get_obs(self) -> np.ndarray: + pass + + @abstractmethod + def _check_collisions(self) -> bool: + pass + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def close(self): + del self.fig + + @property + def end_effector(self): + return self._joints[self.n_links].T diff --git a/alr_envs/classic_control/base_reacher/base_reacher_torque.py b/alr_envs/classic_control/base_reacher/base_reacher_torque.py new file mode 100644 index 0000000..12e5830 --- /dev/null +++ b/alr_envs/classic_control/base_reacher/base_reacher_torque.py @@ -0,0 +1,142 @@ +from typing import Iterable, Union +from abc import ABCMeta, abstractmethod +import gym +import matplotlib.pyplot as plt +import numpy as np +from gym import spaces +from gym.utils import seeding +from alr_envs.classic_control.utils import check_self_collision + + +class BaseReacherEnv(gym.Env): + """ + Simple Reaching Task without any physics simulation. + Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions + towards the end of the trajectory. 
+ """ + + def __init__(self, n_links: int, random_start: bool = True, + allow_self_collision: bool = False, collision_penalty: float = 1000): + super().__init__() + self.link_lengths = np.ones(n_links) + self.n_links = n_links + self._dt = 0.01 + + self.random_start = random_start + + self._joints = None + self._joint_angles = None + self._angle_velocity = None + self._is_collided = False + self.allow_self_collision = allow_self_collision + self.collision_penalty = collision_penalty + self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)]) + self._start_vel = np.zeros(self.n_links) + + self.max_torque = 1 + self.steps_before_reward = 199 + + action_bound = np.ones((self.n_links,)) * self.max_torque + state_bound = np.hstack([ + [np.pi] * self.n_links, # cos + [np.pi] * self.n_links, # sin + [np.inf] * self.n_links, # velocity + [np.inf] * 2, # x-y coordinates of target distance + [np.inf] # env steps, because reward start after n steps TODO: Maybe + ]) + self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) + self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) + + # containers for plotting + self.metadata = {'render.modes': ["human"]} + self.fig = None + + self._steps = 0 + self.seed() + + @property + def dt(self) -> Union[float, int]: + return self._dt + + @property + def current_pos(self): + return self._joint_angles.copy() + + @property + def current_vel(self): + return self._angle_velocity.copy() + + def reset(self): + # Sample only orientation of first link, i.e. the arm is always straight. + if self.random_start: + first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) + self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) + self._start_pos = self._joint_angles.copy() + else: + self._joint_angles = self._start_pos + + self._angle_velocity = self._start_vel + self._joints = np.zeros((self.n_links + 1, 2)) + self._update_joints() + self._steps = 0 + + return self._get_obs().copy() + + def step(self, action: np.ndarray): + """ + A single step with action in torque space + """ + + # action = self._add_action_noise(action) + ac = np.clip(action, -self.max_torque, self.max_torque) + + self._angle_velocity = self._angle_velocity + self.dt * ac + self._joint_angles = self._joint_angles + self.dt * self._angle_velocity + self._update_joints() + + if not self.allow_self_collision: + self_collision = check_self_collision(line_points_in_taskspace) + if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision: + self_collision = True + + self._is_collided = self._check_collisions() + + reward, info = self._get_reward(action) + + self._steps += 1 + done = False + + return self._get_obs().copy(), reward, done, info + + def _update_joints(self): + """ + update joints to get new end-effector position. The other links are only required for rendering. 
+ Returns: + + """ + angles = np.cumsum(self._joint_angles) + x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)]) + self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0) + + @abstractmethod + def _get_reward(self, action: np.ndarray) -> (float, dict): + pass + + @abstractmethod + def _get_obs(self) -> np.ndarray: + pass + + @abstractmethod + def _check_collisions(self) -> bool: + pass + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def close(self): + del self.fig + + @property + def end_effector(self): + return self._joints[self.n_links].T diff --git a/alr_envs/classic_control/utils.py b/alr_envs/classic_control/utils.py index fa8176a..ea378d1 100644 --- a/alr_envs/classic_control/utils.py +++ b/alr_envs/classic_control/utils.py @@ -10,7 +10,7 @@ def intersect(A, B, C, D): def check_self_collision(line_points): - """Checks whether line segments and intersect""" + """Checks whether line segments intersect""" for i, line1 in enumerate(line_points): for line2 in line_points[i + 2:, :, :]: if intersect(line1[0], line1[-1], line2[0], line2[-1]): diff --git a/alr_envs/examples/examples_general.py b/alr_envs/examples/examples_general.py index 99ca8f6..7ab77c1 100644 --- a/alr_envs/examples/examples_general.py +++ b/alr_envs/examples/examples_general.py @@ -89,8 +89,8 @@ if __name__ == '__main__': # Basic gym task # example_general("Pendulum-v0", seed=10, iterations=200, render=True) # - # # Basis task from framework - # example_general("alr_envs:HoleReacher-v0", seed=10, iterations=200, render=True) + # Basis task from framework + example_general("alr_envs:HoleReacher-v0", seed=10, iterations=200, render=True) # # # OpenAI Mujoco task # example_general("HalfCheetah-v2", seed=10, render=True) @@ -99,4 +99,4 @@ if __name__ == '__main__': # example_general("alr_envs:ALRReacher-v0", seed=10, iterations=200, render=True) # Vectorized multiprocessing environments - example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200) + # example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200) From 86f894c1ffec1fd41e7400345645f1552e372e10 Mon Sep 17 00:00:00 2001 From: Maximilian Huettenrauch Date: Mon, 15 Nov 2021 09:10:03 +0100 Subject: [PATCH 02/15] added promp wrapped environments --- alr_envs/__init__.py | 2 +- alr_envs/alr/__init__.py | 21 ++++++++- alr_envs/dmc/__init__.py | 2 +- .../examples/examples_motion_primitives.py | 5 ++- alr_envs/meta/__init__.py | 2 +- alr_envs/open_ai/__init__.py | 2 +- alr_envs/utils/make_env_helpers.py | 43 ++++++++++++++++++- 7 files changed, 70 insertions(+), 7 deletions(-) diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py index e43e3b1..30fa7b8 100644 --- a/alr_envs/__init__.py +++ b/alr_envs/__init__.py @@ -1,5 +1,5 @@ from alr_envs import dmc, meta, open_ai -from alr_envs.utils.make_env_helpers import make, make_detpmp_env, make_dmp_env, make_rank +from alr_envs.utils.make_env_helpers import make, make_detpmp_env, make_dmp_env, make_promp_env, make_rank from alr_envs.utils import make_dmc # Convenience function for all MP environments diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py index 179839c..99ad5bb 100644 --- a/alr_envs/alr/__init__.py +++ b/alr_envs/alr/__init__.py @@ -9,7 +9,7 @@ from .mujoco.ball_in_a_cup.biac_pd import ALRBallInACupPDEnv from .mujoco.reacher.alr_reacher import ALRReacherEnv from .mujoco.reacher.balancing import BalancingEnv -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = 
{"DMP": [], "DetPMP": []} +ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "DetPMP": []} # Classic Control ## Simple Reacher @@ -308,6 +308,25 @@ for _v in _versions: ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) + _env_id = f'HoleReacherProMP-{_v}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + kwargs={ + "name": f"alr_envs:HoleReacher-{_v}", + "wrappers": [classic_control.hole_reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 5, + "num_basis": 5, + "duration": 2, + "policy_type": "velocity", + "weights_scale": 0.2, + "zero_start": True + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + _env_id = f'HoleReacherDetPMP-{_v}' register( id=_env_id, diff --git a/alr_envs/dmc/__init__.py b/alr_envs/dmc/__init__.py index 17d1f7f..ca6469e 100644 --- a/alr_envs/dmc/__init__.py +++ b/alr_envs/dmc/__init__.py @@ -1,6 +1,6 @@ from . import manipulation, suite -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "DetPMP": []} +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "DetPMP": []} from gym.envs.registration import register diff --git a/alr_envs/examples/examples_motion_primitives.py b/alr_envs/examples/examples_motion_primitives.py index 6decdb1..de365b7 100644 --- a/alr_envs/examples/examples_motion_primitives.py +++ b/alr_envs/examples/examples_motion_primitives.py @@ -147,10 +147,13 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): if __name__ == '__main__': - render = False + render = True # DMP example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render) + # ProMP + example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=1, render=render) + # DetProMP example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render) diff --git a/alr_envs/meta/__init__.py b/alr_envs/meta/__init__.py index fa63c94..9db0689 100644 --- a/alr_envs/meta/__init__.py +++ b/alr_envs/meta/__init__.py @@ -3,7 +3,7 @@ from gym import register from . import goal_object_change_mp_wrapper, goal_change_mp_wrapper, goal_endeffector_change_mp_wrapper, \ object_change_mp_wrapper -ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "DetPMP": []} +ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "DetPMP": []} # MetaWorld diff --git a/alr_envs/open_ai/__init__.py b/alr_envs/open_ai/__init__.py index 51dd712..63083ca 100644 --- a/alr_envs/open_ai/__init__.py +++ b/alr_envs/open_ai/__init__.py @@ -3,7 +3,7 @@ from gym.wrappers import FlattenObservation from . 
import classic_control, mujoco, robotics -ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "DetPMP": []} +ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "DetPMP": []} # Short Continuous Mountain Car register( diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py index fc73b05..19f54d6 100644 --- a/alr_envs/utils/make_env_helpers.py +++ b/alr_envs/utils/make_env_helpers.py @@ -7,6 +7,7 @@ from gym.envs.registration import EnvSpec from mp_env_api import MPEnvWrapper from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper +from mp_env_api.mp_wrappers.promp_wrapper import ProMPWrapper def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs): @@ -132,6 +133,26 @@ def make_dmp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs={}, **kwargs return DmpWrapper(_env, **mp_kwargs) +def make_promp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs={}, **kwargs): + """ + This can also be used standalone for manually building a custom ProMP environment. + Args: + env_id: base_env_name, + wrappers: list of wrappers (at least an MPEnvWrapper), + mp_kwargs: dict of at least {num_dof: int, num_basis: int, width: int} + + Returns: ProMP wrapped gym env + + """ + _verify_time_limit(mp_kwargs.get("duration", None), kwargs.get("time_limit", None)) + + _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs) + + _verify_dof(_env, mp_kwargs.get("num_dof")) + + return ProMPWrapper(_env, **mp_kwargs) + + def make_detpmp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs={}, **kwargs): """ This can also be used standalone for manually building a custom Det ProMP environment. @@ -140,7 +161,7 @@ def make_detpmp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs={}, **kwa wrappers: list of wrappers (at least an MPEnvWrapper), mp_kwargs: dict of at least {num_dof: int, num_basis: int, width: int} - Returns: DMP wrapped gym env + Returns: Det ProMP wrapped gym env """ _verify_time_limit(mp_kwargs.get("duration", None), kwargs.get("time_limit", None)) @@ -171,6 +192,26 @@ def make_dmp_env_helper(**kwargs): mp_kwargs=kwargs.pop("mp_kwargs"), **kwargs) +def make_promp_env_helper(**kwargs): + """ + Helper function for registering ProMP gym environments. + This can also be used standalone for manually building a custom ProMP environment. + Args: + **kwargs: expects at least the following: + { + "name": base_env_name, + "wrappers": list of wrappers (at least an MPEnvWrapper), + "mp_kwargs": dict of at least {num_dof: int, num_basis: int, width: int} + } + + Returns: ProMP wrapped gym env + + """ + seed = kwargs.pop("seed", None) + return make_promp_env(env_id=kwargs.pop("name"), wrappers=kwargs.pop("wrappers"), seed=seed, + mp_kwargs=kwargs.pop("mp_kwargs"), **kwargs) + + def make_detpmp_env_helper(**kwargs): """ Helper function for registering ProMP gym environments. 
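The registrations and helpers added in this patch expose ProMP variants of the existing motion-primitive ids (e.g. HoleReacherProMP-v1) next to the DMP and DetPMP ones. A minimal usage sketch, assuming alr_envs.make resolves the id the same way the DMP/DetPMP calls in examples_motion_primitives.py do and that, as with the other MP wrappers, a single action is a full ProMP weight vector whose step() executes the whole trajectory:

    import alr_envs

    env = alr_envs.make("alr_envs:HoleReacherProMP-v1", seed=10)
    obs = env.reset()

    # One "action" is a flat vector of ProMP basis weights for all joints.
    params = env.action_space.sample()

    # A single step rolls out the complete trajectory (duration=2 in mp_kwargs)
    # and returns the reward collected over that rollout.
    obs, reward, done, info = env.step(params)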
From 928c540251176deb6b9cee7a19bb191b55ce5741 Mon Sep 17 00:00:00 2001 From: Maximilian Huettenrauch Date: Wed, 17 Nov 2021 17:14:13 +0100 Subject: [PATCH 03/15] wip --- alr_envs/alr/__init__.py | 2 +- alr_envs/alr/classic_control/__init__.py | 2 +- .../classic_control/base_reacher/__init__.py | 0 .../base_reacher/base_reacher_direct.py | 53 ++-- .../base_reacher/base_reacher_torque.py | 0 .../hole_reacher/hole_reacher.py | 251 +++++++++++++++++- .../hole_reacher/mp_wrapper.py | 8 + .../hole_reacher/simple_reward.py | 48 ++++ alr_envs/alr/classic_control/utils.py | 3 + .../examples/examples_motion_primitives.py | 12 +- 10 files changed, 351 insertions(+), 28 deletions(-) rename alr_envs/{ => alr}/classic_control/base_reacher/__init__.py (100%) rename alr_envs/{ => alr}/classic_control/base_reacher/base_reacher_direct.py (73%) rename alr_envs/{ => alr}/classic_control/base_reacher/base_reacher_torque.py (100%) create mode 100644 alr_envs/alr/classic_control/hole_reacher/simple_reward.py diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py index 99ad5bb..70902b4 100644 --- a/alr_envs/alr/__init__.py +++ b/alr_envs/alr/__init__.py @@ -83,7 +83,7 @@ register( register( id='HoleReacher-v1', - entry_point='alr_envs.alr.classic_control:HoleReacherEnv', + entry_point='alr_envs.alr.classic_control:HoleReacherEnvOld', max_episode_steps=200, kwargs={ "n_links": 5, diff --git a/alr_envs/alr/classic_control/__init__.py b/alr_envs/alr/classic_control/__init__.py index 73575ab..38f0faf 100644 --- a/alr_envs/alr/classic_control/__init__.py +++ b/alr_envs/alr/classic_control/__init__.py @@ -1,3 +1,3 @@ -from .hole_reacher.hole_reacher import HoleReacherEnv +from .hole_reacher.hole_reacher import HoleReacherEnv, HoleReacherEnvOld from .simple_reacher.simple_reacher import SimpleReacherEnv from .viapoint_reacher.viapoint_reacher import ViaPointReacherEnv diff --git a/alr_envs/classic_control/base_reacher/__init__.py b/alr_envs/alr/classic_control/base_reacher/__init__.py similarity index 100% rename from alr_envs/classic_control/base_reacher/__init__.py rename to alr_envs/alr/classic_control/base_reacher/__init__.py diff --git a/alr_envs/classic_control/base_reacher/base_reacher_direct.py b/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py similarity index 73% rename from alr_envs/classic_control/base_reacher/base_reacher_direct.py rename to alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py index 12e5830..c4afaed 100644 --- a/alr_envs/classic_control/base_reacher/base_reacher_direct.py +++ b/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py @@ -5,7 +5,7 @@ import matplotlib.pyplot as plt import numpy as np from gym import spaces from gym.utils import seeding -from alr_envs.classic_control.utils import check_self_collision +from alr_envs.alr.classic_control.utils import intersect class BaseReacherEnv(gym.Env): @@ -16,7 +16,7 @@ class BaseReacherEnv(gym.Env): """ def __init__(self, n_links: int, random_start: bool = True, - allow_self_collision: bool = False, collision_penalty: float = 1000): + allow_self_collision: bool = False): super().__init__() self.link_lengths = np.ones(n_links) self.n_links = n_links @@ -24,19 +24,21 @@ class BaseReacherEnv(gym.Env): self.random_start = random_start + # state self._joints = None self._joint_angles = None self._angle_velocity = None - self._is_collided = False - self.allow_self_collision = allow_self_collision - self.collision_penalty = collision_penalty self._start_pos = np.hstack([[np.pi / 2], 
np.zeros(self.n_links - 1)]) self._start_vel = np.zeros(self.n_links) - self.max_torque = 1 + # joint limits + self.j_min = -np.pi * np.ones(n_links) + self.j_max = np.pi * np.ones(n_links) + + self.max_vel = 1 self.steps_before_reward = 199 - action_bound = np.ones((self.n_links,)) * self.max_torque + action_bound = np.ones((self.n_links,)) * self.max_vel state_bound = np.hstack([ [np.pi] * self.n_links, # cos [np.pi] * self.n_links, # sin @@ -47,6 +49,8 @@ class BaseReacherEnv(gym.Env): self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) + self.reward_function = None # Needs to be set in sub class + # containers for plotting self.metadata = {'render.modes': ["human"]} self.fig = None @@ -84,27 +88,21 @@ class BaseReacherEnv(gym.Env): def step(self, action: np.ndarray): """ - A single step with action in torque space + A single step with action in angular velocity space """ - # action = self._add_action_noise(action) - ac = np.clip(action, -self.max_torque, self.max_torque) - - self._angle_velocity = self._angle_velocity + self.dt * ac + acc = (action - self._angle_velocity) / self.dt + self._angle_velocity = action self._joint_angles = self._joint_angles + self.dt * self._angle_velocity self._update_joints() - if not self.allow_self_collision: - self_collision = check_self_collision(line_points_in_taskspace) - if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision: - self_collision = True - self._is_collided = self._check_collisions() - reward, info = self._get_reward(action) + # reward, info = self._get_reward(action) + reward, info = self.reward_function.get_reward(self, acc) self._steps += 1 - done = False + done = self._terminate(info) return self._get_obs().copy(), reward, done, info @@ -118,6 +116,19 @@ class BaseReacherEnv(gym.Env): x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)]) self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0) + def _check_self_collision(self): + """Checks whether line segments intersect""" + + if np.any(self._joint_angles > self.j_max) or np.any(self._joint_angles < self.j_min): + return True + + link_lines = np.stack((self._joints[:-1, :], self._joints[1:, :]), axis=1) + for i, line1 in enumerate(link_lines): + for line2 in link_lines[i + 2:, :]: + if intersect(line1[0], line1[-1], line2[0], line2[-1]): + return True + return False + @abstractmethod def _get_reward(self, action: np.ndarray) -> (float, dict): pass @@ -130,6 +141,10 @@ class BaseReacherEnv(gym.Env): def _check_collisions(self) -> bool: pass + @abstractmethod + def _terminate(self, info) -> bool: + return False + def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] diff --git a/alr_envs/classic_control/base_reacher/base_reacher_torque.py b/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py similarity index 100% rename from alr_envs/classic_control/base_reacher/base_reacher_torque.py rename to alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py index ee1a997..55aac9e 100644 --- a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py +++ b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py @@ -6,10 +6,243 @@ import numpy as np from gym.utils import seeding from matplotlib import patches +from 
alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherEnv from alr_envs.alr.classic_control.utils import check_self_collision -class HoleReacherEnv(gym.Env): +class HoleReacherEnv(BaseReacherEnv): + def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None, + hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False, + allow_wall_collision: bool = False, collision_penalty: float = 1000, rew_fct: str = "simple"): + + super().__init__(n_links, random_start, allow_self_collision) + + # provided initial parameters + self.initial_x = hole_x # x-position of center of hole + self.initial_width = hole_width # width of hole + self.initial_depth = hole_depth # depth of hole + + # temp container for current env state + self._tmp_x = None + self._tmp_width = None + self._tmp_depth = None + self._goal = None # x-y coordinates for reaching the center at the bottom of the hole + + action_bound = np.pi * np.ones((self.n_links,)) + state_bound = np.hstack([ + [np.pi] * self.n_links, # cos + [np.pi] * self.n_links, # sin + [np.inf] * self.n_links, # velocity + [np.inf], # hole width + # [np.inf], # hole depth + [np.inf] * 2, # x-y coordinates of target distance + [np.inf] # env steps, because reward start after n steps TODO: Maybe + ]) + self.action_space = gym.spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) + self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) + + if rew_fct == "simple": + from alr_envs.alr.classic_control.hole_reacher.simple_reward import HolereacherSimpleReward + self.reward_function = HolereacherSimpleReward(allow_self_collision, allow_wall_collision, collision_penalty) + + def reset(self): + self._generate_hole() + self._set_patches() + + return super().reset() + + def _terminate(self, info): + return info["is_collided"] + + def _generate_hole(self): + if self.initial_width is None: + width = self.np_random.uniform(0.15, 0.5) + else: + width = np.copy(self.initial_width) + if self.initial_x is None: + # sample whole on left or right side + direction = self.np_random.choice([-1, 1]) + # Hole center needs to be half the width away from the arm to give a valid setting. + x = direction * self.np_random.uniform(width / 2, 3.5) + else: + x = np.copy(self.initial_x) + if self.initial_depth is None: + # TODO we do not want this right now. 
+ depth = self.np_random.uniform(1, 1) + else: + depth = np.copy(self.initial_depth) + + self._tmp_width = width + self._tmp_x = x + self._tmp_depth = depth + self._goal = np.hstack([self._tmp_x, -self._tmp_depth]) + + self._line_ground_left = np.array([-self.n_links, 0, x - width / 2, 0]) + self._line_ground_right = np.array([x + width / 2, 0, self.n_links, 0]) + self._line_ground_hole = np.array([x - width / 2, -depth, x + width / 2, -depth]) + self._line_hole_left = np.array([x - width / 2, -depth, x - width / 2, 0]) + self._line_hole_right = np.array([x + width / 2, -depth, x + width / 2, 0]) + + self.ground_lines = np.stack((self._line_ground_left, + self._line_ground_right, + self._line_ground_hole, + self._line_hole_left, + self._line_hole_right)) + + def _get_obs(self): + theta = self._joint_angles + return np.hstack([ + np.cos(theta), + np.sin(theta), + self._angle_velocity, + self._tmp_width, + # self._tmp_hole_depth, + self.end_effector - self._goal, + self._steps + ]) + + def _get_line_points(self, num_points_per_link=1): + theta = self._joint_angles[:, None] + + intermediate_points = np.linspace(0, 1, num_points_per_link) if num_points_per_link > 1 else 1 + accumulated_theta = np.cumsum(theta, axis=0) + end_effector = np.zeros(shape=(self.n_links, num_points_per_link, 2)) + + x = np.cos(accumulated_theta) * self.link_lengths[:, None] * intermediate_points + y = np.sin(accumulated_theta) * self.link_lengths[:, None] * intermediate_points + + end_effector[0, :, 0] = x[0, :] + end_effector[0, :, 1] = y[0, :] + + for i in range(1, self.n_links): + end_effector[i, :, 0] = x[i, :] + end_effector[i - 1, -1, 0] + end_effector[i, :, 1] = y[i, :] + end_effector[i - 1, -1, 1] + + # xy = np.stack((x, y), axis=2) + # + # self._joints[0] + np.cumsum(xy, axis=0) + + return np.squeeze(end_effector + self._joints[0, :]) + + def check_wall_collision(self): + line_points = self._get_line_points(num_points_per_link=100) + + # all points that are before the hole in x + r, c = np.where(line_points[:, :, 0] < (self._tmp_x - self._tmp_width / 2)) + + # check if any of those points are below surface + nr_line_points_below_surface_before_hole = np.sum(line_points[r, c, 1] < 0) + + if nr_line_points_below_surface_before_hole > 0: + return True + + # all points that are after the hole in x + r, c = np.where(line_points[:, :, 0] > (self._tmp_x + self._tmp_width / 2)) + + # check if any of those points are below surface + nr_line_points_below_surface_after_hole = np.sum(line_points[r, c, 1] < 0) + + if nr_line_points_below_surface_after_hole > 0: + return True + + # all points that are above the hole + r, c = np.where((line_points[:, :, 0] > (self._tmp_x - self._tmp_width / 2)) & ( + line_points[:, :, 0] < (self._tmp_x + self._tmp_width / 2))) + + # check if any of those points are below surface + nr_line_points_below_surface_in_hole = np.sum(line_points[r, c, 1] < -self._tmp_depth) + + if nr_line_points_below_surface_in_hole > 0: + return True + + return False + + # def check_wall_collision(self, ): + # """find the intersection of line segments A=(x1,y1)/(x2,y2) and + # B=(x3,y3)/(x4,y4). 
""" + # + # link_lines = np.hstack((self._joints[:-1, :], self._joints[1:, :])) + # + # all_points_product = np.hstack( + # [np.repeat(link_lines, len(self.ground_lines), axis=0), + # np.tile(self.ground_lines, (len(link_lines), 1))]) + # + # x1, y1, x2, y2, x3, y3, x4, y4 = all_points_product.T + # + # denom = ((x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)) + # # if denom == 0: + # # return False + # px = ((x1 * y2 - y1 * x2) * (x3 - x4) - (x1 - x2) * (x3 * y4 - y3 * x4)) / denom + # py = ((x1 * y2 - y1 * x2) * (y3 - y4) - (y1 - y2) * (x3 * y4 - y3 * x4)) / denom + # # if (px - x1) * (px - x2) < 0 and (py - y1) * (py - y2) < 0 \ + # # and (px - x3) * (px - x4) < 0 and (py - y3) * (py - y4) < 0: + # # return True # [px, py] + # test = ((px - x1) * (px - x2) <= 0) & ((py - y1) * (py - y2) <= 0) & ((px - x3) * (px - x4) <= 0) & ( + # (py - y3) * (py - y4) <= 0) + # if np.any(test): + # possible_collisions = np.stack((px, py)).T[test] + # for row in possible_collisions: + # if not np.any([np.allclose(row, x) for x in self._joints]): + # return True, row + # + # return False, None + + def render(self, mode='human'): + if self.fig is None: + # Create base figure once on the beginning. Afterwards only update + plt.ion() + self.fig = plt.figure() + ax = self.fig.add_subplot(1, 1, 1) + + # limits + lim = np.sum(self.link_lengths) + 0.5 + ax.set_xlim([-lim, lim]) + ax.set_ylim([-1.1, lim]) + + self.line, = ax.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k') + self._set_patches() + self.fig.show() + + self.fig.gca().set_title( + f"Iteration: {self._steps}, distance: {np.linalg.norm(self.end_effector - self._goal) ** 2}") + + if mode == "human": + + # arm + self.line.set_data(self._joints[:, 0], self._joints[:, 1]) + + self.fig.canvas.draw() + self.fig.canvas.flush_events() + + elif mode == "partial": + if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided: + # Arm + plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k', + alpha=self._steps / 200) + + def _set_patches(self): + if self.fig is not None: + # self.fig.gca().patches = [] + left_block = patches.Rectangle((-self.n_links, -self._tmp_depth), + self.n_links + self._tmp_x - self._tmp_width / 2, + self._tmp_depth, + fill=True, edgecolor='k', facecolor='k') + right_block = patches.Rectangle((self._tmp_x + self._tmp_width / 2, -self._tmp_depth), + self.n_links - self._tmp_x + self._tmp_width / 2, + self._tmp_depth, + fill=True, edgecolor='k', facecolor='k') + hole_floor = patches.Rectangle((self._tmp_x - self._tmp_width / 2, -self._tmp_depth), + self._tmp_width, + 1 - self._tmp_depth, + fill=True, edgecolor='k', facecolor='k') + + # Add the patch to the Axes + self.fig.gca().add_patch(left_block) + self.fig.gca().add_patch(right_block) + self.fig.gca().add_patch(hole_floor) + + +class HoleReacherEnvOld(gym.Env): def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None, hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False, @@ -312,3 +545,19 @@ class HoleReacherEnv(gym.Env): super().close() if self.fig is not None: plt.close(self.fig) + + +if __name__ == "__main__": + import time + env = HoleReacherEnv(5) + env.reset() + + start = time.time() + for i in range(10000): + # env.check_wall_collision() + ac = env.action_space.sample() + obs, rew, done, info = env.step(ac) + # env.render() + if done: + env.reset() + print(time.time() - start) diff --git a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py 
b/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py index d951161..feb545f 100644 --- a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py +++ b/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py @@ -18,6 +18,14 @@ class MPWrapper(MPEnvWrapper): [False] # env steps ]) + # @property + # def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: + # return self._joint_angles.copy() + # + # @property + # def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + # return self._angle_velocity.copy() + @property def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: return self.env.current_pos diff --git a/alr_envs/alr/classic_control/hole_reacher/simple_reward.py b/alr_envs/alr/classic_control/hole_reacher/simple_reward.py new file mode 100644 index 0000000..0609a62 --- /dev/null +++ b/alr_envs/alr/classic_control/hole_reacher/simple_reward.py @@ -0,0 +1,48 @@ +import numpy as np +from alr_envs.alr.classic_control.utils import check_self_collision + + +class HolereacherSimpleReward: + def __init__(self, allow_self_collision, allow_wall_collision, collision_penalty): + self.collision_penalty = collision_penalty + + # collision + self.allow_self_collision = allow_self_collision + self.allow_wall_collision = allow_wall_collision + self.collision_penalty = collision_penalty + self._is_collided = False + pass + + def get_reward(self, env, action): + reward = 0 + success = False + + self_collision = False + wall_collision = False + + # joints = np.hstack((env._joints[:-1, :], env._joints[1:, :])) + if not self.allow_self_collision: + self_collision = env._check_self_collision() + + if not self.allow_wall_collision: + wall_collision = env.check_wall_collision() + + self._is_collided = self_collision or wall_collision + + if env._steps == 199 or self._is_collided: + # return reward only in last time step + # Episode also terminates when colliding, hence return reward + dist = np.linalg.norm(env.end_effector - env._goal) + + success = dist < 0.005 and not self._is_collided + reward = - dist ** 2 - self.collision_penalty * self._is_collided + + info = {"is_success": success, + "is_collided": self._is_collided} + + acc = (action - env._angle_velocity) / env.dt + reward -= 5e-8 * np.sum(acc ** 2) + + return reward, info + + diff --git a/alr_envs/alr/classic_control/utils.py b/alr_envs/alr/classic_control/utils.py index ea378d1..b557a0a 100644 --- a/alr_envs/alr/classic_control/utils.py +++ b/alr_envs/alr/classic_control/utils.py @@ -1,3 +1,6 @@ +import numpy as np + + def ccw(A, B, C): return (C[1] - A[1]) * (B[0] - A[0]) - (B[1] - A[1]) * (C[0] - A[0]) > 1e-12 diff --git a/alr_envs/examples/examples_motion_primitives.py b/alr_envs/examples/examples_motion_primitives.py index de365b7..b307a52 100644 --- a/alr_envs/examples/examples_motion_primitives.py +++ b/alr_envs/examples/examples_motion_primitives.py @@ -147,18 +147,18 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): if __name__ == '__main__': - render = True + render = False # DMP - example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render) + # example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=10, render=render) # ProMP - example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=1, render=render) + # example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=100, render=render) # DetProMP - example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render) + example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=100, render=render) # 
Altered basis functions - example_custom_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render) + # example_custom_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render) # Custom MP - example_fully_custom_mp(seed=10, iterations=1, render=render) + # example_fully_custom_mp(seed=10, iterations=1, render=render) From 2543bcd7ece7c35fba1d57a895b081c167bfb731 Mon Sep 17 00:00:00 2001 From: Maximilian Huettenrauch Date: Fri, 26 Nov 2021 16:31:46 +0100 Subject: [PATCH 04/15] reacher envs cleanup - base classes for direct and torque control - added promp wrapper support --- alr_envs/alr/__init__.py | 41 +- .../base_reacher/base_reacher.py | 141 +++++++ .../base_reacher/base_reacher_direct.py | 138 +------ .../base_reacher/base_reacher_torque.py | 119 +----- .../hole_reacher/hole_reacher.py | 362 +----------------- .../hole_reacher/hole_reacher_old.py | 315 +++++++++++++++ .../hole_reacher/hr_dist_vel_acc_reward.py | 56 +++ .../{simple_reward.py => hr_simple_reward.py} | 29 +- .../simple_reacher/simple_reacher.py | 110 ++---- .../viapoint_reacher/viapoint_reacher.py | 147 ++----- .../examples/examples_motion_primitives.py | 8 +- 11 files changed, 645 insertions(+), 821 deletions(-) create mode 100644 alr_envs/alr/classic_control/base_reacher/base_reacher.py create mode 100644 alr_envs/alr/classic_control/hole_reacher/hole_reacher_old.py create mode 100644 alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py rename alr_envs/alr/classic_control/hole_reacher/{simple_reward.py => hr_simple_reward.py} (64%) diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py index 70902b4..a364b96 100644 --- a/alr_envs/alr/__init__.py +++ b/alr_envs/alr/__init__.py @@ -83,7 +83,7 @@ register( register( id='HoleReacher-v1', - entry_point='alr_envs.alr.classic_control:HoleReacherEnvOld', + entry_point='alr_envs.alr.classic_control:HoleReacherEnv', max_episode_steps=200, kwargs={ "n_links": 5, @@ -109,7 +109,7 @@ register( "hole_width": 0.25, "hole_depth": 1, "hole_x": 2, - "collision_penalty": 100, + "collision_penalty": 1, } ) @@ -220,6 +220,25 @@ for _v in _versions: ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) + _env_id = f'{_name[0]}ProMP-{_name[1]}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + kwargs={ + "name": f"alr_envs:{_v}", + "wrappers": [classic_control.simple_reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 2 if "long" not in _v.lower() else 5, + "num_basis": 5, + "duration": 2, + "policy_type": "motor", + "weights_scale": 1, + "zero_start": True + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + _env_id = f'{_name[0]}DetPMP-{_name[1]}' register( id=_env_id, @@ -262,6 +281,24 @@ register( ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0") +register( + id="ViaPointReacherProMP-v0", + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + kwargs={ + "name": f"alr_envs:ViaPointReacher-v0", + "wrappers": [classic_control.viapoint_reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 5, + "num_basis": 5, + "duration": 2, + "policy_type": "motor", + "weights_scale": 1, + "zero_start": True + } + } +) +ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0") + register( id='ViaPointReacherDetPMP-v0', entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher.py 
b/alr_envs/alr/classic_control/base_reacher/base_reacher.py new file mode 100644 index 0000000..66b234f --- /dev/null +++ b/alr_envs/alr/classic_control/base_reacher/base_reacher.py @@ -0,0 +1,141 @@ +from typing import Iterable, Union +from abc import ABCMeta, abstractmethod +import gym +import matplotlib.pyplot as plt +import numpy as np +from gym import spaces +from gym.utils import seeding +from alr_envs.alr.classic_control.utils import intersect + + +class BaseReacherEnv(gym.Env): + """ + Base class for all reaching environments. + """ + + def __init__(self, n_links: int, random_start: bool = True, + allow_self_collision: bool = False): + super().__init__() + self.link_lengths = np.ones(n_links) + self.n_links = n_links + self._dt = 0.01 + + self.random_start = random_start + + # state + self._joints = None + self._joint_angles = None + self._angle_velocity = None + self._acc = None + self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)]) + self._start_vel = np.zeros(self.n_links) + + # joint limits + self.j_min = -np.pi * np.ones(n_links) + self.j_max = np.pi * np.ones(n_links) + + self.steps_before_reward = 199 + + state_bound = np.hstack([ + [np.pi] * self.n_links, # cos + [np.pi] * self.n_links, # sin + [np.inf] * self.n_links, # velocity + [np.inf] * 2, # x-y coordinates of target distance + [np.inf] # env steps, because reward start after n steps TODO: Maybe + ]) + + self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) + + self.reward_function = None # Needs to be set in sub class + + # containers for plotting + self.metadata = {'render.modes': ["human"]} + self.fig = None + + self._steps = 0 + self.seed() + + @property + def dt(self) -> Union[float, int]: + return self._dt + + @property + def current_pos(self): + return self._joint_angles.copy() + + @property + def current_vel(self): + return self._angle_velocity.copy() + + def reset(self): + # Sample only orientation of first link, i.e. the arm is always straight. + if self.random_start: + first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) + self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) + self._start_pos = self._joint_angles.copy() + else: + self._joint_angles = self._start_pos + + self._angle_velocity = self._start_vel + self._joints = np.zeros((self.n_links + 1, 2)) + self._update_joints() + self._steps = 0 + + return self._get_obs().copy() + + @abstractmethod + def step(self, action: np.ndarray): + """ + A single step with action in angular velocity space + """ + raise NotImplementedError + + def _update_joints(self): + """ + update joints to get new end-effector position. The other links are only required for rendering. 
+ Returns: + + """ + angles = np.cumsum(self._joint_angles) + x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)]) + self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0) + + def _check_self_collision(self): + """Checks whether line segments intersect""" + + if np.any(self._joint_angles > self.j_max) or np.any(self._joint_angles < self.j_min): + return True + + link_lines = np.stack((self._joints[:-1, :], self._joints[1:, :]), axis=1) + for i, line1 in enumerate(link_lines): + for line2 in link_lines[i + 2:, :]: + if intersect(line1[0], line1[-1], line2[0], line2[-1]): + return True + return False + + @abstractmethod + def _get_reward(self, action: np.ndarray) -> (float, dict): + pass + + @abstractmethod + def _get_obs(self) -> np.ndarray: + pass + + @abstractmethod + def _check_collisions(self) -> bool: + pass + + @abstractmethod + def _terminate(self, info) -> bool: + return False + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def close(self): + del self.fig + + @property + def end_effector(self): + return self._joints[self.n_links].T diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py b/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py index c4afaed..4f7b6f1 100644 --- a/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py +++ b/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py @@ -1,157 +1,35 @@ -from typing import Iterable, Union -from abc import ABCMeta, abstractmethod -import gym -import matplotlib.pyplot as plt -import numpy as np from gym import spaces -from gym.utils import seeding -from alr_envs.alr.classic_control.utils import intersect +import numpy as np +from alr_envs.alr.classic_control.base_reacher.base_reacher import BaseReacherEnv -class BaseReacherEnv(gym.Env): +class BaseReacherDirectEnv(BaseReacherEnv): """ - Simple Reaching Task without any physics simulation. - Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions - towards the end of the trajectory. 
+ Base class for directly controlled reaching environments """ - def __init__(self, n_links: int, random_start: bool = True, allow_self_collision: bool = False): - super().__init__() - self.link_lengths = np.ones(n_links) - self.n_links = n_links - self._dt = 0.01 - - self.random_start = random_start - - # state - self._joints = None - self._joint_angles = None - self._angle_velocity = None - self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)]) - self._start_vel = np.zeros(self.n_links) - - # joint limits - self.j_min = -np.pi * np.ones(n_links) - self.j_max = np.pi * np.ones(n_links) - - self.max_vel = 1 - self.steps_before_reward = 199 + super().__init__(n_links, random_start, allow_self_collision) + self.max_vel = 10 * np.pi action_bound = np.ones((self.n_links,)) * self.max_vel - state_bound = np.hstack([ - [np.pi] * self.n_links, # cos - [np.pi] * self.n_links, # sin - [np.inf] * self.n_links, # velocity - [np.inf] * 2, # x-y coordinates of target distance - [np.inf] # env steps, because reward start after n steps TODO: Maybe - ]) self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) - self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) - - self.reward_function = None # Needs to be set in sub class - - # containers for plotting - self.metadata = {'render.modes': ["human"]} - self.fig = None - - self._steps = 0 - self.seed() - - @property - def dt(self) -> Union[float, int]: - return self._dt - - @property - def current_pos(self): - return self._joint_angles.copy() - - @property - def current_vel(self): - return self._angle_velocity.copy() - - def reset(self): - # Sample only orientation of first link, i.e. the arm is always straight. - if self.random_start: - first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) - self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) - self._start_pos = self._joint_angles.copy() - else: - self._joint_angles = self._start_pos - - self._angle_velocity = self._start_vel - self._joints = np.zeros((self.n_links + 1, 2)) - self._update_joints() - self._steps = 0 - - return self._get_obs().copy() def step(self, action: np.ndarray): """ A single step with action in angular velocity space """ - acc = (action - self._angle_velocity) / self.dt + self._acc = (action - self._angle_velocity) / self.dt self._angle_velocity = action self._joint_angles = self._joint_angles + self.dt * self._angle_velocity self._update_joints() self._is_collided = self._check_collisions() - # reward, info = self._get_reward(action) - reward, info = self.reward_function.get_reward(self, acc) + reward, info = self._get_reward(action) self._steps += 1 done = self._terminate(info) return self._get_obs().copy(), reward, done, info - - def _update_joints(self): - """ - update joints to get new end-effector position. The other links are only required for rendering. 
- Returns: - - """ - angles = np.cumsum(self._joint_angles) - x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)]) - self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0) - - def _check_self_collision(self): - """Checks whether line segments intersect""" - - if np.any(self._joint_angles > self.j_max) or np.any(self._joint_angles < self.j_min): - return True - - link_lines = np.stack((self._joints[:-1, :], self._joints[1:, :]), axis=1) - for i, line1 in enumerate(link_lines): - for line2 in link_lines[i + 2:, :]: - if intersect(line1[0], line1[-1], line2[0], line2[-1]): - return True - return False - - @abstractmethod - def _get_reward(self, action: np.ndarray) -> (float, dict): - pass - - @abstractmethod - def _get_obs(self) -> np.ndarray: - pass - - @abstractmethod - def _check_collisions(self) -> bool: - pass - - @abstractmethod - def _terminate(self, info) -> bool: - return False - - def seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] - - def close(self): - del self.fig - - @property - def end_effector(self): - return self._joints[self.n_links].T diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py b/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py index 12e5830..7789965 100644 --- a/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py +++ b/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py @@ -1,86 +1,19 @@ -from typing import Iterable, Union -from abc import ABCMeta, abstractmethod -import gym -import matplotlib.pyplot as plt -import numpy as np from gym import spaces -from gym.utils import seeding -from alr_envs.classic_control.utils import check_self_collision +import numpy as np +from alr_envs.alr.classic_control.base_reacher.base_reacher import BaseReacherEnv -class BaseReacherEnv(gym.Env): +class BaseReacherTorqueEnv(BaseReacherEnv): """ - Simple Reaching Task without any physics simulation. - Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions - towards the end of the trajectory. 
+ Base class for torque controlled reaching environments """ - def __init__(self, n_links: int, random_start: bool = True, - allow_self_collision: bool = False, collision_penalty: float = 1000): - super().__init__() - self.link_lengths = np.ones(n_links) - self.n_links = n_links - self._dt = 0.01 - - self.random_start = random_start - - self._joints = None - self._joint_angles = None - self._angle_velocity = None - self._is_collided = False - self.allow_self_collision = allow_self_collision - self.collision_penalty = collision_penalty - self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)]) - self._start_vel = np.zeros(self.n_links) - - self.max_torque = 1 - self.steps_before_reward = 199 + allow_self_collision: bool = False): + super().__init__(n_links, random_start, allow_self_collision) + self.max_torque = 1000 action_bound = np.ones((self.n_links,)) * self.max_torque - state_bound = np.hstack([ - [np.pi] * self.n_links, # cos - [np.pi] * self.n_links, # sin - [np.inf] * self.n_links, # velocity - [np.inf] * 2, # x-y coordinates of target distance - [np.inf] # env steps, because reward start after n steps TODO: Maybe - ]) self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) - self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) - - # containers for plotting - self.metadata = {'render.modes': ["human"]} - self.fig = None - - self._steps = 0 - self.seed() - - @property - def dt(self) -> Union[float, int]: - return self._dt - - @property - def current_pos(self): - return self._joint_angles.copy() - - @property - def current_vel(self): - return self._angle_velocity.copy() - - def reset(self): - # Sample only orientation of first link, i.e. the arm is always straight. - if self.random_start: - first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) - self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) - self._start_pos = self._joint_angles.copy() - else: - self._joint_angles = self._start_pos - - self._angle_velocity = self._start_vel - self._joints = np.zeros((self.n_links + 1, 2)) - self._update_joints() - self._steps = 0 - - return self._get_obs().copy() def step(self, action: np.ndarray): """ @@ -94,11 +27,6 @@ class BaseReacherEnv(gym.Env): self._joint_angles = self._joint_angles + self.dt * self._angle_velocity self._update_joints() - if not self.allow_self_collision: - self_collision = check_self_collision(line_points_in_taskspace) - if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision: - self_collision = True - self._is_collided = self._check_collisions() reward, info = self._get_reward(action) @@ -107,36 +35,3 @@ class BaseReacherEnv(gym.Env): done = False return self._get_obs().copy(), reward, done, info - - def _update_joints(self): - """ - update joints to get new end-effector position. The other links are only required for rendering. 
- Returns: - - """ - angles = np.cumsum(self._joint_angles) - x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)]) - self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0) - - @abstractmethod - def _get_reward(self, action: np.ndarray) -> (float, dict): - pass - - @abstractmethod - def _get_obs(self) -> np.ndarray: - pass - - @abstractmethod - def _check_collisions(self) -> bool: - pass - - def seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] - - def close(self): - del self.fig - - @property - def end_effector(self): - return self._joints[self.n_links].T diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py index 55aac9e..2cb1e08 100644 --- a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py +++ b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py @@ -3,14 +3,12 @@ from typing import Union import gym import matplotlib.pyplot as plt import numpy as np -from gym.utils import seeding from matplotlib import patches -from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherEnv -from alr_envs.alr.classic_control.utils import check_self_collision +from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv -class HoleReacherEnv(BaseReacherEnv): +class HoleReacherEnv(BaseReacherDirectEnv): def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None, hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False, allow_wall_collision: bool = False, collision_penalty: float = 1000, rew_fct: str = "simple"): @@ -28,7 +26,7 @@ class HoleReacherEnv(BaseReacherEnv): self._tmp_depth = None self._goal = None # x-y coordinates for reaching the center at the bottom of the hole - action_bound = np.pi * np.ones((self.n_links,)) + # action_bound = np.pi * np.ones((self.n_links,)) state_bound = np.hstack([ [np.pi] * self.n_links, # cos [np.pi] * self.n_links, # sin @@ -38,19 +36,25 @@ class HoleReacherEnv(BaseReacherEnv): [np.inf] * 2, # x-y coordinates of target distance [np.inf] # env steps, because reward start after n steps TODO: Maybe ]) - self.action_space = gym.spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) + # self.action_space = gym.spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) if rew_fct == "simple": - from alr_envs.alr.classic_control.hole_reacher.simple_reward import HolereacherSimpleReward - self.reward_function = HolereacherSimpleReward(allow_self_collision, allow_wall_collision, collision_penalty) + from alr_envs.alr.classic_control.hole_reacher.hr_simple_reward import HolereacherReward + self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty) + else: + raise ValueError("Unknown reward function {}".format(rew_fct)) def reset(self): self._generate_hole() self._set_patches() + self.reward_function.reset() return super().reset() + def _get_reward(self, action: np.ndarray) -> (float, dict): + return self.reward_function.get_reward(self) + def _terminate(self, info): return info["is_collided"] @@ -118,10 +122,6 @@ class HoleReacherEnv(BaseReacherEnv): end_effector[i, :, 0] = x[i, :] + end_effector[i - 1, -1, 0] end_effector[i, :, 1] = y[i, :] + end_effector[i - 1, -1, 1] - # xy = np.stack((x, y), axis=2) - # - # 
self._joints[0] + np.cumsum(xy, axis=0) - return np.squeeze(end_effector + self._joints[0, :]) def check_wall_collision(self): @@ -157,36 +157,6 @@ class HoleReacherEnv(BaseReacherEnv): return False - # def check_wall_collision(self, ): - # """find the intersection of line segments A=(x1,y1)/(x2,y2) and - # B=(x3,y3)/(x4,y4). """ - # - # link_lines = np.hstack((self._joints[:-1, :], self._joints[1:, :])) - # - # all_points_product = np.hstack( - # [np.repeat(link_lines, len(self.ground_lines), axis=0), - # np.tile(self.ground_lines, (len(link_lines), 1))]) - # - # x1, y1, x2, y2, x3, y3, x4, y4 = all_points_product.T - # - # denom = ((x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)) - # # if denom == 0: - # # return False - # px = ((x1 * y2 - y1 * x2) * (x3 - x4) - (x1 - x2) * (x3 * y4 - y3 * x4)) / denom - # py = ((x1 * y2 - y1 * x2) * (y3 - y4) - (y1 - y2) * (x3 * y4 - y3 * x4)) / denom - # # if (px - x1) * (px - x2) < 0 and (py - y1) * (py - y2) < 0 \ - # # and (px - x3) * (px - x4) < 0 and (py - y3) * (py - y4) < 0: - # # return True # [px, py] - # test = ((px - x1) * (px - x2) <= 0) & ((py - y1) * (py - y2) <= 0) & ((px - x3) * (px - x4) <= 0) & ( - # (py - y3) * (py - y4) <= 0) - # if np.any(test): - # possible_collisions = np.stack((px, py)).T[test] - # for row in possible_collisions: - # if not np.any([np.allclose(row, x) for x in self._joints]): - # return True, row - # - # return False, None - def render(self, mode='human'): if self.fig is None: # Create base figure once on the beginning. Afterwards only update @@ -242,322 +212,14 @@ class HoleReacherEnv(BaseReacherEnv): self.fig.gca().add_patch(hole_floor) -class HoleReacherEnvOld(gym.Env): - - def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None, - hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False, - allow_wall_collision: bool = False, collision_penalty: float = 1000): - - self.n_links = n_links - self.link_lengths = np.ones((n_links, 1)) - - self.random_start = random_start - - # provided initial parameters - self.initial_x = hole_x # x-position of center of hole - self.initial_width = hole_width # width of hole - self.initial_depth = hole_depth # depth of hole - - # temp container for current env state - self._tmp_x = None - self._tmp_width = None - self._tmp_depth = None - self._goal = None # x-y coordinates for reaching the center at the bottom of the hole - - # collision - self.allow_self_collision = allow_self_collision - self.allow_wall_collision = allow_wall_collision - self.collision_penalty = collision_penalty - - # state - self._joints = None - self._joint_angles = None - self._angle_velocity = None - self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)]) - self._start_vel = np.zeros(self.n_links) - - self._dt = 0.01 - - action_bound = np.pi * np.ones((self.n_links,)) - state_bound = np.hstack([ - [np.pi] * self.n_links, # cos - [np.pi] * self.n_links, # sin - [np.inf] * self.n_links, # velocity - [np.inf], # hole width - # [np.inf], # hole depth - [np.inf] * 2, # x-y coordinates of target distance - [np.inf] # env steps, because reward start after n steps TODO: Maybe - ]) - self.action_space = gym.spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) - self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) - - # containers for plotting - self.metadata = {'render.modes': ["human", "partial"]} - self.fig = None - - self._steps = 0 - self.seed() - - 
@property - def dt(self) -> Union[float, int]: - return self._dt - - # @property - # def start_pos(self): - # return self._start_pos - - @property - def current_pos(self): - return self._joint_angles.copy() - - @property - def current_vel(self): - return self._angle_velocity.copy() - - def step(self, action: np.ndarray): - """ - A single step with an action in joint velocity space - """ - - acc = (action - self._angle_velocity) / self.dt - self._angle_velocity = action - self._joint_angles = self._joint_angles + self.dt * self._angle_velocity # + 0.001 * np.random.randn(5) - self._update_joints() - - reward, info = self._get_reward(acc) - - info.update({"is_collided": self._is_collided}) - self.end_effector_traj.append(np.copy(self.end_effector)) - - self._steps += 1 - done = self._is_collided - - return self._get_obs().copy(), reward, done, info - - def reset(self): - if self.random_start: - # Maybe change more than first seed - first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) - self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) - self._start_pos = self._joint_angles.copy() - else: - self._joint_angles = self._start_pos - - self._generate_hole() - self._set_patches() - - self._angle_velocity = self._start_vel - self._joints = np.zeros((self.n_links + 1, 2)) - self._update_joints() - self._steps = 0 - self.end_effector_traj = [] - - return self._get_obs().copy() - - def _generate_hole(self): - if self.initial_width is None: - width = self.np_random.uniform(0.15, 0.5) - else: - width = np.copy(self.initial_width) - if self.initial_x is None: - # sample whole on left or right side - direction = self.np_random.choice([-1, 1]) - # Hole center needs to be half the width away from the arm to give a valid setting. - x = direction * self.np_random.uniform(width / 2, 3.5) - else: - x = np.copy(self.initial_x) - if self.initial_depth is None: - # TODO we do not want this right now. - depth = self.np_random.uniform(1, 1) - else: - depth = np.copy(self.initial_depth) - - self._tmp_width = width - self._tmp_x = x - self._tmp_depth = depth - self._goal = np.hstack([self._tmp_x, -self._tmp_depth]) - - def _update_joints(self): - """ - update _joints to get new end effector position. The other links are only required for rendering. 
- Returns: - - """ - line_points_in_taskspace = self._get_forward_kinematics(num_points_per_link=20) - - self._joints[1:, 0] = self._joints[0, 0] + line_points_in_taskspace[:, -1, 0] - self._joints[1:, 1] = self._joints[0, 1] + line_points_in_taskspace[:, -1, 1] - - self_collision = False - wall_collision = False - - if not self.allow_self_collision: - self_collision = check_self_collision(line_points_in_taskspace) - if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision: - self_collision = True - - if not self.allow_wall_collision: - wall_collision = self._check_wall_collision(line_points_in_taskspace) - - self._is_collided = self_collision or wall_collision - - def _get_reward(self, acc: np.ndarray): - reward = 0 - # success = False - - if self._steps == 199 or self._is_collided: - # return reward only in last time step - # Episode also terminates when colliding, hence return reward - dist = np.linalg.norm(self.end_effector - self._goal) - # success = dist < 0.005 and not self._is_collided - reward = - dist ** 2 - self.collision_penalty * self._is_collided - - reward -= 5e-8 * np.sum(acc ** 2) - # info = {"is_success": success} - - return reward, {} # info - - def _get_obs(self): - theta = self._joint_angles - return np.hstack([ - np.cos(theta), - np.sin(theta), - self._angle_velocity, - self._tmp_width, - # self._tmp_hole_depth, - self.end_effector - self._goal, - self._steps - ]) - - def _get_forward_kinematics(self, num_points_per_link=1): - theta = self._joint_angles[:, None] - - intermediate_points = np.linspace(0, 1, num_points_per_link) if num_points_per_link > 1 else 1 - accumulated_theta = np.cumsum(theta, axis=0) - end_effector = np.zeros(shape=(self.n_links, num_points_per_link, 2)) - - x = np.cos(accumulated_theta) * self.link_lengths * intermediate_points - y = np.sin(accumulated_theta) * self.link_lengths * intermediate_points - - end_effector[0, :, 0] = x[0, :] - end_effector[0, :, 1] = y[0, :] - - for i in range(1, self.n_links): - end_effector[i, :, 0] = x[i, :] + end_effector[i - 1, -1, 0] - end_effector[i, :, 1] = y[i, :] + end_effector[i - 1, -1, 1] - - return np.squeeze(end_effector + self._joints[0, :]) - - def _check_wall_collision(self, line_points): - # all points that are before the hole in x - r, c = np.where(line_points[:, :, 0] < (self._tmp_x - self._tmp_width / 2)) - - # check if any of those points are below surface - nr_line_points_below_surface_before_hole = np.sum(line_points[r, c, 1] < 0) - - if nr_line_points_below_surface_before_hole > 0: - return True - - # all points that are after the hole in x - r, c = np.where(line_points[:, :, 0] > (self._tmp_x + self._tmp_width / 2)) - - # check if any of those points are below surface - nr_line_points_below_surface_after_hole = np.sum(line_points[r, c, 1] < 0) - - if nr_line_points_below_surface_after_hole > 0: - return True - - # all points that are above the hole - r, c = np.where((line_points[:, :, 0] > (self._tmp_x - self._tmp_width / 2)) & ( - line_points[:, :, 0] < (self._tmp_x + self._tmp_width / 2))) - - # check if any of those points are below surface - nr_line_points_below_surface_in_hole = np.sum(line_points[r, c, 1] < -self._tmp_depth) - - if nr_line_points_below_surface_in_hole > 0: - return True - - return False - - def render(self, mode='human'): - if self.fig is None: - # Create base figure once on the beginning. 
Afterwards only update - plt.ion() - self.fig = plt.figure() - ax = self.fig.add_subplot(1, 1, 1) - - # limits - lim = np.sum(self.link_lengths) + 0.5 - ax.set_xlim([-lim, lim]) - ax.set_ylim([-1.1, lim]) - - self.line, = ax.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k') - self._set_patches() - self.fig.show() - - self.fig.gca().set_title( - f"Iteration: {self._steps}, distance: {np.linalg.norm(self.end_effector - self._goal) ** 2}") - - if mode == "human": - - # arm - self.line.set_data(self._joints[:, 0], self._joints[:, 1]) - - self.fig.canvas.draw() - self.fig.canvas.flush_events() - - elif mode == "partial": - if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided: - # Arm - plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k', - alpha=self._steps / 200) - - def _set_patches(self): - if self.fig is not None: - self.fig.gca().patches = [] - left_block = patches.Rectangle((-self.n_links, -self._tmp_depth), - self.n_links + self._tmp_x - self._tmp_width / 2, - self._tmp_depth, - fill=True, edgecolor='k', facecolor='k') - right_block = patches.Rectangle((self._tmp_x + self._tmp_width / 2, -self._tmp_depth), - self.n_links - self._tmp_x + self._tmp_width / 2, - self._tmp_depth, - fill=True, edgecolor='k', facecolor='k') - hole_floor = patches.Rectangle((self._tmp_x - self._tmp_width / 2, -self._tmp_depth), - self._tmp_width, - 1 - self._tmp_depth, - fill=True, edgecolor='k', facecolor='k') - - # Add the patch to the Axes - self.fig.gca().add_patch(left_block) - self.fig.gca().add_patch(right_block) - self.fig.gca().add_patch(hole_floor) - - def seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] - - @property - def end_effector(self): - return self._joints[self.n_links].T - - def close(self): - super().close() - if self.fig is not None: - plt.close(self.fig) - - if __name__ == "__main__": import time env = HoleReacherEnv(5) env.reset() - start = time.time() for i in range(10000): - # env.check_wall_collision() ac = env.action_space.sample() obs, rew, done, info = env.step(ac) # env.render() if done: env.reset() - print(time.time() - start) diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher_old.py b/alr_envs/alr/classic_control/hole_reacher/hole_reacher_old.py new file mode 100644 index 0000000..1b18bce --- /dev/null +++ b/alr_envs/alr/classic_control/hole_reacher/hole_reacher_old.py @@ -0,0 +1,315 @@ +from typing import Union + +import gym +import matplotlib.pyplot as plt +import numpy as np +from gym.utils import seeding +from matplotlib import patches + +from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv +from alr_envs.alr.classic_control.utils import check_self_collision + + +class HoleReacherEnvOld(gym.Env): + + def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None, + hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False, + allow_wall_collision: bool = False, collision_penalty: float = 1000): + + self.n_links = n_links + self.link_lengths = np.ones((n_links, 1)) + + self.random_start = random_start + + # provided initial parameters + self.initial_x = hole_x # x-position of center of hole + self.initial_width = hole_width # width of hole + self.initial_depth = hole_depth # depth of hole + + # temp container for current env state + self._tmp_x = None + self._tmp_width = None + self._tmp_depth = None + self._goal = None # x-y coordinates for 
reaching the center at the bottom of the hole + + # collision + self.allow_self_collision = allow_self_collision + self.allow_wall_collision = allow_wall_collision + self.collision_penalty = collision_penalty + + # state + self._joints = None + self._joint_angles = None + self._angle_velocity = None + self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)]) + self._start_vel = np.zeros(self.n_links) + + self._dt = 0.01 + + action_bound = np.pi * np.ones((self.n_links,)) + state_bound = np.hstack([ + [np.pi] * self.n_links, # cos + [np.pi] * self.n_links, # sin + [np.inf] * self.n_links, # velocity + [np.inf], # hole width + # [np.inf], # hole depth + [np.inf] * 2, # x-y coordinates of target distance + [np.inf] # env steps, because reward start after n steps TODO: Maybe + ]) + self.action_space = gym.spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) + self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) + + # containers for plotting + self.metadata = {'render.modes': ["human", "partial"]} + self.fig = None + + self._steps = 0 + self.seed() + + @property + def dt(self) -> Union[float, int]: + return self._dt + + # @property + # def start_pos(self): + # return self._start_pos + + @property + def current_pos(self): + return self._joint_angles.copy() + + @property + def current_vel(self): + return self._angle_velocity.copy() + + def step(self, action: np.ndarray): + """ + A single step with an action in joint velocity space + """ + + acc = (action - self._angle_velocity) / self.dt + self._angle_velocity = action + self._joint_angles = self._joint_angles + self.dt * self._angle_velocity # + 0.001 * np.random.randn(5) + self._update_joints() + + reward, info = self._get_reward(acc) + + info.update({"is_collided": self._is_collided}) + self.end_effector_traj.append(np.copy(self.end_effector)) + + self._steps += 1 + done = self._is_collided + + return self._get_obs().copy(), reward, done, info + + def reset(self): + if self.random_start: + # Maybe change more than first seed + first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) + self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) + self._start_pos = self._joint_angles.copy() + else: + self._joint_angles = self._start_pos + + self._generate_hole() + self._set_patches() + + self._angle_velocity = self._start_vel + self._joints = np.zeros((self.n_links + 1, 2)) + self._update_joints() + self._steps = 0 + self.end_effector_traj = [] + + return self._get_obs().copy() + + def _generate_hole(self): + if self.initial_width is None: + width = self.np_random.uniform(0.15, 0.5) + else: + width = np.copy(self.initial_width) + if self.initial_x is None: + # sample whole on left or right side + direction = self.np_random.choice([-1, 1]) + # Hole center needs to be half the width away from the arm to give a valid setting. + x = direction * self.np_random.uniform(width / 2, 3.5) + else: + x = np.copy(self.initial_x) + if self.initial_depth is None: + # TODO we do not want this right now. + depth = self.np_random.uniform(1, 1) + else: + depth = np.copy(self.initial_depth) + + self._tmp_width = width + self._tmp_x = x + self._tmp_depth = depth + self._goal = np.hstack([self._tmp_x, -self._tmp_depth]) + + def _update_joints(self): + """ + update _joints to get new end effector position. The other links are only required for rendering. 
+ Returns: + + """ + line_points_in_taskspace = self._get_forward_kinematics(num_points_per_link=20) + + self._joints[1:, 0] = self._joints[0, 0] + line_points_in_taskspace[:, -1, 0] + self._joints[1:, 1] = self._joints[0, 1] + line_points_in_taskspace[:, -1, 1] + + self_collision = False + wall_collision = False + + if not self.allow_self_collision: + self_collision = check_self_collision(line_points_in_taskspace) + if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision: + self_collision = True + + if not self.allow_wall_collision: + wall_collision = self._check_wall_collision(line_points_in_taskspace) + + self._is_collided = self_collision or wall_collision + + def _get_reward(self, acc: np.ndarray): + reward = 0 + # success = False + + if self._steps == 199 or self._is_collided: + # return reward only in last time step + # Episode also terminates when colliding, hence return reward + dist = np.linalg.norm(self.end_effector - self._goal) + # success = dist < 0.005 and not self._is_collided + reward = - dist ** 2 - self.collision_penalty * self._is_collided + + reward -= 5e-8 * np.sum(acc ** 2) + # info = {"is_success": success} + + return reward, {} # info + + def _get_obs(self): + theta = self._joint_angles + return np.hstack([ + np.cos(theta), + np.sin(theta), + self._angle_velocity, + self._tmp_width, + # self._tmp_hole_depth, + self.end_effector - self._goal, + self._steps + ]) + + def _get_forward_kinematics(self, num_points_per_link=1): + theta = self._joint_angles[:, None] + + intermediate_points = np.linspace(0, 1, num_points_per_link) if num_points_per_link > 1 else 1 + accumulated_theta = np.cumsum(theta, axis=0) + end_effector = np.zeros(shape=(self.n_links, num_points_per_link, 2)) + + x = np.cos(accumulated_theta) * self.link_lengths * intermediate_points + y = np.sin(accumulated_theta) * self.link_lengths * intermediate_points + + end_effector[0, :, 0] = x[0, :] + end_effector[0, :, 1] = y[0, :] + + for i in range(1, self.n_links): + end_effector[i, :, 0] = x[i, :] + end_effector[i - 1, -1, 0] + end_effector[i, :, 1] = y[i, :] + end_effector[i - 1, -1, 1] + + return np.squeeze(end_effector + self._joints[0, :]) + + def _check_wall_collision(self, line_points): + # all points that are before the hole in x + r, c = np.where(line_points[:, :, 0] < (self._tmp_x - self._tmp_width / 2)) + + # check if any of those points are below surface + nr_line_points_below_surface_before_hole = np.sum(line_points[r, c, 1] < 0) + + if nr_line_points_below_surface_before_hole > 0: + return True + + # all points that are after the hole in x + r, c = np.where(line_points[:, :, 0] > (self._tmp_x + self._tmp_width / 2)) + + # check if any of those points are below surface + nr_line_points_below_surface_after_hole = np.sum(line_points[r, c, 1] < 0) + + if nr_line_points_below_surface_after_hole > 0: + return True + + # all points that are above the hole + r, c = np.where((line_points[:, :, 0] > (self._tmp_x - self._tmp_width / 2)) & ( + line_points[:, :, 0] < (self._tmp_x + self._tmp_width / 2))) + + # check if any of those points are below surface + nr_line_points_below_surface_in_hole = np.sum(line_points[r, c, 1] < -self._tmp_depth) + + if nr_line_points_below_surface_in_hole > 0: + return True + + return False + + def render(self, mode='human'): + if self.fig is None: + # Create base figure once on the beginning. 
Afterwards only update + plt.ion() + self.fig = plt.figure() + ax = self.fig.add_subplot(1, 1, 1) + + # limits + lim = np.sum(self.link_lengths) + 0.5 + ax.set_xlim([-lim, lim]) + ax.set_ylim([-1.1, lim]) + + self.line, = ax.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k') + self._set_patches() + self.fig.show() + + self.fig.gca().set_title( + f"Iteration: {self._steps}, distance: {np.linalg.norm(self.end_effector - self._goal) ** 2}") + + if mode == "human": + + # arm + self.line.set_data(self._joints[:, 0], self._joints[:, 1]) + + self.fig.canvas.draw() + self.fig.canvas.flush_events() + + elif mode == "partial": + if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided: + # Arm + plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k', + alpha=self._steps / 200) + + def _set_patches(self): + if self.fig is not None: + self.fig.gca().patches = [] + left_block = patches.Rectangle((-self.n_links, -self._tmp_depth), + self.n_links + self._tmp_x - self._tmp_width / 2, + self._tmp_depth, + fill=True, edgecolor='k', facecolor='k') + right_block = patches.Rectangle((self._tmp_x + self._tmp_width / 2, -self._tmp_depth), + self.n_links - self._tmp_x + self._tmp_width / 2, + self._tmp_depth, + fill=True, edgecolor='k', facecolor='k') + hole_floor = patches.Rectangle((self._tmp_x - self._tmp_width / 2, -self._tmp_depth), + self._tmp_width, + 1 - self._tmp_depth, + fill=True, edgecolor='k', facecolor='k') + + # Add the patch to the Axes + self.fig.gca().add_patch(left_block) + self.fig.gca().add_patch(right_block) + self.fig.gca().add_patch(hole_floor) + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + @property + def end_effector(self): + return self._joints[self.n_links].T + + def close(self): + super().close() + if self.fig is not None: + plt.close(self.fig) \ No newline at end of file diff --git a/alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py b/alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py new file mode 100644 index 0000000..7fadad9 --- /dev/null +++ b/alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py @@ -0,0 +1,56 @@ +import numpy as np + + +class HolereacherReward: + def __init__(self, allow_self_collision, allow_wall_collision, collision_penalty): + self.collision_penalty = collision_penalty + + # collision + self.allow_self_collision = allow_self_collision + self.allow_wall_collision = allow_wall_collision + self.collision_penalty = collision_penalty + self._is_collided = False + + self.reward_factors = np.array((-1, -1e-4, -1e-6, -collision_penalty, -1)) + + def reset(self): + self._is_collided = False + + def get_reward(self, env): + dist_cost = 0 + collision_cost = 0 + time_cost = 0 + success = False + + self_collision = False + wall_collision = False + + if not self.allow_self_collision: + self_collision = env._check_self_collision() + + if not self.allow_wall_collision: + wall_collision = env.check_wall_collision() + + self._is_collided = self_collision or wall_collision + + if env._steps == 199 or self._is_collided: + # return reward only in last time step + # Episode also terminates when colliding, hence return reward + dist = np.linalg.norm(env.end_effector - env._goal) + + success = dist < 0.005 and not self._is_collided + dist_cost = int(not self._is_collided) * dist ** 2 + collision_cost = self._is_collided * dist ** 2 + time_cost = 199 - env._steps + + info = {"is_success": success, + "is_collided": 
self._is_collided, + "end_effector": np.copy(env.end_effector)} + + vel_cost = np.sum(env._angle_velocity ** 2) + acc_cost = np.sum(env._acc ** 2) + + reward_features = np.array((dist_cost, vel_cost, acc_cost, collision_cost, time_cost)) + reward = np.dot(reward_features, self.reward_factors) + + return reward, info diff --git a/alr_envs/alr/classic_control/hole_reacher/simple_reward.py b/alr_envs/alr/classic_control/hole_reacher/hr_simple_reward.py similarity index 64% rename from alr_envs/alr/classic_control/hole_reacher/simple_reward.py rename to alr_envs/alr/classic_control/hole_reacher/hr_simple_reward.py index 0609a62..5820ab1 100644 --- a/alr_envs/alr/classic_control/hole_reacher/simple_reward.py +++ b/alr_envs/alr/classic_control/hole_reacher/hr_simple_reward.py @@ -1,8 +1,7 @@ import numpy as np -from alr_envs.alr.classic_control.utils import check_self_collision -class HolereacherSimpleReward: +class HolereacherReward: def __init__(self, allow_self_collision, allow_wall_collision, collision_penalty): self.collision_penalty = collision_penalty @@ -11,16 +10,20 @@ class HolereacherSimpleReward: self.allow_wall_collision = allow_wall_collision self.collision_penalty = collision_penalty self._is_collided = False - pass - def get_reward(self, env, action): - reward = 0 + self.reward_factors = np.array((-1, -5e-8, -collision_penalty)) + + def reset(self): + self._is_collided = False + + def get_reward(self, env): + dist_cost = 0 + collision_cost = 0 success = False self_collision = False wall_collision = False - # joints = np.hstack((env._joints[:-1, :], env._joints[1:, :])) if not self.allow_self_collision: self_collision = env._check_self_collision() @@ -33,16 +36,18 @@ class HolereacherSimpleReward: # return reward only in last time step # Episode also terminates when colliding, hence return reward dist = np.linalg.norm(env.end_effector - env._goal) + dist_cost = dist ** 2 + collision_cost = int(self._is_collided) success = dist < 0.005 and not self._is_collided - reward = - dist ** 2 - self.collision_penalty * self._is_collided info = {"is_success": success, - "is_collided": self._is_collided} + "is_collided": self._is_collided, + "end_effector": np.copy(env.end_effector)} - acc = (action - env._angle_velocity) / env.dt - reward -= 5e-8 * np.sum(acc ** 2) + acc_cost = np.sum(env._acc ** 2) + + reward_features = np.array((dist_cost, acc_cost, collision_cost)) + reward = np.dot(reward_features, self.reward_factors) return reward, info - - diff --git a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py index 08c151f..40a4d95 100644 --- a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py +++ b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py @@ -1,26 +1,22 @@ from typing import Iterable, Union -import gym import matplotlib.pyplot as plt import numpy as np from gym import spaces -from gym.utils import seeding + +from alr_envs.alr.classic_control.base_reacher.base_reacher_torque import BaseReacherTorqueEnv -class SimpleReacherEnv(gym.Env): +class SimpleReacherEnv(BaseReacherTorqueEnv): """ Simple Reaching Task without any physics simulation. Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions towards the end of the trajectory. 
""" - def __init__(self, n_links: int, target: Union[None, Iterable] = None, random_start: bool = True): - super().__init__() - self.link_lengths = np.ones(n_links) - self.n_links = n_links - self._dt = 0.1 - - self.random_start = random_start + def __init__(self, n_links: int, target: Union[None, Iterable] = None, random_start: bool = True, + allow_self_collision: bool = False,): + super().__init__(n_links, random_start, allow_self_collision) # provided initial parameters self.inital_target = target @@ -28,16 +24,10 @@ class SimpleReacherEnv(gym.Env): # temp container for current env state self._goal = None - self._joints = None - self._joint_angles = None - self._angle_velocity = None self._start_pos = np.zeros(self.n_links) - self._start_vel = np.zeros(self.n_links) - self.max_torque = 1 self.steps_before_reward = 199 - action_bound = np.ones((self.n_links,)) * self.max_torque state_bound = np.hstack([ [np.pi] * self.n_links, # cos [np.pi] * self.n_links, # sin @@ -45,84 +35,24 @@ class SimpleReacherEnv(gym.Env): [np.inf] * 2, # x-y coordinates of target distance [np.inf] # env steps, because reward start after n steps TODO: Maybe ]) - self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) - # containers for plotting - self.metadata = {'render.modes': ["human"]} - self.fig = None - - self._steps = 0 - self.seed() - - @property - def dt(self) -> Union[float, int]: - return self._dt - # @property # def start_pos(self): # return self._start_pos - @property - def current_pos(self): - return self._joint_angles - - @property - def current_vel(self): - return self._angle_velocity - - def step(self, action: np.ndarray): - """ - A single step with action in torque space - """ - - # action = self._add_action_noise(action) - ac = np.clip(action, -self.max_torque, self.max_torque) - - self._angle_velocity = self._angle_velocity + self.dt * ac - self._joint_angles = self._joint_angles + self.dt * self._angle_velocity - self._update_joints() - - reward, info = self._get_reward(action) - - self._steps += 1 - done = False - - return self._get_obs().copy(), reward, done, info - def reset(self): - - # TODO: maybe do initialisation more random? - # Sample only orientation of first link, i.e. the arm is always straight. - if self.random_start: - self._joint_angles = np.hstack([[self.np_random.uniform(-np.pi, np.pi)], np.zeros(self.n_links - 1)]) - self._start_pos = self._joint_angles.copy() - else: - self._joint_angles = self._start_pos - self._generate_goal() - self._angle_velocity = self._start_vel - self._joints = np.zeros((self.n_links + 1, 2)) - self._update_joints() - self._steps = 0 - - return self._get_obs().copy() - - def _update_joints(self): - """ - update joints to get new end-effector position. The other links are only required for rendering. 
- Returns: - - """ - angles = np.cumsum(self._joint_angles) - x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)]) - self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0) + return super().reset() def _get_reward(self, action: np.ndarray): diff = self.end_effector - self._goal reward_dist = 0 + if not self.allow_self_collision: + self._is_collided = self._check_self_collision() + if self._steps >= self.steps_before_reward: reward_dist -= np.linalg.norm(diff) # reward_dist = np.exp(-0.1 * diff ** 2).mean() @@ -132,6 +62,9 @@ class SimpleReacherEnv(gym.Env): reward = reward_dist - reward_ctrl return reward, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl) + def _terminate(self, info): + return False + def _get_obs(self): theta = self._joint_angles return np.hstack([ @@ -190,13 +123,14 @@ class SimpleReacherEnv(gym.Env): self.fig.canvas.draw() self.fig.canvas.flush_events() - def seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] - def close(self): - del self.fig +if __name__ == "__main__": + env = SimpleReacherEnv(5) + env.reset() + for i in range(200): + ac = env.action_space.sample() + obs, rew, done, info = env.step(ac) - @property - def end_effector(self): - return self._joints[self.n_links].T + env.render() + if done: + break diff --git a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py index 95d1e7f..3b47969 100644 --- a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py +++ b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py @@ -6,17 +6,15 @@ import numpy as np from gym.utils import seeding from alr_envs.alr.classic_control.utils import check_self_collision +from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv -class ViaPointReacherEnv(gym.Env): +class ViaPointReacherEnv(BaseReacherDirectEnv): def __init__(self, n_links, random_start: bool = False, via_target: Union[None, Iterable] = None, target: Union[None, Iterable] = None, allow_self_collision=False, collision_penalty=1000): - self.n_links = n_links - self.link_lengths = np.ones((n_links, 1)) - - self.random_start = random_start + super().__init__(n_links, random_start, allow_self_collision) # provided initial parameters self.intitial_target = target # provided target value @@ -30,17 +28,6 @@ class ViaPointReacherEnv(gym.Env): self.allow_self_collision = allow_self_collision self.collision_penalty = collision_penalty - # state - self._joints = None - self._joint_angles = None - self._angle_velocity = None - self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)]) - self._start_vel = np.zeros(self.n_links) - self.weight_matrix_scale = 1 - - self._dt = 0.01 - - action_bound = np.pi * np.ones((self.n_links,)) state_bound = np.hstack([ [np.pi] * self.n_links, # cos [np.pi] * self.n_links, # sin @@ -49,69 +36,15 @@ class ViaPointReacherEnv(gym.Env): [np.inf] * 2, # x-y coordinates of target distance [np.inf] # env steps, because reward start after n steps ]) - self.action_space = gym.spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) - # containers for plotting - self.metadata = {'render.modes': ["human", "partial"]} - self.fig = None - - self._steps = 0 - self.seed() - - @property - def dt(self): - return self._dt - # @property # def start_pos(self): # return self._start_pos - 
@property - def current_pos(self): - return self._joint_angles.copy() - - @property - def current_vel(self): - return self._angle_velocity.copy() - - def step(self, action: np.ndarray): - """ - a single step with an action in joint velocity space - """ - vel = action - self._angle_velocity = vel - self._joint_angles = self._joint_angles + self.dt * self._angle_velocity - self._update_joints() - - acc = (vel - self._angle_velocity) / self.dt - reward, info = self._get_reward(acc) - - info.update({"is_collided": self._is_collided}) - - self._steps += 1 - done = self._is_collided - - return self._get_obs().copy(), reward, done, info - def reset(self): - - if self.random_start: - # Maybe change more than dirst seed - first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) - self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) - self._start_pos = self._joint_angles.copy() - else: - self._joint_angles = self._start_pos - self._generate_goal() - - self._angle_velocity = self._start_vel - self._joints = np.zeros((self.n_links + 1, 2)) - self._update_joints() - self._steps = 0 - - return self._get_obs().copy() + return super().reset() def _generate_goal(self): # TODO: Maybe improve this later, this can yield quite a lot of invalid settings @@ -137,29 +70,13 @@ class ViaPointReacherEnv(gym.Env): self._via_point = via_target self._goal = goal - def _update_joints(self): - """ - update _joints to get new end effector position. The other links are only required for rendering. - Returns: - - """ - line_points_in_taskspace = self.get_forward_kinematics(num_points_per_link=20) - - self._joints[1:, 0] = self._joints[0, 0] + line_points_in_taskspace[:, -1, 0] - self._joints[1:, 1] = self._joints[0, 1] + line_points_in_taskspace[:, -1, 1] - - self_collision = False - - if not self.allow_self_collision: - self_collision = check_self_collision(line_points_in_taskspace) - if np.any(np.abs(self._joint_angles) > np.pi): - self_collision = True - - self._is_collided = self_collision - def _get_reward(self, acc): success = False reward = -np.inf + + if not self.allow_self_collision: + self._is_collided = self._check_self_collision() + if not self._is_collided: dist = np.inf # return intermediate reward for via point @@ -177,10 +94,15 @@ class ViaPointReacherEnv(gym.Env): reward -= dist ** 2 reward -= 5e-8 * np.sum(acc ** 2) - info = {"is_success": success} + info = {"is_success": success, + "is_collided": self._is_collided, + "end_effector": np.copy(env.end_effector)} return reward, info + def _terminate(self, info): + return info["is_collided"] + def _get_obs(self): theta = self._joint_angles return np.hstack([ @@ -192,27 +114,6 @@ class ViaPointReacherEnv(gym.Env): self._steps ]) - def get_forward_kinematics(self, num_points_per_link=1): - theta = self._joint_angles[:, None] - - intermediate_points = np.linspace(0, 1, num_points_per_link) if num_points_per_link > 1 else 1 - - accumulated_theta = np.cumsum(theta, axis=0) - - endeffector = np.zeros(shape=(self.n_links, num_points_per_link, 2)) - - x = np.cos(accumulated_theta) * self.link_lengths * intermediate_points - y = np.sin(accumulated_theta) * self.link_lengths * intermediate_points - - endeffector[0, :, 0] = x[0, :] - endeffector[0, :, 1] = y[0, :] - - for i in range(1, self.n_links): - endeffector[i, :, 0] = x[i, :] + endeffector[i - 1, -1, 0] - endeffector[i, :, 1] = y[i, :] + endeffector[i - 1, -1, 1] - - return np.squeeze(endeffector + self._joints[0, :]) - def render(self, mode='human'): goal_pos = self._goal.T via_pos = 
self._via_point.T @@ -281,14 +182,14 @@ class ViaPointReacherEnv(gym.Env): plt.pause(0.01) - def seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] +if __name__ == "__main__": + import time + env = ViaPointReacherEnv(5) + env.reset() - @property - def end_effector(self): - return self._joints[self.n_links].T - - def close(self): - if self.fig is not None: - plt.close(self.fig) + for i in range(10000): + ac = env.action_space.sample() + obs, rew, done, info = env.step(ac) + env.render() + if done: + env.reset() diff --git a/alr_envs/examples/examples_motion_primitives.py b/alr_envs/examples/examples_motion_primitives.py index b307a52..e84da2f 100644 --- a/alr_envs/examples/examples_motion_primitives.py +++ b/alr_envs/examples/examples_motion_primitives.py @@ -149,16 +149,16 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): if __name__ == '__main__': render = False # DMP - # example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=10, render=render) + example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=10, render=render) # ProMP - # example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=100, render=render) + example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=100, render=render) # DetProMP example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=100, render=render) # Altered basis functions - # example_custom_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render) + example_custom_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render) # Custom MP - # example_fully_custom_mp(seed=10, iterations=1, render=render) + example_fully_custom_mp(seed=10, iterations=1, render=render) From df87885c8115bb9dc5c254c1b010a4706645c041 Mon Sep 17 00:00:00 2001 From: Maximilian Huettenrauch Date: Tue, 30 Nov 2021 09:33:26 +0100 Subject: [PATCH 05/15] removed old holereacher env --- alr_envs/alr/__init__.py | 4 +- alr_envs/alr/classic_control/__init__.py | 2 +- .../hole_reacher/hole_reacher.py | 3 + .../hole_reacher/hole_reacher_old.py | 315 ------------------ .../hole_reacher/hr_dist_vel_acc_reward.py | 22 +- 5 files changed, 19 insertions(+), 327 deletions(-) delete mode 100644 alr_envs/alr/classic_control/hole_reacher/hole_reacher_old.py diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py index a364b96..2f7a81d 100644 --- a/alr_envs/alr/__init__.py +++ b/alr_envs/alr/__init__.py @@ -335,7 +335,7 @@ for _v in _versions: "num_basis": 5, "duration": 2, "learn_goal": True, - "alpha_phase": 2, + "alpha_phase": 2.5, "bandwidth_factor": 2, "policy_type": "velocity", "weights_scale": 50, @@ -357,7 +357,7 @@ for _v in _versions: "num_basis": 5, "duration": 2, "policy_type": "velocity", - "weights_scale": 0.2, + "weights_scale": 0.1, "zero_start": True } } diff --git a/alr_envs/alr/classic_control/__init__.py b/alr_envs/alr/classic_control/__init__.py index 38f0faf..73575ab 100644 --- a/alr_envs/alr/classic_control/__init__.py +++ b/alr_envs/alr/classic_control/__init__.py @@ -1,3 +1,3 @@ -from .hole_reacher.hole_reacher import HoleReacherEnv, HoleReacherEnvOld +from .hole_reacher.hole_reacher import HoleReacherEnv from .simple_reacher.simple_reacher import SimpleReacherEnv from .viapoint_reacher.viapoint_reacher import ViaPointReacherEnv diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py index 2cb1e08..03ceee2 100644 --- a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py +++ 
b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py @@ -42,6 +42,9 @@ class HoleReacherEnv(BaseReacherDirectEnv): if rew_fct == "simple": from alr_envs.alr.classic_control.hole_reacher.hr_simple_reward import HolereacherReward self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty) + elif rew_fct == "vel_acc": + from alr_envs.alr.classic_control.hole_reacher.hr_dist_vel_acc_reward import HolereacherReward + self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty) else: raise ValueError("Unknown reward function {}".format(rew_fct)) diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher_old.py b/alr_envs/alr/classic_control/hole_reacher/hole_reacher_old.py deleted file mode 100644 index 1b18bce..0000000 --- a/alr_envs/alr/classic_control/hole_reacher/hole_reacher_old.py +++ /dev/null @@ -1,315 +0,0 @@ -from typing import Union - -import gym -import matplotlib.pyplot as plt -import numpy as np -from gym.utils import seeding -from matplotlib import patches - -from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv -from alr_envs.alr.classic_control.utils import check_self_collision - - -class HoleReacherEnvOld(gym.Env): - - def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None, - hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False, - allow_wall_collision: bool = False, collision_penalty: float = 1000): - - self.n_links = n_links - self.link_lengths = np.ones((n_links, 1)) - - self.random_start = random_start - - # provided initial parameters - self.initial_x = hole_x # x-position of center of hole - self.initial_width = hole_width # width of hole - self.initial_depth = hole_depth # depth of hole - - # temp container for current env state - self._tmp_x = None - self._tmp_width = None - self._tmp_depth = None - self._goal = None # x-y coordinates for reaching the center at the bottom of the hole - - # collision - self.allow_self_collision = allow_self_collision - self.allow_wall_collision = allow_wall_collision - self.collision_penalty = collision_penalty - - # state - self._joints = None - self._joint_angles = None - self._angle_velocity = None - self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)]) - self._start_vel = np.zeros(self.n_links) - - self._dt = 0.01 - - action_bound = np.pi * np.ones((self.n_links,)) - state_bound = np.hstack([ - [np.pi] * self.n_links, # cos - [np.pi] * self.n_links, # sin - [np.inf] * self.n_links, # velocity - [np.inf], # hole width - # [np.inf], # hole depth - [np.inf] * 2, # x-y coordinates of target distance - [np.inf] # env steps, because reward start after n steps TODO: Maybe - ]) - self.action_space = gym.spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) - self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) - - # containers for plotting - self.metadata = {'render.modes': ["human", "partial"]} - self.fig = None - - self._steps = 0 - self.seed() - - @property - def dt(self) -> Union[float, int]: - return self._dt - - # @property - # def start_pos(self): - # return self._start_pos - - @property - def current_pos(self): - return self._joint_angles.copy() - - @property - def current_vel(self): - return self._angle_velocity.copy() - - def step(self, action: np.ndarray): - """ - A single step with an action in joint velocity space - """ 
- - acc = (action - self._angle_velocity) / self.dt - self._angle_velocity = action - self._joint_angles = self._joint_angles + self.dt * self._angle_velocity # + 0.001 * np.random.randn(5) - self._update_joints() - - reward, info = self._get_reward(acc) - - info.update({"is_collided": self._is_collided}) - self.end_effector_traj.append(np.copy(self.end_effector)) - - self._steps += 1 - done = self._is_collided - - return self._get_obs().copy(), reward, done, info - - def reset(self): - if self.random_start: - # Maybe change more than first seed - first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) - self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) - self._start_pos = self._joint_angles.copy() - else: - self._joint_angles = self._start_pos - - self._generate_hole() - self._set_patches() - - self._angle_velocity = self._start_vel - self._joints = np.zeros((self.n_links + 1, 2)) - self._update_joints() - self._steps = 0 - self.end_effector_traj = [] - - return self._get_obs().copy() - - def _generate_hole(self): - if self.initial_width is None: - width = self.np_random.uniform(0.15, 0.5) - else: - width = np.copy(self.initial_width) - if self.initial_x is None: - # sample whole on left or right side - direction = self.np_random.choice([-1, 1]) - # Hole center needs to be half the width away from the arm to give a valid setting. - x = direction * self.np_random.uniform(width / 2, 3.5) - else: - x = np.copy(self.initial_x) - if self.initial_depth is None: - # TODO we do not want this right now. - depth = self.np_random.uniform(1, 1) - else: - depth = np.copy(self.initial_depth) - - self._tmp_width = width - self._tmp_x = x - self._tmp_depth = depth - self._goal = np.hstack([self._tmp_x, -self._tmp_depth]) - - def _update_joints(self): - """ - update _joints to get new end effector position. The other links are only required for rendering. 
- Returns: - - """ - line_points_in_taskspace = self._get_forward_kinematics(num_points_per_link=20) - - self._joints[1:, 0] = self._joints[0, 0] + line_points_in_taskspace[:, -1, 0] - self._joints[1:, 1] = self._joints[0, 1] + line_points_in_taskspace[:, -1, 1] - - self_collision = False - wall_collision = False - - if not self.allow_self_collision: - self_collision = check_self_collision(line_points_in_taskspace) - if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision: - self_collision = True - - if not self.allow_wall_collision: - wall_collision = self._check_wall_collision(line_points_in_taskspace) - - self._is_collided = self_collision or wall_collision - - def _get_reward(self, acc: np.ndarray): - reward = 0 - # success = False - - if self._steps == 199 or self._is_collided: - # return reward only in last time step - # Episode also terminates when colliding, hence return reward - dist = np.linalg.norm(self.end_effector - self._goal) - # success = dist < 0.005 and not self._is_collided - reward = - dist ** 2 - self.collision_penalty * self._is_collided - - reward -= 5e-8 * np.sum(acc ** 2) - # info = {"is_success": success} - - return reward, {} # info - - def _get_obs(self): - theta = self._joint_angles - return np.hstack([ - np.cos(theta), - np.sin(theta), - self._angle_velocity, - self._tmp_width, - # self._tmp_hole_depth, - self.end_effector - self._goal, - self._steps - ]) - - def _get_forward_kinematics(self, num_points_per_link=1): - theta = self._joint_angles[:, None] - - intermediate_points = np.linspace(0, 1, num_points_per_link) if num_points_per_link > 1 else 1 - accumulated_theta = np.cumsum(theta, axis=0) - end_effector = np.zeros(shape=(self.n_links, num_points_per_link, 2)) - - x = np.cos(accumulated_theta) * self.link_lengths * intermediate_points - y = np.sin(accumulated_theta) * self.link_lengths * intermediate_points - - end_effector[0, :, 0] = x[0, :] - end_effector[0, :, 1] = y[0, :] - - for i in range(1, self.n_links): - end_effector[i, :, 0] = x[i, :] + end_effector[i - 1, -1, 0] - end_effector[i, :, 1] = y[i, :] + end_effector[i - 1, -1, 1] - - return np.squeeze(end_effector + self._joints[0, :]) - - def _check_wall_collision(self, line_points): - # all points that are before the hole in x - r, c = np.where(line_points[:, :, 0] < (self._tmp_x - self._tmp_width / 2)) - - # check if any of those points are below surface - nr_line_points_below_surface_before_hole = np.sum(line_points[r, c, 1] < 0) - - if nr_line_points_below_surface_before_hole > 0: - return True - - # all points that are after the hole in x - r, c = np.where(line_points[:, :, 0] > (self._tmp_x + self._tmp_width / 2)) - - # check if any of those points are below surface - nr_line_points_below_surface_after_hole = np.sum(line_points[r, c, 1] < 0) - - if nr_line_points_below_surface_after_hole > 0: - return True - - # all points that are above the hole - r, c = np.where((line_points[:, :, 0] > (self._tmp_x - self._tmp_width / 2)) & ( - line_points[:, :, 0] < (self._tmp_x + self._tmp_width / 2))) - - # check if any of those points are below surface - nr_line_points_below_surface_in_hole = np.sum(line_points[r, c, 1] < -self._tmp_depth) - - if nr_line_points_below_surface_in_hole > 0: - return True - - return False - - def render(self, mode='human'): - if self.fig is None: - # Create base figure once on the beginning. 
Afterwards only update - plt.ion() - self.fig = plt.figure() - ax = self.fig.add_subplot(1, 1, 1) - - # limits - lim = np.sum(self.link_lengths) + 0.5 - ax.set_xlim([-lim, lim]) - ax.set_ylim([-1.1, lim]) - - self.line, = ax.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k') - self._set_patches() - self.fig.show() - - self.fig.gca().set_title( - f"Iteration: {self._steps}, distance: {np.linalg.norm(self.end_effector - self._goal) ** 2}") - - if mode == "human": - - # arm - self.line.set_data(self._joints[:, 0], self._joints[:, 1]) - - self.fig.canvas.draw() - self.fig.canvas.flush_events() - - elif mode == "partial": - if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided: - # Arm - plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k', - alpha=self._steps / 200) - - def _set_patches(self): - if self.fig is not None: - self.fig.gca().patches = [] - left_block = patches.Rectangle((-self.n_links, -self._tmp_depth), - self.n_links + self._tmp_x - self._tmp_width / 2, - self._tmp_depth, - fill=True, edgecolor='k', facecolor='k') - right_block = patches.Rectangle((self._tmp_x + self._tmp_width / 2, -self._tmp_depth), - self.n_links - self._tmp_x + self._tmp_width / 2, - self._tmp_depth, - fill=True, edgecolor='k', facecolor='k') - hole_floor = patches.Rectangle((self._tmp_x - self._tmp_width / 2, -self._tmp_depth), - self._tmp_width, - 1 - self._tmp_depth, - fill=True, edgecolor='k', facecolor='k') - - # Add the patch to the Axes - self.fig.gca().add_patch(left_block) - self.fig.gca().add_patch(right_block) - self.fig.gca().add_patch(hole_floor) - - def seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] - - @property - def end_effector(self): - return self._joints[self.n_links].T - - def close(self): - super().close() - if self.fig is not None: - plt.close(self.fig) \ No newline at end of file diff --git a/alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py b/alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py index 7fadad9..1d75b66 100644 --- a/alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py +++ b/alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py @@ -11,10 +11,11 @@ class HolereacherReward: self.collision_penalty = collision_penalty self._is_collided = False - self.reward_factors = np.array((-1, -1e-4, -1e-6, -collision_penalty, -1)) + self.reward_factors = np.array((-1, -1e-4, -1e-6, -collision_penalty, 0)) def reset(self): self._is_collided = False + self.collision_dist = 0 def get_reward(self, env): dist_cost = 0 @@ -25,22 +26,25 @@ class HolereacherReward: self_collision = False wall_collision = False - if not self.allow_self_collision: - self_collision = env._check_self_collision() + if not self._is_collided: + if not self.allow_self_collision: + self_collision = env._check_self_collision() - if not self.allow_wall_collision: - wall_collision = env.check_wall_collision() + if not self.allow_wall_collision: + wall_collision = env.check_wall_collision() - self._is_collided = self_collision or wall_collision + self._is_collided = self_collision or wall_collision - if env._steps == 199 or self._is_collided: + self.collision_dist = np.linalg.norm(env.end_effector - env._goal) + + if env._steps == 199: # or self._is_collided: # return reward only in last time step # Episode also terminates when colliding, hence return reward dist = np.linalg.norm(env.end_effector - env._goal) success = dist < 0.005 and not self._is_collided - 
dist_cost = int(not self._is_collided) * dist ** 2 - collision_cost = self._is_collided * dist ** 2 + dist_cost = dist ** 2 + collision_cost = self._is_collided * self.collision_dist ** 2 time_cost = 199 - env._steps info = {"is_success": success, From dc6df7a7c0931c91081d6958486305475ac45dc4 Mon Sep 17 00:00:00 2001 From: Maximilian Huettenrauch Date: Tue, 30 Nov 2021 10:33:04 +0100 Subject: [PATCH 06/15] added ABC to reacher base envs --- .../alr/classic_control/base_reacher/base_reacher.py | 4 ++-- .../classic_control/base_reacher/base_reacher_direct.py | 6 ++++-- .../classic_control/base_reacher/base_reacher_torque.py | 9 ++++----- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher.py b/alr_envs/alr/classic_control/base_reacher/base_reacher.py index 66b234f..dd8c64b 100644 --- a/alr_envs/alr/classic_control/base_reacher/base_reacher.py +++ b/alr_envs/alr/classic_control/base_reacher/base_reacher.py @@ -1,5 +1,5 @@ from typing import Iterable, Union -from abc import ABCMeta, abstractmethod +from abc import ABC, abstractmethod import gym import matplotlib.pyplot as plt import numpy as np @@ -8,7 +8,7 @@ from gym.utils import seeding from alr_envs.alr.classic_control.utils import intersect -class BaseReacherEnv(gym.Env): +class BaseReacherEnv(gym.Env, ABC): """ Base class for all reaching environments. """ diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py b/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py index 4f7b6f1..dc79827 100644 --- a/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py +++ b/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py @@ -1,9 +1,11 @@ +from abc import ABC + from gym import spaces import numpy as np from alr_envs.alr.classic_control.base_reacher.base_reacher import BaseReacherEnv -class BaseReacherDirectEnv(BaseReacherEnv): +class BaseReacherDirectEnv(BaseReacherEnv, ABC): """ Base class for directly controlled reaching environments """ @@ -11,7 +13,7 @@ class BaseReacherDirectEnv(BaseReacherEnv): allow_self_collision: bool = False): super().__init__(n_links, random_start, allow_self_collision) - self.max_vel = 10 * np.pi + self.max_vel = 2 * np.pi action_bound = np.ones((self.n_links,)) * self.max_vel self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py b/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py index 7789965..094f632 100644 --- a/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py +++ b/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py @@ -1,9 +1,11 @@ +from abc import ABC + from gym import spaces import numpy as np from alr_envs.alr.classic_control.base_reacher.base_reacher import BaseReacherEnv -class BaseReacherTorqueEnv(BaseReacherEnv): +class BaseReacherTorqueEnv(BaseReacherEnv, ABC): """ Base class for torque controlled reaching environments """ @@ -20,10 +22,7 @@ class BaseReacherTorqueEnv(BaseReacherEnv): A single step with action in torque space """ - # action = self._add_action_noise(action) - ac = np.clip(action, -self.max_torque, self.max_torque) - - self._angle_velocity = self._angle_velocity + self.dt * ac + self._angle_velocity = self._angle_velocity + self.dt * action self._joint_angles = self._joint_angles + self.dt * self._angle_velocity self._update_joints() From 655d52aa35d8fe1e5b148f68555eb256fd8be580 Mon Sep 17 00:00:00 2001 From: 
Maximilian Huettenrauch Date: Tue, 30 Nov 2021 12:05:19 +0100 Subject: [PATCH 07/15] reacher envs pass unittests --- alr_envs/alr/__init__.py | 4 ++-- .../alr/classic_control/base_reacher/base_reacher.py | 5 +++++ .../alr/classic_control/hole_reacher/hole_reacher.py | 5 ++++- .../classic_control/simple_reacher/simple_reacher.py | 3 +++ .../viapoint_reacher/viapoint_reacher.py | 6 ++++-- alr_envs/alr/mujoco/reacher/alr_reacher.py | 1 + alr_envs/examples/examples_motion_primitives.py | 10 +++++----- 7 files changed, 24 insertions(+), 10 deletions(-) diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py index 2f7a81d..03a986f 100644 --- a/alr_envs/alr/__init__.py +++ b/alr_envs/alr/__init__.py @@ -210,7 +210,7 @@ for _v in _versions: "mp_kwargs": { "num_dof": 2 if "long" not in _v.lower() else 5, "num_basis": 5, - "duration": 20, + "duration": 2, "alpha_phase": 2, "learn_goal": True, "policy_type": "velocity", @@ -250,7 +250,7 @@ for _v in _versions: "mp_kwargs": { "num_dof": 2 if "long" not in _v.lower() else 5, "num_basis": 5, - "duration": 20, + "duration": 2, "width": 0.025, "policy_type": "velocity", "weights_scale": 0.2, diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher.py b/alr_envs/alr/classic_control/base_reacher/base_reacher.py index dd8c64b..1b1ad19 100644 --- a/alr_envs/alr/classic_control/base_reacher/base_reacher.py +++ b/alr_envs/alr/classic_control/base_reacher/base_reacher.py @@ -22,6 +22,8 @@ class BaseReacherEnv(gym.Env, ABC): self.random_start = random_start + self.allow_self_collision = allow_self_collision + # state self._joints = None self._joint_angles = None @@ -103,6 +105,9 @@ class BaseReacherEnv(gym.Env, ABC): def _check_self_collision(self): """Checks whether line segments intersect""" + if self.allow_self_collision: + return False + if np.any(self._joint_angles > self.j_max) or np.any(self._joint_angles < self.j_min): return True diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py index 03ceee2..dd7321a 100644 --- a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py +++ b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py @@ -127,6 +127,9 @@ class HoleReacherEnv(BaseReacherDirectEnv): return np.squeeze(end_effector + self._joints[0, :]) + def _check_collisions(self) -> bool: + return self._check_self_collision() or self.check_wall_collision() + def check_wall_collision(self): line_points = self._get_line_points(num_points_per_link=100) @@ -223,6 +226,6 @@ if __name__ == "__main__": for i in range(10000): ac = env.action_space.sample() obs, rew, done, info = env.step(ac) - # env.render() + env.render() if done: env.reset() diff --git a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py index 40a4d95..758f824 100644 --- a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py +++ b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py @@ -88,6 +88,9 @@ class SimpleReacherEnv(BaseReacherTorqueEnv): self._goal = goal + def _check_collisions(self) -> bool: + return self._check_self_collision() + def render(self, mode='human'): # pragma: no cover if self.fig is None: # Create base figure once on the beginning. 
Afterwards only update diff --git a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py index 3b47969..748eb99 100644 --- a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py +++ b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py @@ -25,7 +25,6 @@ class ViaPointReacherEnv(BaseReacherDirectEnv): self._goal = np.array((n_links, 0)) # collision - self.allow_self_collision = allow_self_collision self.collision_penalty = collision_penalty state_bound = np.hstack([ @@ -96,7 +95,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv): reward -= 5e-8 * np.sum(acc ** 2) info = {"is_success": success, "is_collided": self._is_collided, - "end_effector": np.copy(env.end_effector)} + "end_effector": np.copy(self.end_effector)} return reward, info @@ -114,6 +113,9 @@ class ViaPointReacherEnv(BaseReacherDirectEnv): self._steps ]) + def _check_collisions(self) -> bool: + return self._check_self_collision() + def render(self, mode='human'): goal_pos = self._goal.T via_pos = self._via_point.T diff --git a/alr_envs/alr/mujoco/reacher/alr_reacher.py b/alr_envs/alr/mujoco/reacher/alr_reacher.py index 6ca66d1..2d122d2 100644 --- a/alr_envs/alr/mujoco/reacher/alr_reacher.py +++ b/alr_envs/alr/mujoco/reacher/alr_reacher.py @@ -87,6 +87,7 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle): [self._steps], ]) + if __name__ == '__main__': nl = 5 render_mode = "human" # "human" or "partial" or "final" diff --git a/alr_envs/examples/examples_motion_primitives.py b/alr_envs/examples/examples_motion_primitives.py index e84da2f..0df05c1 100644 --- a/alr_envs/examples/examples_motion_primitives.py +++ b/alr_envs/examples/examples_motion_primitives.py @@ -112,7 +112,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper. # You can also add other gym.Wrappers in case they are needed. 
- wrappers = [alr_envs.classic_control.hole_reacher.MPWrapper] + wrappers = [alr_envs.alr.classic_control.hole_reacher.MPWrapper] mp_kwargs = { "num_dof": 5, "num_basis": 5, @@ -147,15 +147,15 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): if __name__ == '__main__': - render = False + render = True # DMP - example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=10, render=render) + example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render) # ProMP - example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=100, render=render) + example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=1, render=render) # DetProMP - example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=100, render=render) + example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render) # Altered basis functions example_custom_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render) From ebca59b4bda683276450b5e19950f850edd1cb8b Mon Sep 17 00:00:00 2001 From: Maximilian Huettenrauch Date: Thu, 8 Jul 2021 13:48:08 +0200 Subject: [PATCH 08/15] rebase onto master --- alr_envs/alr/classic_control/utils.py | 2 +- .../classic_control/base_reacher/__init__.py | 0 .../base_reacher/base_reacher_direct.py | 142 ++++++++++++++++++ .../base_reacher/base_reacher_torque.py | 142 ++++++++++++++++++ 4 files changed, 285 insertions(+), 1 deletion(-) create mode 100644 alr_envs/classic_control/base_reacher/__init__.py create mode 100644 alr_envs/classic_control/base_reacher/base_reacher_direct.py create mode 100644 alr_envs/classic_control/base_reacher/base_reacher_torque.py diff --git a/alr_envs/alr/classic_control/utils.py b/alr_envs/alr/classic_control/utils.py index fa8176a..ea378d1 100644 --- a/alr_envs/alr/classic_control/utils.py +++ b/alr_envs/alr/classic_control/utils.py @@ -10,7 +10,7 @@ def intersect(A, B, C, D): def check_self_collision(line_points): - """Checks whether line segments and intersect""" + """Checks whether line segments intersect""" for i, line1 in enumerate(line_points): for line2 in line_points[i + 2:, :, :]: if intersect(line1[0], line1[-1], line2[0], line2[-1]): diff --git a/alr_envs/classic_control/base_reacher/__init__.py b/alr_envs/classic_control/base_reacher/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/alr_envs/classic_control/base_reacher/base_reacher_direct.py b/alr_envs/classic_control/base_reacher/base_reacher_direct.py new file mode 100644 index 0000000..12e5830 --- /dev/null +++ b/alr_envs/classic_control/base_reacher/base_reacher_direct.py @@ -0,0 +1,142 @@ +from typing import Iterable, Union +from abc import ABCMeta, abstractmethod +import gym +import matplotlib.pyplot as plt +import numpy as np +from gym import spaces +from gym.utils import seeding +from alr_envs.classic_control.utils import check_self_collision + + +class BaseReacherEnv(gym.Env): + """ + Simple Reaching Task without any physics simulation. + Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions + towards the end of the trajectory. 
+ """ + + def __init__(self, n_links: int, random_start: bool = True, + allow_self_collision: bool = False, collision_penalty: float = 1000): + super().__init__() + self.link_lengths = np.ones(n_links) + self.n_links = n_links + self._dt = 0.01 + + self.random_start = random_start + + self._joints = None + self._joint_angles = None + self._angle_velocity = None + self._is_collided = False + self.allow_self_collision = allow_self_collision + self.collision_penalty = collision_penalty + self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)]) + self._start_vel = np.zeros(self.n_links) + + self.max_torque = 1 + self.steps_before_reward = 199 + + action_bound = np.ones((self.n_links,)) * self.max_torque + state_bound = np.hstack([ + [np.pi] * self.n_links, # cos + [np.pi] * self.n_links, # sin + [np.inf] * self.n_links, # velocity + [np.inf] * 2, # x-y coordinates of target distance + [np.inf] # env steps, because reward start after n steps TODO: Maybe + ]) + self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) + self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) + + # containers for plotting + self.metadata = {'render.modes': ["human"]} + self.fig = None + + self._steps = 0 + self.seed() + + @property + def dt(self) -> Union[float, int]: + return self._dt + + @property + def current_pos(self): + return self._joint_angles.copy() + + @property + def current_vel(self): + return self._angle_velocity.copy() + + def reset(self): + # Sample only orientation of first link, i.e. the arm is always straight. + if self.random_start: + first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) + self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) + self._start_pos = self._joint_angles.copy() + else: + self._joint_angles = self._start_pos + + self._angle_velocity = self._start_vel + self._joints = np.zeros((self.n_links + 1, 2)) + self._update_joints() + self._steps = 0 + + return self._get_obs().copy() + + def step(self, action: np.ndarray): + """ + A single step with action in torque space + """ + + # action = self._add_action_noise(action) + ac = np.clip(action, -self.max_torque, self.max_torque) + + self._angle_velocity = self._angle_velocity + self.dt * ac + self._joint_angles = self._joint_angles + self.dt * self._angle_velocity + self._update_joints() + + if not self.allow_self_collision: + self_collision = check_self_collision(line_points_in_taskspace) + if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision: + self_collision = True + + self._is_collided = self._check_collisions() + + reward, info = self._get_reward(action) + + self._steps += 1 + done = False + + return self._get_obs().copy(), reward, done, info + + def _update_joints(self): + """ + update joints to get new end-effector position. The other links are only required for rendering. 
+ Returns: + + """ + angles = np.cumsum(self._joint_angles) + x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)]) + self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0) + + @abstractmethod + def _get_reward(self, action: np.ndarray) -> (float, dict): + pass + + @abstractmethod + def _get_obs(self) -> np.ndarray: + pass + + @abstractmethod + def _check_collisions(self) -> bool: + pass + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def close(self): + del self.fig + + @property + def end_effector(self): + return self._joints[self.n_links].T diff --git a/alr_envs/classic_control/base_reacher/base_reacher_torque.py b/alr_envs/classic_control/base_reacher/base_reacher_torque.py new file mode 100644 index 0000000..12e5830 --- /dev/null +++ b/alr_envs/classic_control/base_reacher/base_reacher_torque.py @@ -0,0 +1,142 @@ +from typing import Iterable, Union +from abc import ABCMeta, abstractmethod +import gym +import matplotlib.pyplot as plt +import numpy as np +from gym import spaces +from gym.utils import seeding +from alr_envs.classic_control.utils import check_self_collision + + +class BaseReacherEnv(gym.Env): + """ + Simple Reaching Task without any physics simulation. + Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions + towards the end of the trajectory. + """ + + def __init__(self, n_links: int, random_start: bool = True, + allow_self_collision: bool = False, collision_penalty: float = 1000): + super().__init__() + self.link_lengths = np.ones(n_links) + self.n_links = n_links + self._dt = 0.01 + + self.random_start = random_start + + self._joints = None + self._joint_angles = None + self._angle_velocity = None + self._is_collided = False + self.allow_self_collision = allow_self_collision + self.collision_penalty = collision_penalty + self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)]) + self._start_vel = np.zeros(self.n_links) + + self.max_torque = 1 + self.steps_before_reward = 199 + + action_bound = np.ones((self.n_links,)) * self.max_torque + state_bound = np.hstack([ + [np.pi] * self.n_links, # cos + [np.pi] * self.n_links, # sin + [np.inf] * self.n_links, # velocity + [np.inf] * 2, # x-y coordinates of target distance + [np.inf] # env steps, because reward start after n steps TODO: Maybe + ]) + self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) + self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) + + # containers for plotting + self.metadata = {'render.modes': ["human"]} + self.fig = None + + self._steps = 0 + self.seed() + + @property + def dt(self) -> Union[float, int]: + return self._dt + + @property + def current_pos(self): + return self._joint_angles.copy() + + @property + def current_vel(self): + return self._angle_velocity.copy() + + def reset(self): + # Sample only orientation of first link, i.e. the arm is always straight. 
+ if self.random_start: + first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) + self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) + self._start_pos = self._joint_angles.copy() + else: + self._joint_angles = self._start_pos + + self._angle_velocity = self._start_vel + self._joints = np.zeros((self.n_links + 1, 2)) + self._update_joints() + self._steps = 0 + + return self._get_obs().copy() + + def step(self, action: np.ndarray): + """ + A single step with action in torque space + """ + + # action = self._add_action_noise(action) + ac = np.clip(action, -self.max_torque, self.max_torque) + + self._angle_velocity = self._angle_velocity + self.dt * ac + self._joint_angles = self._joint_angles + self.dt * self._angle_velocity + self._update_joints() + + if not self.allow_self_collision: + self_collision = check_self_collision(line_points_in_taskspace) + if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision: + self_collision = True + + self._is_collided = self._check_collisions() + + reward, info = self._get_reward(action) + + self._steps += 1 + done = False + + return self._get_obs().copy(), reward, done, info + + def _update_joints(self): + """ + update joints to get new end-effector position. The other links are only required for rendering. + Returns: + + """ + angles = np.cumsum(self._joint_angles) + x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)]) + self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0) + + @abstractmethod + def _get_reward(self, action: np.ndarray) -> (float, dict): + pass + + @abstractmethod + def _get_obs(self) -> np.ndarray: + pass + + @abstractmethod + def _check_collisions(self) -> bool: + pass + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def close(self): + del self.fig + + @property + def end_effector(self): + return self._joints[self.n_links].T From a1a5da3f1e73646ba0857c44e5e635c0e67bbd20 Mon Sep 17 00:00:00 2001 From: Maximilian Huettenrauch Date: Mon, 15 Nov 2021 09:10:03 +0100 Subject: [PATCH 09/15] added promp wrapped environments --- alr_envs/__init__.py | 2 +- alr_envs/alr/__init__.py | 21 ++++++++- alr_envs/dmc/__init__.py | 2 +- .../examples/examples_motion_primitives.py | 5 ++- alr_envs/meta/__init__.py | 2 +- alr_envs/open_ai/__init__.py | 2 +- alr_envs/utils/make_env_helpers.py | 43 ++++++++++++++++++- 7 files changed, 70 insertions(+), 7 deletions(-) diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py index e43e3b1..30fa7b8 100644 --- a/alr_envs/__init__.py +++ b/alr_envs/__init__.py @@ -1,5 +1,5 @@ from alr_envs import dmc, meta, open_ai -from alr_envs.utils.make_env_helpers import make, make_detpmp_env, make_dmp_env, make_rank +from alr_envs.utils.make_env_helpers import make, make_detpmp_env, make_dmp_env, make_promp_env, make_rank from alr_envs.utils import make_dmc # Convenience function for all MP environments diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py index d9843c0..627625d 100644 --- a/alr_envs/alr/__init__.py +++ b/alr_envs/alr/__init__.py @@ -10,7 +10,7 @@ from .mujoco.ball_in_a_cup.biac_pd import ALRBallInACupPDEnv from .mujoco.reacher.alr_reacher import ALRReacherEnv from .mujoco.reacher.balancing import BalancingEnv -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "DetPMP": []} +ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "DetPMP": []} # Classic Control ## Simple Reacher @@ -335,6 +335,25 @@ for _v in _versions: ) 
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) + _env_id = f'HoleReacherProMP-{_v}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + kwargs={ + "name": f"alr_envs:HoleReacher-{_v}", + "wrappers": [classic_control.hole_reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 5, + "num_basis": 5, + "duration": 2, + "policy_type": "velocity", + "weights_scale": 0.2, + "zero_start": True + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + _env_id = f'HoleReacherDetPMP-{_v}' register( id=_env_id, diff --git a/alr_envs/dmc/__init__.py b/alr_envs/dmc/__init__.py index 17d1f7f..ca6469e 100644 --- a/alr_envs/dmc/__init__.py +++ b/alr_envs/dmc/__init__.py @@ -1,6 +1,6 @@ from . import manipulation, suite -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "DetPMP": []} +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "DetPMP": []} from gym.envs.registration import register diff --git a/alr_envs/examples/examples_motion_primitives.py b/alr_envs/examples/examples_motion_primitives.py index 6decdb1..de365b7 100644 --- a/alr_envs/examples/examples_motion_primitives.py +++ b/alr_envs/examples/examples_motion_primitives.py @@ -147,10 +147,13 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): if __name__ == '__main__': - render = False + render = True # DMP example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render) + # ProMP + example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=1, render=render) + # DetProMP example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render) diff --git a/alr_envs/meta/__init__.py b/alr_envs/meta/__init__.py index fa63c94..9db0689 100644 --- a/alr_envs/meta/__init__.py +++ b/alr_envs/meta/__init__.py @@ -3,7 +3,7 @@ from gym import register from . import goal_object_change_mp_wrapper, goal_change_mp_wrapper, goal_endeffector_change_mp_wrapper, \ object_change_mp_wrapper -ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "DetPMP": []} +ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "DetPMP": []} # MetaWorld diff --git a/alr_envs/open_ai/__init__.py b/alr_envs/open_ai/__init__.py index 51dd712..63083ca 100644 --- a/alr_envs/open_ai/__init__.py +++ b/alr_envs/open_ai/__init__.py @@ -3,7 +3,7 @@ from gym.wrappers import FlattenObservation from . import classic_control, mujoco, robotics -ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "DetPMP": []} +ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": [], "DetPMP": []} # Short Continuous Mountain Car register( diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py index fc73b05..19f54d6 100644 --- a/alr_envs/utils/make_env_helpers.py +++ b/alr_envs/utils/make_env_helpers.py @@ -7,6 +7,7 @@ from gym.envs.registration import EnvSpec from mp_env_api import MPEnvWrapper from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper +from mp_env_api.mp_wrappers.promp_wrapper import ProMPWrapper def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs): @@ -132,6 +133,26 @@ def make_dmp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs={}, **kwargs return DmpWrapper(_env, **mp_kwargs) +def make_promp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs={}, **kwargs): + """ + This can also be used standalone for manually building a custom ProMP environment. 
+ Args: + env_id: base_env_name, + wrappers: list of wrappers (at least an MPEnvWrapper), + mp_kwargs: dict of at least {num_dof: int, num_basis: int, width: int} + + Returns: ProMP wrapped gym env + + """ + _verify_time_limit(mp_kwargs.get("duration", None), kwargs.get("time_limit", None)) + + _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs) + + _verify_dof(_env, mp_kwargs.get("num_dof")) + + return ProMPWrapper(_env, **mp_kwargs) + + def make_detpmp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs={}, **kwargs): """ This can also be used standalone for manually building a custom Det ProMP environment. @@ -140,7 +161,7 @@ def make_detpmp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs={}, **kwa wrappers: list of wrappers (at least an MPEnvWrapper), mp_kwargs: dict of at least {num_dof: int, num_basis: int, width: int} - Returns: DMP wrapped gym env + Returns: Det ProMP wrapped gym env """ _verify_time_limit(mp_kwargs.get("duration", None), kwargs.get("time_limit", None)) @@ -171,6 +192,26 @@ def make_dmp_env_helper(**kwargs): mp_kwargs=kwargs.pop("mp_kwargs"), **kwargs) +def make_promp_env_helper(**kwargs): + """ + Helper function for registering ProMP gym environments. + This can also be used standalone for manually building a custom ProMP environment. + Args: + **kwargs: expects at least the following: + { + "name": base_env_name, + "wrappers": list of wrappers (at least an MPEnvWrapper), + "mp_kwargs": dict of at least {num_dof: int, num_basis: int, width: int} + } + + Returns: ProMP wrapped gym env + + """ + seed = kwargs.pop("seed", None) + return make_promp_env(env_id=kwargs.pop("name"), wrappers=kwargs.pop("wrappers"), seed=seed, + mp_kwargs=kwargs.pop("mp_kwargs"), **kwargs) + + def make_detpmp_env_helper(**kwargs): """ Helper function for registering ProMP gym environments. 
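
A brief usage sketch for the ProMP variants registered in this patch (illustrative only, not part of the diff). It relies on the package-level make and make_promp_env helpers exported above and on the HoleReacherProMP-v1 registration with the kwargs shown; the rollout semantics (a single step executes the full trajectory parameterized by the sampled ProMP weights), the seed, and the sampled weights are assumptions for illustration.

    import alr_envs
    from alr_envs.alr import classic_control

    # Sketch, not part of the patch: roll out the registered ProMP environment.
    # The action is a ProMP weight vector; one step is assumed to execute the whole episode.
    env = alr_envs.make("alr_envs:HoleReacherProMP-v1", seed=10)
    obs = env.reset()
    weights = env.action_space.sample()
    obs, episode_reward, done, info = env.step(weights)

    # Standalone construction via make_promp_env, mirroring the registered kwargs.
    env = alr_envs.make_promp_env(
        "alr_envs:HoleReacher-v1",
        wrappers=[classic_control.hole_reacher.MPWrapper],
        seed=10,
        mp_kwargs={"num_dof": 5, "num_basis": 5, "duration": 2,
                   "policy_type": "velocity", "weights_scale": 0.2, "zero_start": True},
    )

Both paths end up in make_promp_env, which checks the duration in mp_kwargs against the underlying environment's time limit before applying the ProMPWrapper.
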
From 95560c2bab13127842b1e5999a27972b012f19d3 Mon Sep 17 00:00:00 2001 From: Maximilian Huettenrauch Date: Wed, 17 Nov 2021 17:14:13 +0100 Subject: [PATCH 10/15] wip --- alr_envs/alr/__init__.py | 2 +- alr_envs/alr/classic_control/__init__.py | 2 +- .../classic_control/base_reacher/__init__.py | 0 .../base_reacher/base_reacher_direct.py | 53 ++-- .../base_reacher/base_reacher_torque.py | 0 .../hole_reacher/hole_reacher.py | 251 +++++++++++++++++- .../hole_reacher/mp_wrapper.py | 8 + .../hole_reacher/simple_reward.py | 48 ++++ alr_envs/alr/classic_control/utils.py | 3 + .../examples/examples_motion_primitives.py | 12 +- 10 files changed, 351 insertions(+), 28 deletions(-) rename alr_envs/{ => alr}/classic_control/base_reacher/__init__.py (100%) rename alr_envs/{ => alr}/classic_control/base_reacher/base_reacher_direct.py (73%) rename alr_envs/{ => alr}/classic_control/base_reacher/base_reacher_torque.py (100%) create mode 100644 alr_envs/alr/classic_control/hole_reacher/simple_reward.py diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py index 627625d..565cbca 100644 --- a/alr_envs/alr/__init__.py +++ b/alr_envs/alr/__init__.py @@ -84,7 +84,7 @@ register( register( id='HoleReacher-v1', - entry_point='alr_envs.alr.classic_control:HoleReacherEnv', + entry_point='alr_envs.alr.classic_control:HoleReacherEnvOld', max_episode_steps=200, kwargs={ "n_links": 5, diff --git a/alr_envs/alr/classic_control/__init__.py b/alr_envs/alr/classic_control/__init__.py index 73575ab..38f0faf 100644 --- a/alr_envs/alr/classic_control/__init__.py +++ b/alr_envs/alr/classic_control/__init__.py @@ -1,3 +1,3 @@ -from .hole_reacher.hole_reacher import HoleReacherEnv +from .hole_reacher.hole_reacher import HoleReacherEnv, HoleReacherEnvOld from .simple_reacher.simple_reacher import SimpleReacherEnv from .viapoint_reacher.viapoint_reacher import ViaPointReacherEnv diff --git a/alr_envs/classic_control/base_reacher/__init__.py b/alr_envs/alr/classic_control/base_reacher/__init__.py similarity index 100% rename from alr_envs/classic_control/base_reacher/__init__.py rename to alr_envs/alr/classic_control/base_reacher/__init__.py diff --git a/alr_envs/classic_control/base_reacher/base_reacher_direct.py b/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py similarity index 73% rename from alr_envs/classic_control/base_reacher/base_reacher_direct.py rename to alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py index 12e5830..c4afaed 100644 --- a/alr_envs/classic_control/base_reacher/base_reacher_direct.py +++ b/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py @@ -5,7 +5,7 @@ import matplotlib.pyplot as plt import numpy as np from gym import spaces from gym.utils import seeding -from alr_envs.classic_control.utils import check_self_collision +from alr_envs.alr.classic_control.utils import intersect class BaseReacherEnv(gym.Env): @@ -16,7 +16,7 @@ class BaseReacherEnv(gym.Env): """ def __init__(self, n_links: int, random_start: bool = True, - allow_self_collision: bool = False, collision_penalty: float = 1000): + allow_self_collision: bool = False): super().__init__() self.link_lengths = np.ones(n_links) self.n_links = n_links @@ -24,19 +24,21 @@ class BaseReacherEnv(gym.Env): self.random_start = random_start + # state self._joints = None self._joint_angles = None self._angle_velocity = None - self._is_collided = False - self.allow_self_collision = allow_self_collision - self.collision_penalty = collision_penalty self._start_pos = np.hstack([[np.pi / 2], 
np.zeros(self.n_links - 1)]) self._start_vel = np.zeros(self.n_links) - self.max_torque = 1 + # joint limits + self.j_min = -np.pi * np.ones(n_links) + self.j_max = np.pi * np.ones(n_links) + + self.max_vel = 1 self.steps_before_reward = 199 - action_bound = np.ones((self.n_links,)) * self.max_torque + action_bound = np.ones((self.n_links,)) * self.max_vel state_bound = np.hstack([ [np.pi] * self.n_links, # cos [np.pi] * self.n_links, # sin @@ -47,6 +49,8 @@ class BaseReacherEnv(gym.Env): self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) + self.reward_function = None # Needs to be set in sub class + # containers for plotting self.metadata = {'render.modes': ["human"]} self.fig = None @@ -84,27 +88,21 @@ class BaseReacherEnv(gym.Env): def step(self, action: np.ndarray): """ - A single step with action in torque space + A single step with action in angular velocity space """ - # action = self._add_action_noise(action) - ac = np.clip(action, -self.max_torque, self.max_torque) - - self._angle_velocity = self._angle_velocity + self.dt * ac + acc = (action - self._angle_velocity) / self.dt + self._angle_velocity = action self._joint_angles = self._joint_angles + self.dt * self._angle_velocity self._update_joints() - if not self.allow_self_collision: - self_collision = check_self_collision(line_points_in_taskspace) - if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision: - self_collision = True - self._is_collided = self._check_collisions() - reward, info = self._get_reward(action) + # reward, info = self._get_reward(action) + reward, info = self.reward_function.get_reward(self, acc) self._steps += 1 - done = False + done = self._terminate(info) return self._get_obs().copy(), reward, done, info @@ -118,6 +116,19 @@ class BaseReacherEnv(gym.Env): x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)]) self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0) + def _check_self_collision(self): + """Checks whether line segments intersect""" + + if np.any(self._joint_angles > self.j_max) or np.any(self._joint_angles < self.j_min): + return True + + link_lines = np.stack((self._joints[:-1, :], self._joints[1:, :]), axis=1) + for i, line1 in enumerate(link_lines): + for line2 in link_lines[i + 2:, :]: + if intersect(line1[0], line1[-1], line2[0], line2[-1]): + return True + return False + @abstractmethod def _get_reward(self, action: np.ndarray) -> (float, dict): pass @@ -130,6 +141,10 @@ class BaseReacherEnv(gym.Env): def _check_collisions(self) -> bool: pass + @abstractmethod + def _terminate(self, info) -> bool: + return False + def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] diff --git a/alr_envs/classic_control/base_reacher/base_reacher_torque.py b/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py similarity index 100% rename from alr_envs/classic_control/base_reacher/base_reacher_torque.py rename to alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py index ee1a997..55aac9e 100644 --- a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py +++ b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py @@ -6,10 +6,243 @@ import numpy as np from gym.utils import seeding from matplotlib import patches +from 
alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherEnv from alr_envs.alr.classic_control.utils import check_self_collision -class HoleReacherEnv(gym.Env): +class HoleReacherEnv(BaseReacherEnv): + def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None, + hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False, + allow_wall_collision: bool = False, collision_penalty: float = 1000, rew_fct: str = "simple"): + + super().__init__(n_links, random_start, allow_self_collision) + + # provided initial parameters + self.initial_x = hole_x # x-position of center of hole + self.initial_width = hole_width # width of hole + self.initial_depth = hole_depth # depth of hole + + # temp container for current env state + self._tmp_x = None + self._tmp_width = None + self._tmp_depth = None + self._goal = None # x-y coordinates for reaching the center at the bottom of the hole + + action_bound = np.pi * np.ones((self.n_links,)) + state_bound = np.hstack([ + [np.pi] * self.n_links, # cos + [np.pi] * self.n_links, # sin + [np.inf] * self.n_links, # velocity + [np.inf], # hole width + # [np.inf], # hole depth + [np.inf] * 2, # x-y coordinates of target distance + [np.inf] # env steps, because reward start after n steps TODO: Maybe + ]) + self.action_space = gym.spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) + self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) + + if rew_fct == "simple": + from alr_envs.alr.classic_control.hole_reacher.simple_reward import HolereacherSimpleReward + self.reward_function = HolereacherSimpleReward(allow_self_collision, allow_wall_collision, collision_penalty) + + def reset(self): + self._generate_hole() + self._set_patches() + + return super().reset() + + def _terminate(self, info): + return info["is_collided"] + + def _generate_hole(self): + if self.initial_width is None: + width = self.np_random.uniform(0.15, 0.5) + else: + width = np.copy(self.initial_width) + if self.initial_x is None: + # sample whole on left or right side + direction = self.np_random.choice([-1, 1]) + # Hole center needs to be half the width away from the arm to give a valid setting. + x = direction * self.np_random.uniform(width / 2, 3.5) + else: + x = np.copy(self.initial_x) + if self.initial_depth is None: + # TODO we do not want this right now. 
+ depth = self.np_random.uniform(1, 1) + else: + depth = np.copy(self.initial_depth) + + self._tmp_width = width + self._tmp_x = x + self._tmp_depth = depth + self._goal = np.hstack([self._tmp_x, -self._tmp_depth]) + + self._line_ground_left = np.array([-self.n_links, 0, x - width / 2, 0]) + self._line_ground_right = np.array([x + width / 2, 0, self.n_links, 0]) + self._line_ground_hole = np.array([x - width / 2, -depth, x + width / 2, -depth]) + self._line_hole_left = np.array([x - width / 2, -depth, x - width / 2, 0]) + self._line_hole_right = np.array([x + width / 2, -depth, x + width / 2, 0]) + + self.ground_lines = np.stack((self._line_ground_left, + self._line_ground_right, + self._line_ground_hole, + self._line_hole_left, + self._line_hole_right)) + + def _get_obs(self): + theta = self._joint_angles + return np.hstack([ + np.cos(theta), + np.sin(theta), + self._angle_velocity, + self._tmp_width, + # self._tmp_hole_depth, + self.end_effector - self._goal, + self._steps + ]) + + def _get_line_points(self, num_points_per_link=1): + theta = self._joint_angles[:, None] + + intermediate_points = np.linspace(0, 1, num_points_per_link) if num_points_per_link > 1 else 1 + accumulated_theta = np.cumsum(theta, axis=0) + end_effector = np.zeros(shape=(self.n_links, num_points_per_link, 2)) + + x = np.cos(accumulated_theta) * self.link_lengths[:, None] * intermediate_points + y = np.sin(accumulated_theta) * self.link_lengths[:, None] * intermediate_points + + end_effector[0, :, 0] = x[0, :] + end_effector[0, :, 1] = y[0, :] + + for i in range(1, self.n_links): + end_effector[i, :, 0] = x[i, :] + end_effector[i - 1, -1, 0] + end_effector[i, :, 1] = y[i, :] + end_effector[i - 1, -1, 1] + + # xy = np.stack((x, y), axis=2) + # + # self._joints[0] + np.cumsum(xy, axis=0) + + return np.squeeze(end_effector + self._joints[0, :]) + + def check_wall_collision(self): + line_points = self._get_line_points(num_points_per_link=100) + + # all points that are before the hole in x + r, c = np.where(line_points[:, :, 0] < (self._tmp_x - self._tmp_width / 2)) + + # check if any of those points are below surface + nr_line_points_below_surface_before_hole = np.sum(line_points[r, c, 1] < 0) + + if nr_line_points_below_surface_before_hole > 0: + return True + + # all points that are after the hole in x + r, c = np.where(line_points[:, :, 0] > (self._tmp_x + self._tmp_width / 2)) + + # check if any of those points are below surface + nr_line_points_below_surface_after_hole = np.sum(line_points[r, c, 1] < 0) + + if nr_line_points_below_surface_after_hole > 0: + return True + + # all points that are above the hole + r, c = np.where((line_points[:, :, 0] > (self._tmp_x - self._tmp_width / 2)) & ( + line_points[:, :, 0] < (self._tmp_x + self._tmp_width / 2))) + + # check if any of those points are below surface + nr_line_points_below_surface_in_hole = np.sum(line_points[r, c, 1] < -self._tmp_depth) + + if nr_line_points_below_surface_in_hole > 0: + return True + + return False + + # def check_wall_collision(self, ): + # """find the intersection of line segments A=(x1,y1)/(x2,y2) and + # B=(x3,y3)/(x4,y4). 
""" + # + # link_lines = np.hstack((self._joints[:-1, :], self._joints[1:, :])) + # + # all_points_product = np.hstack( + # [np.repeat(link_lines, len(self.ground_lines), axis=0), + # np.tile(self.ground_lines, (len(link_lines), 1))]) + # + # x1, y1, x2, y2, x3, y3, x4, y4 = all_points_product.T + # + # denom = ((x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)) + # # if denom == 0: + # # return False + # px = ((x1 * y2 - y1 * x2) * (x3 - x4) - (x1 - x2) * (x3 * y4 - y3 * x4)) / denom + # py = ((x1 * y2 - y1 * x2) * (y3 - y4) - (y1 - y2) * (x3 * y4 - y3 * x4)) / denom + # # if (px - x1) * (px - x2) < 0 and (py - y1) * (py - y2) < 0 \ + # # and (px - x3) * (px - x4) < 0 and (py - y3) * (py - y4) < 0: + # # return True # [px, py] + # test = ((px - x1) * (px - x2) <= 0) & ((py - y1) * (py - y2) <= 0) & ((px - x3) * (px - x4) <= 0) & ( + # (py - y3) * (py - y4) <= 0) + # if np.any(test): + # possible_collisions = np.stack((px, py)).T[test] + # for row in possible_collisions: + # if not np.any([np.allclose(row, x) for x in self._joints]): + # return True, row + # + # return False, None + + def render(self, mode='human'): + if self.fig is None: + # Create base figure once on the beginning. Afterwards only update + plt.ion() + self.fig = plt.figure() + ax = self.fig.add_subplot(1, 1, 1) + + # limits + lim = np.sum(self.link_lengths) + 0.5 + ax.set_xlim([-lim, lim]) + ax.set_ylim([-1.1, lim]) + + self.line, = ax.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k') + self._set_patches() + self.fig.show() + + self.fig.gca().set_title( + f"Iteration: {self._steps}, distance: {np.linalg.norm(self.end_effector - self._goal) ** 2}") + + if mode == "human": + + # arm + self.line.set_data(self._joints[:, 0], self._joints[:, 1]) + + self.fig.canvas.draw() + self.fig.canvas.flush_events() + + elif mode == "partial": + if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided: + # Arm + plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k', + alpha=self._steps / 200) + + def _set_patches(self): + if self.fig is not None: + # self.fig.gca().patches = [] + left_block = patches.Rectangle((-self.n_links, -self._tmp_depth), + self.n_links + self._tmp_x - self._tmp_width / 2, + self._tmp_depth, + fill=True, edgecolor='k', facecolor='k') + right_block = patches.Rectangle((self._tmp_x + self._tmp_width / 2, -self._tmp_depth), + self.n_links - self._tmp_x + self._tmp_width / 2, + self._tmp_depth, + fill=True, edgecolor='k', facecolor='k') + hole_floor = patches.Rectangle((self._tmp_x - self._tmp_width / 2, -self._tmp_depth), + self._tmp_width, + 1 - self._tmp_depth, + fill=True, edgecolor='k', facecolor='k') + + # Add the patch to the Axes + self.fig.gca().add_patch(left_block) + self.fig.gca().add_patch(right_block) + self.fig.gca().add_patch(hole_floor) + + +class HoleReacherEnvOld(gym.Env): def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None, hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False, @@ -312,3 +545,19 @@ class HoleReacherEnv(gym.Env): super().close() if self.fig is not None: plt.close(self.fig) + + +if __name__ == "__main__": + import time + env = HoleReacherEnv(5) + env.reset() + + start = time.time() + for i in range(10000): + # env.check_wall_collision() + ac = env.action_space.sample() + obs, rew, done, info = env.step(ac) + # env.render() + if done: + env.reset() + print(time.time() - start) diff --git a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py 
b/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py index d951161..feb545f 100644 --- a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py +++ b/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py @@ -18,6 +18,14 @@ class MPWrapper(MPEnvWrapper): [False] # env steps ]) + # @property + # def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: + # return self._joint_angles.copy() + # + # @property + # def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + # return self._angle_velocity.copy() + @property def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: return self.env.current_pos diff --git a/alr_envs/alr/classic_control/hole_reacher/simple_reward.py b/alr_envs/alr/classic_control/hole_reacher/simple_reward.py new file mode 100644 index 0000000..0609a62 --- /dev/null +++ b/alr_envs/alr/classic_control/hole_reacher/simple_reward.py @@ -0,0 +1,48 @@ +import numpy as np +from alr_envs.alr.classic_control.utils import check_self_collision + + +class HolereacherSimpleReward: + def __init__(self, allow_self_collision, allow_wall_collision, collision_penalty): + self.collision_penalty = collision_penalty + + # collision + self.allow_self_collision = allow_self_collision + self.allow_wall_collision = allow_wall_collision + self.collision_penalty = collision_penalty + self._is_collided = False + pass + + def get_reward(self, env, action): + reward = 0 + success = False + + self_collision = False + wall_collision = False + + # joints = np.hstack((env._joints[:-1, :], env._joints[1:, :])) + if not self.allow_self_collision: + self_collision = env._check_self_collision() + + if not self.allow_wall_collision: + wall_collision = env.check_wall_collision() + + self._is_collided = self_collision or wall_collision + + if env._steps == 199 or self._is_collided: + # return reward only in last time step + # Episode also terminates when colliding, hence return reward + dist = np.linalg.norm(env.end_effector - env._goal) + + success = dist < 0.005 and not self._is_collided + reward = - dist ** 2 - self.collision_penalty * self._is_collided + + info = {"is_success": success, + "is_collided": self._is_collided} + + acc = (action - env._angle_velocity) / env.dt + reward -= 5e-8 * np.sum(acc ** 2) + + return reward, info + + diff --git a/alr_envs/alr/classic_control/utils.py b/alr_envs/alr/classic_control/utils.py index ea378d1..b557a0a 100644 --- a/alr_envs/alr/classic_control/utils.py +++ b/alr_envs/alr/classic_control/utils.py @@ -1,3 +1,6 @@ +import numpy as np + + def ccw(A, B, C): return (C[1] - A[1]) * (B[0] - A[0]) - (B[1] - A[1]) * (C[0] - A[0]) > 1e-12 diff --git a/alr_envs/examples/examples_motion_primitives.py b/alr_envs/examples/examples_motion_primitives.py index de365b7..b307a52 100644 --- a/alr_envs/examples/examples_motion_primitives.py +++ b/alr_envs/examples/examples_motion_primitives.py @@ -147,18 +147,18 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): if __name__ == '__main__': - render = True + render = False # DMP - example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render) + # example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=10, render=render) # ProMP - example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=1, render=render) + # example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=100, render=render) # DetProMP - example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render) + example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=100, render=render) # 
Altered basis functions - example_custom_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render) + # example_custom_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render) # Custom MP - example_fully_custom_mp(seed=10, iterations=1, render=render) + # example_fully_custom_mp(seed=10, iterations=1, render=render) From 386c41877846513fd53fee934df8eca8d33b4412 Mon Sep 17 00:00:00 2001 From: Maximilian Huettenrauch Date: Fri, 26 Nov 2021 16:31:46 +0100 Subject: [PATCH 11/15] reacher envs cleanup - base classes for direct and torque control - added promp wrapper support --- alr_envs/alr/__init__.py | 41 +- .../base_reacher/base_reacher.py | 141 +++++++ .../base_reacher/base_reacher_direct.py | 138 +------ .../base_reacher/base_reacher_torque.py | 119 +----- .../hole_reacher/hole_reacher.py | 362 +----------------- .../hole_reacher/hole_reacher_old.py | 315 +++++++++++++++ .../hole_reacher/hr_dist_vel_acc_reward.py | 56 +++ .../{simple_reward.py => hr_simple_reward.py} | 29 +- .../simple_reacher/simple_reacher.py | 110 ++---- .../viapoint_reacher/viapoint_reacher.py | 147 ++----- .../examples/examples_motion_primitives.py | 8 +- 11 files changed, 645 insertions(+), 821 deletions(-) create mode 100644 alr_envs/alr/classic_control/base_reacher/base_reacher.py create mode 100644 alr_envs/alr/classic_control/hole_reacher/hole_reacher_old.py create mode 100644 alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py rename alr_envs/alr/classic_control/hole_reacher/{simple_reward.py => hr_simple_reward.py} (64%) diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py index 565cbca..f55c1df 100644 --- a/alr_envs/alr/__init__.py +++ b/alr_envs/alr/__init__.py @@ -84,7 +84,7 @@ register( register( id='HoleReacher-v1', - entry_point='alr_envs.alr.classic_control:HoleReacherEnvOld', + entry_point='alr_envs.alr.classic_control:HoleReacherEnv', max_episode_steps=200, kwargs={ "n_links": 5, @@ -110,7 +110,7 @@ register( "hole_width": 0.25, "hole_depth": 1, "hole_x": 2, - "collision_penalty": 100, + "collision_penalty": 1, } ) @@ -247,6 +247,25 @@ for _v in _versions: ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) + _env_id = f'{_name[0]}ProMP-{_name[1]}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + kwargs={ + "name": f"alr_envs:{_v}", + "wrappers": [classic_control.simple_reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 2 if "long" not in _v.lower() else 5, + "num_basis": 5, + "duration": 2, + "policy_type": "motor", + "weights_scale": 1, + "zero_start": True + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + _env_id = f'{_name[0]}DetPMP-{_name[1]}' register( id=_env_id, @@ -289,6 +308,24 @@ register( ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0") +register( + id="ViaPointReacherProMP-v0", + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + kwargs={ + "name": f"alr_envs:ViaPointReacher-v0", + "wrappers": [classic_control.viapoint_reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 5, + "num_basis": 5, + "duration": 2, + "policy_type": "motor", + "weights_scale": 1, + "zero_start": True + } + } +) +ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0") + register( id='ViaPointReacherDetPMP-v0', entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher.py 
b/alr_envs/alr/classic_control/base_reacher/base_reacher.py new file mode 100644 index 0000000..66b234f --- /dev/null +++ b/alr_envs/alr/classic_control/base_reacher/base_reacher.py @@ -0,0 +1,141 @@ +from typing import Iterable, Union +from abc import ABCMeta, abstractmethod +import gym +import matplotlib.pyplot as plt +import numpy as np +from gym import spaces +from gym.utils import seeding +from alr_envs.alr.classic_control.utils import intersect + + +class BaseReacherEnv(gym.Env): + """ + Base class for all reaching environments. + """ + + def __init__(self, n_links: int, random_start: bool = True, + allow_self_collision: bool = False): + super().__init__() + self.link_lengths = np.ones(n_links) + self.n_links = n_links + self._dt = 0.01 + + self.random_start = random_start + + # state + self._joints = None + self._joint_angles = None + self._angle_velocity = None + self._acc = None + self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)]) + self._start_vel = np.zeros(self.n_links) + + # joint limits + self.j_min = -np.pi * np.ones(n_links) + self.j_max = np.pi * np.ones(n_links) + + self.steps_before_reward = 199 + + state_bound = np.hstack([ + [np.pi] * self.n_links, # cos + [np.pi] * self.n_links, # sin + [np.inf] * self.n_links, # velocity + [np.inf] * 2, # x-y coordinates of target distance + [np.inf] # env steps, because reward start after n steps TODO: Maybe + ]) + + self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) + + self.reward_function = None # Needs to be set in sub class + + # containers for plotting + self.metadata = {'render.modes': ["human"]} + self.fig = None + + self._steps = 0 + self.seed() + + @property + def dt(self) -> Union[float, int]: + return self._dt + + @property + def current_pos(self): + return self._joint_angles.copy() + + @property + def current_vel(self): + return self._angle_velocity.copy() + + def reset(self): + # Sample only orientation of first link, i.e. the arm is always straight. + if self.random_start: + first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) + self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) + self._start_pos = self._joint_angles.copy() + else: + self._joint_angles = self._start_pos + + self._angle_velocity = self._start_vel + self._joints = np.zeros((self.n_links + 1, 2)) + self._update_joints() + self._steps = 0 + + return self._get_obs().copy() + + @abstractmethod + def step(self, action: np.ndarray): + """ + A single step with action in angular velocity space + """ + raise NotImplementedError + + def _update_joints(self): + """ + update joints to get new end-effector position. The other links are only required for rendering. 
+ Returns: + + """ + angles = np.cumsum(self._joint_angles) + x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)]) + self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0) + + def _check_self_collision(self): + """Checks whether line segments intersect""" + + if np.any(self._joint_angles > self.j_max) or np.any(self._joint_angles < self.j_min): + return True + + link_lines = np.stack((self._joints[:-1, :], self._joints[1:, :]), axis=1) + for i, line1 in enumerate(link_lines): + for line2 in link_lines[i + 2:, :]: + if intersect(line1[0], line1[-1], line2[0], line2[-1]): + return True + return False + + @abstractmethod + def _get_reward(self, action: np.ndarray) -> (float, dict): + pass + + @abstractmethod + def _get_obs(self) -> np.ndarray: + pass + + @abstractmethod + def _check_collisions(self) -> bool: + pass + + @abstractmethod + def _terminate(self, info) -> bool: + return False + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def close(self): + del self.fig + + @property + def end_effector(self): + return self._joints[self.n_links].T diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py b/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py index c4afaed..4f7b6f1 100644 --- a/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py +++ b/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py @@ -1,157 +1,35 @@ -from typing import Iterable, Union -from abc import ABCMeta, abstractmethod -import gym -import matplotlib.pyplot as plt -import numpy as np from gym import spaces -from gym.utils import seeding -from alr_envs.alr.classic_control.utils import intersect +import numpy as np +from alr_envs.alr.classic_control.base_reacher.base_reacher import BaseReacherEnv -class BaseReacherEnv(gym.Env): +class BaseReacherDirectEnv(BaseReacherEnv): """ - Simple Reaching Task without any physics simulation. - Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions - towards the end of the trajectory. 
+ Base class for directly controlled reaching environments """ - def __init__(self, n_links: int, random_start: bool = True, allow_self_collision: bool = False): - super().__init__() - self.link_lengths = np.ones(n_links) - self.n_links = n_links - self._dt = 0.01 - - self.random_start = random_start - - # state - self._joints = None - self._joint_angles = None - self._angle_velocity = None - self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)]) - self._start_vel = np.zeros(self.n_links) - - # joint limits - self.j_min = -np.pi * np.ones(n_links) - self.j_max = np.pi * np.ones(n_links) - - self.max_vel = 1 - self.steps_before_reward = 199 + super().__init__(n_links, random_start, allow_self_collision) + self.max_vel = 10 * np.pi action_bound = np.ones((self.n_links,)) * self.max_vel - state_bound = np.hstack([ - [np.pi] * self.n_links, # cos - [np.pi] * self.n_links, # sin - [np.inf] * self.n_links, # velocity - [np.inf] * 2, # x-y coordinates of target distance - [np.inf] # env steps, because reward start after n steps TODO: Maybe - ]) self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) - self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) - - self.reward_function = None # Needs to be set in sub class - - # containers for plotting - self.metadata = {'render.modes': ["human"]} - self.fig = None - - self._steps = 0 - self.seed() - - @property - def dt(self) -> Union[float, int]: - return self._dt - - @property - def current_pos(self): - return self._joint_angles.copy() - - @property - def current_vel(self): - return self._angle_velocity.copy() - - def reset(self): - # Sample only orientation of first link, i.e. the arm is always straight. - if self.random_start: - first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) - self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) - self._start_pos = self._joint_angles.copy() - else: - self._joint_angles = self._start_pos - - self._angle_velocity = self._start_vel - self._joints = np.zeros((self.n_links + 1, 2)) - self._update_joints() - self._steps = 0 - - return self._get_obs().copy() def step(self, action: np.ndarray): """ A single step with action in angular velocity space """ - acc = (action - self._angle_velocity) / self.dt + self._acc = (action - self._angle_velocity) / self.dt self._angle_velocity = action self._joint_angles = self._joint_angles + self.dt * self._angle_velocity self._update_joints() self._is_collided = self._check_collisions() - # reward, info = self._get_reward(action) - reward, info = self.reward_function.get_reward(self, acc) + reward, info = self._get_reward(action) self._steps += 1 done = self._terminate(info) return self._get_obs().copy(), reward, done, info - - def _update_joints(self): - """ - update joints to get new end-effector position. The other links are only required for rendering. 
- Returns: - - """ - angles = np.cumsum(self._joint_angles) - x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)]) - self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0) - - def _check_self_collision(self): - """Checks whether line segments intersect""" - - if np.any(self._joint_angles > self.j_max) or np.any(self._joint_angles < self.j_min): - return True - - link_lines = np.stack((self._joints[:-1, :], self._joints[1:, :]), axis=1) - for i, line1 in enumerate(link_lines): - for line2 in link_lines[i + 2:, :]: - if intersect(line1[0], line1[-1], line2[0], line2[-1]): - return True - return False - - @abstractmethod - def _get_reward(self, action: np.ndarray) -> (float, dict): - pass - - @abstractmethod - def _get_obs(self) -> np.ndarray: - pass - - @abstractmethod - def _check_collisions(self) -> bool: - pass - - @abstractmethod - def _terminate(self, info) -> bool: - return False - - def seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] - - def close(self): - del self.fig - - @property - def end_effector(self): - return self._joints[self.n_links].T diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py b/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py index 12e5830..7789965 100644 --- a/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py +++ b/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py @@ -1,86 +1,19 @@ -from typing import Iterable, Union -from abc import ABCMeta, abstractmethod -import gym -import matplotlib.pyplot as plt -import numpy as np from gym import spaces -from gym.utils import seeding -from alr_envs.classic_control.utils import check_self_collision +import numpy as np +from alr_envs.alr.classic_control.base_reacher.base_reacher import BaseReacherEnv -class BaseReacherEnv(gym.Env): +class BaseReacherTorqueEnv(BaseReacherEnv): """ - Simple Reaching Task without any physics simulation. - Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions - towards the end of the trajectory. 
+ Base class for torque controlled reaching environments """ - def __init__(self, n_links: int, random_start: bool = True, - allow_self_collision: bool = False, collision_penalty: float = 1000): - super().__init__() - self.link_lengths = np.ones(n_links) - self.n_links = n_links - self._dt = 0.01 - - self.random_start = random_start - - self._joints = None - self._joint_angles = None - self._angle_velocity = None - self._is_collided = False - self.allow_self_collision = allow_self_collision - self.collision_penalty = collision_penalty - self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)]) - self._start_vel = np.zeros(self.n_links) - - self.max_torque = 1 - self.steps_before_reward = 199 + allow_self_collision: bool = False): + super().__init__(n_links, random_start, allow_self_collision) + self.max_torque = 1000 action_bound = np.ones((self.n_links,)) * self.max_torque - state_bound = np.hstack([ - [np.pi] * self.n_links, # cos - [np.pi] * self.n_links, # sin - [np.inf] * self.n_links, # velocity - [np.inf] * 2, # x-y coordinates of target distance - [np.inf] # env steps, because reward start after n steps TODO: Maybe - ]) self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) - self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) - - # containers for plotting - self.metadata = {'render.modes': ["human"]} - self.fig = None - - self._steps = 0 - self.seed() - - @property - def dt(self) -> Union[float, int]: - return self._dt - - @property - def current_pos(self): - return self._joint_angles.copy() - - @property - def current_vel(self): - return self._angle_velocity.copy() - - def reset(self): - # Sample only orientation of first link, i.e. the arm is always straight. - if self.random_start: - first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) - self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) - self._start_pos = self._joint_angles.copy() - else: - self._joint_angles = self._start_pos - - self._angle_velocity = self._start_vel - self._joints = np.zeros((self.n_links + 1, 2)) - self._update_joints() - self._steps = 0 - - return self._get_obs().copy() def step(self, action: np.ndarray): """ @@ -94,11 +27,6 @@ class BaseReacherEnv(gym.Env): self._joint_angles = self._joint_angles + self.dt * self._angle_velocity self._update_joints() - if not self.allow_self_collision: - self_collision = check_self_collision(line_points_in_taskspace) - if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision: - self_collision = True - self._is_collided = self._check_collisions() reward, info = self._get_reward(action) @@ -107,36 +35,3 @@ class BaseReacherEnv(gym.Env): done = False return self._get_obs().copy(), reward, done, info - - def _update_joints(self): - """ - update joints to get new end-effector position. The other links are only required for rendering. 
- Returns: - - """ - angles = np.cumsum(self._joint_angles) - x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)]) - self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0) - - @abstractmethod - def _get_reward(self, action: np.ndarray) -> (float, dict): - pass - - @abstractmethod - def _get_obs(self) -> np.ndarray: - pass - - @abstractmethod - def _check_collisions(self) -> bool: - pass - - def seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] - - def close(self): - del self.fig - - @property - def end_effector(self): - return self._joints[self.n_links].T diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py index 55aac9e..2cb1e08 100644 --- a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py +++ b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py @@ -3,14 +3,12 @@ from typing import Union import gym import matplotlib.pyplot as plt import numpy as np -from gym.utils import seeding from matplotlib import patches -from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherEnv -from alr_envs.alr.classic_control.utils import check_self_collision +from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv -class HoleReacherEnv(BaseReacherEnv): +class HoleReacherEnv(BaseReacherDirectEnv): def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None, hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False, allow_wall_collision: bool = False, collision_penalty: float = 1000, rew_fct: str = "simple"): @@ -28,7 +26,7 @@ class HoleReacherEnv(BaseReacherEnv): self._tmp_depth = None self._goal = None # x-y coordinates for reaching the center at the bottom of the hole - action_bound = np.pi * np.ones((self.n_links,)) + # action_bound = np.pi * np.ones((self.n_links,)) state_bound = np.hstack([ [np.pi] * self.n_links, # cos [np.pi] * self.n_links, # sin @@ -38,19 +36,25 @@ class HoleReacherEnv(BaseReacherEnv): [np.inf] * 2, # x-y coordinates of target distance [np.inf] # env steps, because reward start after n steps TODO: Maybe ]) - self.action_space = gym.spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) + # self.action_space = gym.spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) if rew_fct == "simple": - from alr_envs.alr.classic_control.hole_reacher.simple_reward import HolereacherSimpleReward - self.reward_function = HolereacherSimpleReward(allow_self_collision, allow_wall_collision, collision_penalty) + from alr_envs.alr.classic_control.hole_reacher.hr_simple_reward import HolereacherReward + self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty) + else: + raise ValueError("Unknown reward function {}".format(rew_fct)) def reset(self): self._generate_hole() self._set_patches() + self.reward_function.reset() return super().reset() + def _get_reward(self, action: np.ndarray) -> (float, dict): + return self.reward_function.get_reward(self) + def _terminate(self, info): return info["is_collided"] @@ -118,10 +122,6 @@ class HoleReacherEnv(BaseReacherEnv): end_effector[i, :, 0] = x[i, :] + end_effector[i - 1, -1, 0] end_effector[i, :, 1] = y[i, :] + end_effector[i - 1, -1, 1] - # xy = np.stack((x, y), axis=2) - # - # 
self._joints[0] + np.cumsum(xy, axis=0) - return np.squeeze(end_effector + self._joints[0, :]) def check_wall_collision(self): @@ -157,36 +157,6 @@ class HoleReacherEnv(BaseReacherEnv): return False - # def check_wall_collision(self, ): - # """find the intersection of line segments A=(x1,y1)/(x2,y2) and - # B=(x3,y3)/(x4,y4). """ - # - # link_lines = np.hstack((self._joints[:-1, :], self._joints[1:, :])) - # - # all_points_product = np.hstack( - # [np.repeat(link_lines, len(self.ground_lines), axis=0), - # np.tile(self.ground_lines, (len(link_lines), 1))]) - # - # x1, y1, x2, y2, x3, y3, x4, y4 = all_points_product.T - # - # denom = ((x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)) - # # if denom == 0: - # # return False - # px = ((x1 * y2 - y1 * x2) * (x3 - x4) - (x1 - x2) * (x3 * y4 - y3 * x4)) / denom - # py = ((x1 * y2 - y1 * x2) * (y3 - y4) - (y1 - y2) * (x3 * y4 - y3 * x4)) / denom - # # if (px - x1) * (px - x2) < 0 and (py - y1) * (py - y2) < 0 \ - # # and (px - x3) * (px - x4) < 0 and (py - y3) * (py - y4) < 0: - # # return True # [px, py] - # test = ((px - x1) * (px - x2) <= 0) & ((py - y1) * (py - y2) <= 0) & ((px - x3) * (px - x4) <= 0) & ( - # (py - y3) * (py - y4) <= 0) - # if np.any(test): - # possible_collisions = np.stack((px, py)).T[test] - # for row in possible_collisions: - # if not np.any([np.allclose(row, x) for x in self._joints]): - # return True, row - # - # return False, None - def render(self, mode='human'): if self.fig is None: # Create base figure once on the beginning. Afterwards only update @@ -242,322 +212,14 @@ class HoleReacherEnv(BaseReacherEnv): self.fig.gca().add_patch(hole_floor) -class HoleReacherEnvOld(gym.Env): - - def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None, - hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False, - allow_wall_collision: bool = False, collision_penalty: float = 1000): - - self.n_links = n_links - self.link_lengths = np.ones((n_links, 1)) - - self.random_start = random_start - - # provided initial parameters - self.initial_x = hole_x # x-position of center of hole - self.initial_width = hole_width # width of hole - self.initial_depth = hole_depth # depth of hole - - # temp container for current env state - self._tmp_x = None - self._tmp_width = None - self._tmp_depth = None - self._goal = None # x-y coordinates for reaching the center at the bottom of the hole - - # collision - self.allow_self_collision = allow_self_collision - self.allow_wall_collision = allow_wall_collision - self.collision_penalty = collision_penalty - - # state - self._joints = None - self._joint_angles = None - self._angle_velocity = None - self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)]) - self._start_vel = np.zeros(self.n_links) - - self._dt = 0.01 - - action_bound = np.pi * np.ones((self.n_links,)) - state_bound = np.hstack([ - [np.pi] * self.n_links, # cos - [np.pi] * self.n_links, # sin - [np.inf] * self.n_links, # velocity - [np.inf], # hole width - # [np.inf], # hole depth - [np.inf] * 2, # x-y coordinates of target distance - [np.inf] # env steps, because reward start after n steps TODO: Maybe - ]) - self.action_space = gym.spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) - self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) - - # containers for plotting - self.metadata = {'render.modes': ["human", "partial"]} - self.fig = None - - self._steps = 0 - self.seed() - - 
@property - def dt(self) -> Union[float, int]: - return self._dt - - # @property - # def start_pos(self): - # return self._start_pos - - @property - def current_pos(self): - return self._joint_angles.copy() - - @property - def current_vel(self): - return self._angle_velocity.copy() - - def step(self, action: np.ndarray): - """ - A single step with an action in joint velocity space - """ - - acc = (action - self._angle_velocity) / self.dt - self._angle_velocity = action - self._joint_angles = self._joint_angles + self.dt * self._angle_velocity # + 0.001 * np.random.randn(5) - self._update_joints() - - reward, info = self._get_reward(acc) - - info.update({"is_collided": self._is_collided}) - self.end_effector_traj.append(np.copy(self.end_effector)) - - self._steps += 1 - done = self._is_collided - - return self._get_obs().copy(), reward, done, info - - def reset(self): - if self.random_start: - # Maybe change more than first seed - first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) - self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) - self._start_pos = self._joint_angles.copy() - else: - self._joint_angles = self._start_pos - - self._generate_hole() - self._set_patches() - - self._angle_velocity = self._start_vel - self._joints = np.zeros((self.n_links + 1, 2)) - self._update_joints() - self._steps = 0 - self.end_effector_traj = [] - - return self._get_obs().copy() - - def _generate_hole(self): - if self.initial_width is None: - width = self.np_random.uniform(0.15, 0.5) - else: - width = np.copy(self.initial_width) - if self.initial_x is None: - # sample whole on left or right side - direction = self.np_random.choice([-1, 1]) - # Hole center needs to be half the width away from the arm to give a valid setting. - x = direction * self.np_random.uniform(width / 2, 3.5) - else: - x = np.copy(self.initial_x) - if self.initial_depth is None: - # TODO we do not want this right now. - depth = self.np_random.uniform(1, 1) - else: - depth = np.copy(self.initial_depth) - - self._tmp_width = width - self._tmp_x = x - self._tmp_depth = depth - self._goal = np.hstack([self._tmp_x, -self._tmp_depth]) - - def _update_joints(self): - """ - update _joints to get new end effector position. The other links are only required for rendering. 
- Returns: - - """ - line_points_in_taskspace = self._get_forward_kinematics(num_points_per_link=20) - - self._joints[1:, 0] = self._joints[0, 0] + line_points_in_taskspace[:, -1, 0] - self._joints[1:, 1] = self._joints[0, 1] + line_points_in_taskspace[:, -1, 1] - - self_collision = False - wall_collision = False - - if not self.allow_self_collision: - self_collision = check_self_collision(line_points_in_taskspace) - if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision: - self_collision = True - - if not self.allow_wall_collision: - wall_collision = self._check_wall_collision(line_points_in_taskspace) - - self._is_collided = self_collision or wall_collision - - def _get_reward(self, acc: np.ndarray): - reward = 0 - # success = False - - if self._steps == 199 or self._is_collided: - # return reward only in last time step - # Episode also terminates when colliding, hence return reward - dist = np.linalg.norm(self.end_effector - self._goal) - # success = dist < 0.005 and not self._is_collided - reward = - dist ** 2 - self.collision_penalty * self._is_collided - - reward -= 5e-8 * np.sum(acc ** 2) - # info = {"is_success": success} - - return reward, {} # info - - def _get_obs(self): - theta = self._joint_angles - return np.hstack([ - np.cos(theta), - np.sin(theta), - self._angle_velocity, - self._tmp_width, - # self._tmp_hole_depth, - self.end_effector - self._goal, - self._steps - ]) - - def _get_forward_kinematics(self, num_points_per_link=1): - theta = self._joint_angles[:, None] - - intermediate_points = np.linspace(0, 1, num_points_per_link) if num_points_per_link > 1 else 1 - accumulated_theta = np.cumsum(theta, axis=0) - end_effector = np.zeros(shape=(self.n_links, num_points_per_link, 2)) - - x = np.cos(accumulated_theta) * self.link_lengths * intermediate_points - y = np.sin(accumulated_theta) * self.link_lengths * intermediate_points - - end_effector[0, :, 0] = x[0, :] - end_effector[0, :, 1] = y[0, :] - - for i in range(1, self.n_links): - end_effector[i, :, 0] = x[i, :] + end_effector[i - 1, -1, 0] - end_effector[i, :, 1] = y[i, :] + end_effector[i - 1, -1, 1] - - return np.squeeze(end_effector + self._joints[0, :]) - - def _check_wall_collision(self, line_points): - # all points that are before the hole in x - r, c = np.where(line_points[:, :, 0] < (self._tmp_x - self._tmp_width / 2)) - - # check if any of those points are below surface - nr_line_points_below_surface_before_hole = np.sum(line_points[r, c, 1] < 0) - - if nr_line_points_below_surface_before_hole > 0: - return True - - # all points that are after the hole in x - r, c = np.where(line_points[:, :, 0] > (self._tmp_x + self._tmp_width / 2)) - - # check if any of those points are below surface - nr_line_points_below_surface_after_hole = np.sum(line_points[r, c, 1] < 0) - - if nr_line_points_below_surface_after_hole > 0: - return True - - # all points that are above the hole - r, c = np.where((line_points[:, :, 0] > (self._tmp_x - self._tmp_width / 2)) & ( - line_points[:, :, 0] < (self._tmp_x + self._tmp_width / 2))) - - # check if any of those points are below surface - nr_line_points_below_surface_in_hole = np.sum(line_points[r, c, 1] < -self._tmp_depth) - - if nr_line_points_below_surface_in_hole > 0: - return True - - return False - - def render(self, mode='human'): - if self.fig is None: - # Create base figure once on the beginning. 
Afterwards only update - plt.ion() - self.fig = plt.figure() - ax = self.fig.add_subplot(1, 1, 1) - - # limits - lim = np.sum(self.link_lengths) + 0.5 - ax.set_xlim([-lim, lim]) - ax.set_ylim([-1.1, lim]) - - self.line, = ax.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k') - self._set_patches() - self.fig.show() - - self.fig.gca().set_title( - f"Iteration: {self._steps}, distance: {np.linalg.norm(self.end_effector - self._goal) ** 2}") - - if mode == "human": - - # arm - self.line.set_data(self._joints[:, 0], self._joints[:, 1]) - - self.fig.canvas.draw() - self.fig.canvas.flush_events() - - elif mode == "partial": - if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided: - # Arm - plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k', - alpha=self._steps / 200) - - def _set_patches(self): - if self.fig is not None: - self.fig.gca().patches = [] - left_block = patches.Rectangle((-self.n_links, -self._tmp_depth), - self.n_links + self._tmp_x - self._tmp_width / 2, - self._tmp_depth, - fill=True, edgecolor='k', facecolor='k') - right_block = patches.Rectangle((self._tmp_x + self._tmp_width / 2, -self._tmp_depth), - self.n_links - self._tmp_x + self._tmp_width / 2, - self._tmp_depth, - fill=True, edgecolor='k', facecolor='k') - hole_floor = patches.Rectangle((self._tmp_x - self._tmp_width / 2, -self._tmp_depth), - self._tmp_width, - 1 - self._tmp_depth, - fill=True, edgecolor='k', facecolor='k') - - # Add the patch to the Axes - self.fig.gca().add_patch(left_block) - self.fig.gca().add_patch(right_block) - self.fig.gca().add_patch(hole_floor) - - def seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] - - @property - def end_effector(self): - return self._joints[self.n_links].T - - def close(self): - super().close() - if self.fig is not None: - plt.close(self.fig) - - if __name__ == "__main__": import time env = HoleReacherEnv(5) env.reset() - start = time.time() for i in range(10000): - # env.check_wall_collision() ac = env.action_space.sample() obs, rew, done, info = env.step(ac) # env.render() if done: env.reset() - print(time.time() - start) diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher_old.py b/alr_envs/alr/classic_control/hole_reacher/hole_reacher_old.py new file mode 100644 index 0000000..1b18bce --- /dev/null +++ b/alr_envs/alr/classic_control/hole_reacher/hole_reacher_old.py @@ -0,0 +1,315 @@ +from typing import Union + +import gym +import matplotlib.pyplot as plt +import numpy as np +from gym.utils import seeding +from matplotlib import patches + +from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv +from alr_envs.alr.classic_control.utils import check_self_collision + + +class HoleReacherEnvOld(gym.Env): + + def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None, + hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False, + allow_wall_collision: bool = False, collision_penalty: float = 1000): + + self.n_links = n_links + self.link_lengths = np.ones((n_links, 1)) + + self.random_start = random_start + + # provided initial parameters + self.initial_x = hole_x # x-position of center of hole + self.initial_width = hole_width # width of hole + self.initial_depth = hole_depth # depth of hole + + # temp container for current env state + self._tmp_x = None + self._tmp_width = None + self._tmp_depth = None + self._goal = None # x-y coordinates for 
reaching the center at the bottom of the hole + + # collision + self.allow_self_collision = allow_self_collision + self.allow_wall_collision = allow_wall_collision + self.collision_penalty = collision_penalty + + # state + self._joints = None + self._joint_angles = None + self._angle_velocity = None + self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)]) + self._start_vel = np.zeros(self.n_links) + + self._dt = 0.01 + + action_bound = np.pi * np.ones((self.n_links,)) + state_bound = np.hstack([ + [np.pi] * self.n_links, # cos + [np.pi] * self.n_links, # sin + [np.inf] * self.n_links, # velocity + [np.inf], # hole width + # [np.inf], # hole depth + [np.inf] * 2, # x-y coordinates of target distance + [np.inf] # env steps, because reward start after n steps TODO: Maybe + ]) + self.action_space = gym.spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) + self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) + + # containers for plotting + self.metadata = {'render.modes': ["human", "partial"]} + self.fig = None + + self._steps = 0 + self.seed() + + @property + def dt(self) -> Union[float, int]: + return self._dt + + # @property + # def start_pos(self): + # return self._start_pos + + @property + def current_pos(self): + return self._joint_angles.copy() + + @property + def current_vel(self): + return self._angle_velocity.copy() + + def step(self, action: np.ndarray): + """ + A single step with an action in joint velocity space + """ + + acc = (action - self._angle_velocity) / self.dt + self._angle_velocity = action + self._joint_angles = self._joint_angles + self.dt * self._angle_velocity # + 0.001 * np.random.randn(5) + self._update_joints() + + reward, info = self._get_reward(acc) + + info.update({"is_collided": self._is_collided}) + self.end_effector_traj.append(np.copy(self.end_effector)) + + self._steps += 1 + done = self._is_collided + + return self._get_obs().copy(), reward, done, info + + def reset(self): + if self.random_start: + # Maybe change more than first seed + first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) + self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) + self._start_pos = self._joint_angles.copy() + else: + self._joint_angles = self._start_pos + + self._generate_hole() + self._set_patches() + + self._angle_velocity = self._start_vel + self._joints = np.zeros((self.n_links + 1, 2)) + self._update_joints() + self._steps = 0 + self.end_effector_traj = [] + + return self._get_obs().copy() + + def _generate_hole(self): + if self.initial_width is None: + width = self.np_random.uniform(0.15, 0.5) + else: + width = np.copy(self.initial_width) + if self.initial_x is None: + # sample whole on left or right side + direction = self.np_random.choice([-1, 1]) + # Hole center needs to be half the width away from the arm to give a valid setting. + x = direction * self.np_random.uniform(width / 2, 3.5) + else: + x = np.copy(self.initial_x) + if self.initial_depth is None: + # TODO we do not want this right now. + depth = self.np_random.uniform(1, 1) + else: + depth = np.copy(self.initial_depth) + + self._tmp_width = width + self._tmp_x = x + self._tmp_depth = depth + self._goal = np.hstack([self._tmp_x, -self._tmp_depth]) + + def _update_joints(self): + """ + update _joints to get new end effector position. The other links are only required for rendering. 
+ Returns: + + """ + line_points_in_taskspace = self._get_forward_kinematics(num_points_per_link=20) + + self._joints[1:, 0] = self._joints[0, 0] + line_points_in_taskspace[:, -1, 0] + self._joints[1:, 1] = self._joints[0, 1] + line_points_in_taskspace[:, -1, 1] + + self_collision = False + wall_collision = False + + if not self.allow_self_collision: + self_collision = check_self_collision(line_points_in_taskspace) + if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision: + self_collision = True + + if not self.allow_wall_collision: + wall_collision = self._check_wall_collision(line_points_in_taskspace) + + self._is_collided = self_collision or wall_collision + + def _get_reward(self, acc: np.ndarray): + reward = 0 + # success = False + + if self._steps == 199 or self._is_collided: + # return reward only in last time step + # Episode also terminates when colliding, hence return reward + dist = np.linalg.norm(self.end_effector - self._goal) + # success = dist < 0.005 and not self._is_collided + reward = - dist ** 2 - self.collision_penalty * self._is_collided + + reward -= 5e-8 * np.sum(acc ** 2) + # info = {"is_success": success} + + return reward, {} # info + + def _get_obs(self): + theta = self._joint_angles + return np.hstack([ + np.cos(theta), + np.sin(theta), + self._angle_velocity, + self._tmp_width, + # self._tmp_hole_depth, + self.end_effector - self._goal, + self._steps + ]) + + def _get_forward_kinematics(self, num_points_per_link=1): + theta = self._joint_angles[:, None] + + intermediate_points = np.linspace(0, 1, num_points_per_link) if num_points_per_link > 1 else 1 + accumulated_theta = np.cumsum(theta, axis=0) + end_effector = np.zeros(shape=(self.n_links, num_points_per_link, 2)) + + x = np.cos(accumulated_theta) * self.link_lengths * intermediate_points + y = np.sin(accumulated_theta) * self.link_lengths * intermediate_points + + end_effector[0, :, 0] = x[0, :] + end_effector[0, :, 1] = y[0, :] + + for i in range(1, self.n_links): + end_effector[i, :, 0] = x[i, :] + end_effector[i - 1, -1, 0] + end_effector[i, :, 1] = y[i, :] + end_effector[i - 1, -1, 1] + + return np.squeeze(end_effector + self._joints[0, :]) + + def _check_wall_collision(self, line_points): + # all points that are before the hole in x + r, c = np.where(line_points[:, :, 0] < (self._tmp_x - self._tmp_width / 2)) + + # check if any of those points are below surface + nr_line_points_below_surface_before_hole = np.sum(line_points[r, c, 1] < 0) + + if nr_line_points_below_surface_before_hole > 0: + return True + + # all points that are after the hole in x + r, c = np.where(line_points[:, :, 0] > (self._tmp_x + self._tmp_width / 2)) + + # check if any of those points are below surface + nr_line_points_below_surface_after_hole = np.sum(line_points[r, c, 1] < 0) + + if nr_line_points_below_surface_after_hole > 0: + return True + + # all points that are above the hole + r, c = np.where((line_points[:, :, 0] > (self._tmp_x - self._tmp_width / 2)) & ( + line_points[:, :, 0] < (self._tmp_x + self._tmp_width / 2))) + + # check if any of those points are below surface + nr_line_points_below_surface_in_hole = np.sum(line_points[r, c, 1] < -self._tmp_depth) + + if nr_line_points_below_surface_in_hole > 0: + return True + + return False + + def render(self, mode='human'): + if self.fig is None: + # Create base figure once on the beginning. 
Afterwards only update + plt.ion() + self.fig = plt.figure() + ax = self.fig.add_subplot(1, 1, 1) + + # limits + lim = np.sum(self.link_lengths) + 0.5 + ax.set_xlim([-lim, lim]) + ax.set_ylim([-1.1, lim]) + + self.line, = ax.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k') + self._set_patches() + self.fig.show() + + self.fig.gca().set_title( + f"Iteration: {self._steps}, distance: {np.linalg.norm(self.end_effector - self._goal) ** 2}") + + if mode == "human": + + # arm + self.line.set_data(self._joints[:, 0], self._joints[:, 1]) + + self.fig.canvas.draw() + self.fig.canvas.flush_events() + + elif mode == "partial": + if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided: + # Arm + plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k', + alpha=self._steps / 200) + + def _set_patches(self): + if self.fig is not None: + self.fig.gca().patches = [] + left_block = patches.Rectangle((-self.n_links, -self._tmp_depth), + self.n_links + self._tmp_x - self._tmp_width / 2, + self._tmp_depth, + fill=True, edgecolor='k', facecolor='k') + right_block = patches.Rectangle((self._tmp_x + self._tmp_width / 2, -self._tmp_depth), + self.n_links - self._tmp_x + self._tmp_width / 2, + self._tmp_depth, + fill=True, edgecolor='k', facecolor='k') + hole_floor = patches.Rectangle((self._tmp_x - self._tmp_width / 2, -self._tmp_depth), + self._tmp_width, + 1 - self._tmp_depth, + fill=True, edgecolor='k', facecolor='k') + + # Add the patch to the Axes + self.fig.gca().add_patch(left_block) + self.fig.gca().add_patch(right_block) + self.fig.gca().add_patch(hole_floor) + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + @property + def end_effector(self): + return self._joints[self.n_links].T + + def close(self): + super().close() + if self.fig is not None: + plt.close(self.fig) \ No newline at end of file diff --git a/alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py b/alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py new file mode 100644 index 0000000..7fadad9 --- /dev/null +++ b/alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py @@ -0,0 +1,56 @@ +import numpy as np + + +class HolereacherReward: + def __init__(self, allow_self_collision, allow_wall_collision, collision_penalty): + self.collision_penalty = collision_penalty + + # collision + self.allow_self_collision = allow_self_collision + self.allow_wall_collision = allow_wall_collision + self.collision_penalty = collision_penalty + self._is_collided = False + + self.reward_factors = np.array((-1, -1e-4, -1e-6, -collision_penalty, -1)) + + def reset(self): + self._is_collided = False + + def get_reward(self, env): + dist_cost = 0 + collision_cost = 0 + time_cost = 0 + success = False + + self_collision = False + wall_collision = False + + if not self.allow_self_collision: + self_collision = env._check_self_collision() + + if not self.allow_wall_collision: + wall_collision = env.check_wall_collision() + + self._is_collided = self_collision or wall_collision + + if env._steps == 199 or self._is_collided: + # return reward only in last time step + # Episode also terminates when colliding, hence return reward + dist = np.linalg.norm(env.end_effector - env._goal) + + success = dist < 0.005 and not self._is_collided + dist_cost = int(not self._is_collided) * dist ** 2 + collision_cost = self._is_collided * dist ** 2 + time_cost = 199 - env._steps + + info = {"is_success": success, + "is_collided": 
self._is_collided, + "end_effector": np.copy(env.end_effector)} + + vel_cost = np.sum(env._angle_velocity ** 2) + acc_cost = np.sum(env._acc ** 2) + + reward_features = np.array((dist_cost, vel_cost, acc_cost, collision_cost, time_cost)) + reward = np.dot(reward_features, self.reward_factors) + + return reward, info diff --git a/alr_envs/alr/classic_control/hole_reacher/simple_reward.py b/alr_envs/alr/classic_control/hole_reacher/hr_simple_reward.py similarity index 64% rename from alr_envs/alr/classic_control/hole_reacher/simple_reward.py rename to alr_envs/alr/classic_control/hole_reacher/hr_simple_reward.py index 0609a62..5820ab1 100644 --- a/alr_envs/alr/classic_control/hole_reacher/simple_reward.py +++ b/alr_envs/alr/classic_control/hole_reacher/hr_simple_reward.py @@ -1,8 +1,7 @@ import numpy as np -from alr_envs.alr.classic_control.utils import check_self_collision -class HolereacherSimpleReward: +class HolereacherReward: def __init__(self, allow_self_collision, allow_wall_collision, collision_penalty): self.collision_penalty = collision_penalty @@ -11,16 +10,20 @@ class HolereacherSimpleReward: self.allow_wall_collision = allow_wall_collision self.collision_penalty = collision_penalty self._is_collided = False - pass - def get_reward(self, env, action): - reward = 0 + self.reward_factors = np.array((-1, -5e-8, -collision_penalty)) + + def reset(self): + self._is_collided = False + + def get_reward(self, env): + dist_cost = 0 + collision_cost = 0 success = False self_collision = False wall_collision = False - # joints = np.hstack((env._joints[:-1, :], env._joints[1:, :])) if not self.allow_self_collision: self_collision = env._check_self_collision() @@ -33,16 +36,18 @@ class HolereacherSimpleReward: # return reward only in last time step # Episode also terminates when colliding, hence return reward dist = np.linalg.norm(env.end_effector - env._goal) + dist_cost = dist ** 2 + collision_cost = int(self._is_collided) success = dist < 0.005 and not self._is_collided - reward = - dist ** 2 - self.collision_penalty * self._is_collided info = {"is_success": success, - "is_collided": self._is_collided} + "is_collided": self._is_collided, + "end_effector": np.copy(env.end_effector)} - acc = (action - env._angle_velocity) / env.dt - reward -= 5e-8 * np.sum(acc ** 2) + acc_cost = np.sum(env._acc ** 2) + + reward_features = np.array((dist_cost, acc_cost, collision_cost)) + reward = np.dot(reward_features, self.reward_factors) return reward, info - - diff --git a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py index 08c151f..40a4d95 100644 --- a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py +++ b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py @@ -1,26 +1,22 @@ from typing import Iterable, Union -import gym import matplotlib.pyplot as plt import numpy as np from gym import spaces -from gym.utils import seeding + +from alr_envs.alr.classic_control.base_reacher.base_reacher_torque import BaseReacherTorqueEnv -class SimpleReacherEnv(gym.Env): +class SimpleReacherEnv(BaseReacherTorqueEnv): """ Simple Reaching Task without any physics simulation. Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions towards the end of the trajectory. 
""" - def __init__(self, n_links: int, target: Union[None, Iterable] = None, random_start: bool = True): - super().__init__() - self.link_lengths = np.ones(n_links) - self.n_links = n_links - self._dt = 0.1 - - self.random_start = random_start + def __init__(self, n_links: int, target: Union[None, Iterable] = None, random_start: bool = True, + allow_self_collision: bool = False,): + super().__init__(n_links, random_start, allow_self_collision) # provided initial parameters self.inital_target = target @@ -28,16 +24,10 @@ class SimpleReacherEnv(gym.Env): # temp container for current env state self._goal = None - self._joints = None - self._joint_angles = None - self._angle_velocity = None self._start_pos = np.zeros(self.n_links) - self._start_vel = np.zeros(self.n_links) - self.max_torque = 1 self.steps_before_reward = 199 - action_bound = np.ones((self.n_links,)) * self.max_torque state_bound = np.hstack([ [np.pi] * self.n_links, # cos [np.pi] * self.n_links, # sin @@ -45,84 +35,24 @@ class SimpleReacherEnv(gym.Env): [np.inf] * 2, # x-y coordinates of target distance [np.inf] # env steps, because reward start after n steps TODO: Maybe ]) - self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) self.observation_space = spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) - # containers for plotting - self.metadata = {'render.modes': ["human"]} - self.fig = None - - self._steps = 0 - self.seed() - - @property - def dt(self) -> Union[float, int]: - return self._dt - # @property # def start_pos(self): # return self._start_pos - @property - def current_pos(self): - return self._joint_angles - - @property - def current_vel(self): - return self._angle_velocity - - def step(self, action: np.ndarray): - """ - A single step with action in torque space - """ - - # action = self._add_action_noise(action) - ac = np.clip(action, -self.max_torque, self.max_torque) - - self._angle_velocity = self._angle_velocity + self.dt * ac - self._joint_angles = self._joint_angles + self.dt * self._angle_velocity - self._update_joints() - - reward, info = self._get_reward(action) - - self._steps += 1 - done = False - - return self._get_obs().copy(), reward, done, info - def reset(self): - - # TODO: maybe do initialisation more random? - # Sample only orientation of first link, i.e. the arm is always straight. - if self.random_start: - self._joint_angles = np.hstack([[self.np_random.uniform(-np.pi, np.pi)], np.zeros(self.n_links - 1)]) - self._start_pos = self._joint_angles.copy() - else: - self._joint_angles = self._start_pos - self._generate_goal() - self._angle_velocity = self._start_vel - self._joints = np.zeros((self.n_links + 1, 2)) - self._update_joints() - self._steps = 0 - - return self._get_obs().copy() - - def _update_joints(self): - """ - update joints to get new end-effector position. The other links are only required for rendering. 
- Returns: - - """ - angles = np.cumsum(self._joint_angles) - x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)]) - self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0) + return super().reset() def _get_reward(self, action: np.ndarray): diff = self.end_effector - self._goal reward_dist = 0 + if not self.allow_self_collision: + self._is_collided = self._check_self_collision() + if self._steps >= self.steps_before_reward: reward_dist -= np.linalg.norm(diff) # reward_dist = np.exp(-0.1 * diff ** 2).mean() @@ -132,6 +62,9 @@ class SimpleReacherEnv(gym.Env): reward = reward_dist - reward_ctrl return reward, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl) + def _terminate(self, info): + return False + def _get_obs(self): theta = self._joint_angles return np.hstack([ @@ -190,13 +123,14 @@ class SimpleReacherEnv(gym.Env): self.fig.canvas.draw() self.fig.canvas.flush_events() - def seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] - def close(self): - del self.fig +if __name__ == "__main__": + env = SimpleReacherEnv(5) + env.reset() + for i in range(200): + ac = env.action_space.sample() + obs, rew, done, info = env.step(ac) - @property - def end_effector(self): - return self._joints[self.n_links].T + env.render() + if done: + break diff --git a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py index 95d1e7f..3b47969 100644 --- a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py +++ b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py @@ -6,17 +6,15 @@ import numpy as np from gym.utils import seeding from alr_envs.alr.classic_control.utils import check_self_collision +from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv -class ViaPointReacherEnv(gym.Env): +class ViaPointReacherEnv(BaseReacherDirectEnv): def __init__(self, n_links, random_start: bool = False, via_target: Union[None, Iterable] = None, target: Union[None, Iterable] = None, allow_self_collision=False, collision_penalty=1000): - self.n_links = n_links - self.link_lengths = np.ones((n_links, 1)) - - self.random_start = random_start + super().__init__(n_links, random_start, allow_self_collision) # provided initial parameters self.intitial_target = target # provided target value @@ -30,17 +28,6 @@ class ViaPointReacherEnv(gym.Env): self.allow_self_collision = allow_self_collision self.collision_penalty = collision_penalty - # state - self._joints = None - self._joint_angles = None - self._angle_velocity = None - self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)]) - self._start_vel = np.zeros(self.n_links) - self.weight_matrix_scale = 1 - - self._dt = 0.01 - - action_bound = np.pi * np.ones((self.n_links,)) state_bound = np.hstack([ [np.pi] * self.n_links, # cos [np.pi] * self.n_links, # sin @@ -49,69 +36,15 @@ class ViaPointReacherEnv(gym.Env): [np.inf] * 2, # x-y coordinates of target distance [np.inf] # env steps, because reward start after n steps ]) - self.action_space = gym.spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) - # containers for plotting - self.metadata = {'render.modes': ["human", "partial"]} - self.fig = None - - self._steps = 0 - self.seed() - - @property - def dt(self): - return self._dt - # @property # def start_pos(self): # return self._start_pos - 
@property - def current_pos(self): - return self._joint_angles.copy() - - @property - def current_vel(self): - return self._angle_velocity.copy() - - def step(self, action: np.ndarray): - """ - a single step with an action in joint velocity space - """ - vel = action - self._angle_velocity = vel - self._joint_angles = self._joint_angles + self.dt * self._angle_velocity - self._update_joints() - - acc = (vel - self._angle_velocity) / self.dt - reward, info = self._get_reward(acc) - - info.update({"is_collided": self._is_collided}) - - self._steps += 1 - done = self._is_collided - - return self._get_obs().copy(), reward, done, info - def reset(self): - - if self.random_start: - # Maybe change more than dirst seed - first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) - self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) - self._start_pos = self._joint_angles.copy() - else: - self._joint_angles = self._start_pos - self._generate_goal() - - self._angle_velocity = self._start_vel - self._joints = np.zeros((self.n_links + 1, 2)) - self._update_joints() - self._steps = 0 - - return self._get_obs().copy() + return super().reset() def _generate_goal(self): # TODO: Maybe improve this later, this can yield quite a lot of invalid settings @@ -137,29 +70,13 @@ class ViaPointReacherEnv(gym.Env): self._via_point = via_target self._goal = goal - def _update_joints(self): - """ - update _joints to get new end effector position. The other links are only required for rendering. - Returns: - - """ - line_points_in_taskspace = self.get_forward_kinematics(num_points_per_link=20) - - self._joints[1:, 0] = self._joints[0, 0] + line_points_in_taskspace[:, -1, 0] - self._joints[1:, 1] = self._joints[0, 1] + line_points_in_taskspace[:, -1, 1] - - self_collision = False - - if not self.allow_self_collision: - self_collision = check_self_collision(line_points_in_taskspace) - if np.any(np.abs(self._joint_angles) > np.pi): - self_collision = True - - self._is_collided = self_collision - def _get_reward(self, acc): success = False reward = -np.inf + + if not self.allow_self_collision: + self._is_collided = self._check_self_collision() + if not self._is_collided: dist = np.inf # return intermediate reward for via point @@ -177,10 +94,15 @@ class ViaPointReacherEnv(gym.Env): reward -= dist ** 2 reward -= 5e-8 * np.sum(acc ** 2) - info = {"is_success": success} + info = {"is_success": success, + "is_collided": self._is_collided, + "end_effector": np.copy(env.end_effector)} return reward, info + def _terminate(self, info): + return info["is_collided"] + def _get_obs(self): theta = self._joint_angles return np.hstack([ @@ -192,27 +114,6 @@ class ViaPointReacherEnv(gym.Env): self._steps ]) - def get_forward_kinematics(self, num_points_per_link=1): - theta = self._joint_angles[:, None] - - intermediate_points = np.linspace(0, 1, num_points_per_link) if num_points_per_link > 1 else 1 - - accumulated_theta = np.cumsum(theta, axis=0) - - endeffector = np.zeros(shape=(self.n_links, num_points_per_link, 2)) - - x = np.cos(accumulated_theta) * self.link_lengths * intermediate_points - y = np.sin(accumulated_theta) * self.link_lengths * intermediate_points - - endeffector[0, :, 0] = x[0, :] - endeffector[0, :, 1] = y[0, :] - - for i in range(1, self.n_links): - endeffector[i, :, 0] = x[i, :] + endeffector[i - 1, -1, 0] - endeffector[i, :, 1] = y[i, :] + endeffector[i - 1, -1, 1] - - return np.squeeze(endeffector + self._joints[0, :]) - def render(self, mode='human'): goal_pos = self._goal.T via_pos = 
self._via_point.T @@ -281,14 +182,14 @@ class ViaPointReacherEnv(gym.Env): plt.pause(0.01) - def seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] +if __name__ == "__main__": + import time + env = ViaPointReacherEnv(5) + env.reset() - @property - def end_effector(self): - return self._joints[self.n_links].T - - def close(self): - if self.fig is not None: - plt.close(self.fig) + for i in range(10000): + ac = env.action_space.sample() + obs, rew, done, info = env.step(ac) + env.render() + if done: + env.reset() diff --git a/alr_envs/examples/examples_motion_primitives.py b/alr_envs/examples/examples_motion_primitives.py index b307a52..e84da2f 100644 --- a/alr_envs/examples/examples_motion_primitives.py +++ b/alr_envs/examples/examples_motion_primitives.py @@ -149,16 +149,16 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): if __name__ == '__main__': render = False # DMP - # example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=10, render=render) + example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=10, render=render) # ProMP - # example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=100, render=render) + example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=100, render=render) # DetProMP example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=100, render=render) # Altered basis functions - # example_custom_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render) + example_custom_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render) # Custom MP - # example_fully_custom_mp(seed=10, iterations=1, render=render) + example_fully_custom_mp(seed=10, iterations=1, render=render) From 66aa0ab9e515d81b3cdec3167bd3e6d736ad7fbd Mon Sep 17 00:00:00 2001 From: Maximilian Huettenrauch Date: Tue, 30 Nov 2021 09:33:26 +0100 Subject: [PATCH 12/15] removed old holereacher env --- alr_envs/alr/__init__.py | 4 +- alr_envs/alr/classic_control/__init__.py | 2 +- .../hole_reacher/hole_reacher.py | 3 + .../hole_reacher/hole_reacher_old.py | 315 ------------------ .../hole_reacher/hr_dist_vel_acc_reward.py | 22 +- 5 files changed, 19 insertions(+), 327 deletions(-) delete mode 100644 alr_envs/alr/classic_control/hole_reacher/hole_reacher_old.py diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py index f55c1df..18e30c8 100644 --- a/alr_envs/alr/__init__.py +++ b/alr_envs/alr/__init__.py @@ -362,7 +362,7 @@ for _v in _versions: "num_basis": 5, "duration": 2, "learn_goal": True, - "alpha_phase": 2, + "alpha_phase": 2.5, "bandwidth_factor": 2, "policy_type": "velocity", "weights_scale": 50, @@ -384,7 +384,7 @@ for _v in _versions: "num_basis": 5, "duration": 2, "policy_type": "velocity", - "weights_scale": 0.2, + "weights_scale": 0.1, "zero_start": True } } diff --git a/alr_envs/alr/classic_control/__init__.py b/alr_envs/alr/classic_control/__init__.py index 38f0faf..73575ab 100644 --- a/alr_envs/alr/classic_control/__init__.py +++ b/alr_envs/alr/classic_control/__init__.py @@ -1,3 +1,3 @@ -from .hole_reacher.hole_reacher import HoleReacherEnv, HoleReacherEnvOld +from .hole_reacher.hole_reacher import HoleReacherEnv from .simple_reacher.simple_reacher import SimpleReacherEnv from .viapoint_reacher.viapoint_reacher import ViaPointReacherEnv diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py index 2cb1e08..03ceee2 100644 --- a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py +++ 
b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py @@ -42,6 +42,9 @@ class HoleReacherEnv(BaseReacherDirectEnv): if rew_fct == "simple": from alr_envs.alr.classic_control.hole_reacher.hr_simple_reward import HolereacherReward self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty) + elif rew_fct == "vel_acc": + from alr_envs.alr.classic_control.hole_reacher.hr_dist_vel_acc_reward import HolereacherReward + self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty) else: raise ValueError("Unknown reward function {}".format(rew_fct)) diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher_old.py b/alr_envs/alr/classic_control/hole_reacher/hole_reacher_old.py deleted file mode 100644 index 1b18bce..0000000 --- a/alr_envs/alr/classic_control/hole_reacher/hole_reacher_old.py +++ /dev/null @@ -1,315 +0,0 @@ -from typing import Union - -import gym -import matplotlib.pyplot as plt -import numpy as np -from gym.utils import seeding -from matplotlib import patches - -from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv -from alr_envs.alr.classic_control.utils import check_self_collision - - -class HoleReacherEnvOld(gym.Env): - - def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None, - hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False, - allow_wall_collision: bool = False, collision_penalty: float = 1000): - - self.n_links = n_links - self.link_lengths = np.ones((n_links, 1)) - - self.random_start = random_start - - # provided initial parameters - self.initial_x = hole_x # x-position of center of hole - self.initial_width = hole_width # width of hole - self.initial_depth = hole_depth # depth of hole - - # temp container for current env state - self._tmp_x = None - self._tmp_width = None - self._tmp_depth = None - self._goal = None # x-y coordinates for reaching the center at the bottom of the hole - - # collision - self.allow_self_collision = allow_self_collision - self.allow_wall_collision = allow_wall_collision - self.collision_penalty = collision_penalty - - # state - self._joints = None - self._joint_angles = None - self._angle_velocity = None - self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)]) - self._start_vel = np.zeros(self.n_links) - - self._dt = 0.01 - - action_bound = np.pi * np.ones((self.n_links,)) - state_bound = np.hstack([ - [np.pi] * self.n_links, # cos - [np.pi] * self.n_links, # sin - [np.inf] * self.n_links, # velocity - [np.inf], # hole width - # [np.inf], # hole depth - [np.inf] * 2, # x-y coordinates of target distance - [np.inf] # env steps, because reward start after n steps TODO: Maybe - ]) - self.action_space = gym.spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) - self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) - - # containers for plotting - self.metadata = {'render.modes': ["human", "partial"]} - self.fig = None - - self._steps = 0 - self.seed() - - @property - def dt(self) -> Union[float, int]: - return self._dt - - # @property - # def start_pos(self): - # return self._start_pos - - @property - def current_pos(self): - return self._joint_angles.copy() - - @property - def current_vel(self): - return self._angle_velocity.copy() - - def step(self, action: np.ndarray): - """ - A single step with an action in joint velocity space - """ 
- - acc = (action - self._angle_velocity) / self.dt - self._angle_velocity = action - self._joint_angles = self._joint_angles + self.dt * self._angle_velocity # + 0.001 * np.random.randn(5) - self._update_joints() - - reward, info = self._get_reward(acc) - - info.update({"is_collided": self._is_collided}) - self.end_effector_traj.append(np.copy(self.end_effector)) - - self._steps += 1 - done = self._is_collided - - return self._get_obs().copy(), reward, done, info - - def reset(self): - if self.random_start: - # Maybe change more than first seed - first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) - self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) - self._start_pos = self._joint_angles.copy() - else: - self._joint_angles = self._start_pos - - self._generate_hole() - self._set_patches() - - self._angle_velocity = self._start_vel - self._joints = np.zeros((self.n_links + 1, 2)) - self._update_joints() - self._steps = 0 - self.end_effector_traj = [] - - return self._get_obs().copy() - - def _generate_hole(self): - if self.initial_width is None: - width = self.np_random.uniform(0.15, 0.5) - else: - width = np.copy(self.initial_width) - if self.initial_x is None: - # sample whole on left or right side - direction = self.np_random.choice([-1, 1]) - # Hole center needs to be half the width away from the arm to give a valid setting. - x = direction * self.np_random.uniform(width / 2, 3.5) - else: - x = np.copy(self.initial_x) - if self.initial_depth is None: - # TODO we do not want this right now. - depth = self.np_random.uniform(1, 1) - else: - depth = np.copy(self.initial_depth) - - self._tmp_width = width - self._tmp_x = x - self._tmp_depth = depth - self._goal = np.hstack([self._tmp_x, -self._tmp_depth]) - - def _update_joints(self): - """ - update _joints to get new end effector position. The other links are only required for rendering. 
- Returns: - - """ - line_points_in_taskspace = self._get_forward_kinematics(num_points_per_link=20) - - self._joints[1:, 0] = self._joints[0, 0] + line_points_in_taskspace[:, -1, 0] - self._joints[1:, 1] = self._joints[0, 1] + line_points_in_taskspace[:, -1, 1] - - self_collision = False - wall_collision = False - - if not self.allow_self_collision: - self_collision = check_self_collision(line_points_in_taskspace) - if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision: - self_collision = True - - if not self.allow_wall_collision: - wall_collision = self._check_wall_collision(line_points_in_taskspace) - - self._is_collided = self_collision or wall_collision - - def _get_reward(self, acc: np.ndarray): - reward = 0 - # success = False - - if self._steps == 199 or self._is_collided: - # return reward only in last time step - # Episode also terminates when colliding, hence return reward - dist = np.linalg.norm(self.end_effector - self._goal) - # success = dist < 0.005 and not self._is_collided - reward = - dist ** 2 - self.collision_penalty * self._is_collided - - reward -= 5e-8 * np.sum(acc ** 2) - # info = {"is_success": success} - - return reward, {} # info - - def _get_obs(self): - theta = self._joint_angles - return np.hstack([ - np.cos(theta), - np.sin(theta), - self._angle_velocity, - self._tmp_width, - # self._tmp_hole_depth, - self.end_effector - self._goal, - self._steps - ]) - - def _get_forward_kinematics(self, num_points_per_link=1): - theta = self._joint_angles[:, None] - - intermediate_points = np.linspace(0, 1, num_points_per_link) if num_points_per_link > 1 else 1 - accumulated_theta = np.cumsum(theta, axis=0) - end_effector = np.zeros(shape=(self.n_links, num_points_per_link, 2)) - - x = np.cos(accumulated_theta) * self.link_lengths * intermediate_points - y = np.sin(accumulated_theta) * self.link_lengths * intermediate_points - - end_effector[0, :, 0] = x[0, :] - end_effector[0, :, 1] = y[0, :] - - for i in range(1, self.n_links): - end_effector[i, :, 0] = x[i, :] + end_effector[i - 1, -1, 0] - end_effector[i, :, 1] = y[i, :] + end_effector[i - 1, -1, 1] - - return np.squeeze(end_effector + self._joints[0, :]) - - def _check_wall_collision(self, line_points): - # all points that are before the hole in x - r, c = np.where(line_points[:, :, 0] < (self._tmp_x - self._tmp_width / 2)) - - # check if any of those points are below surface - nr_line_points_below_surface_before_hole = np.sum(line_points[r, c, 1] < 0) - - if nr_line_points_below_surface_before_hole > 0: - return True - - # all points that are after the hole in x - r, c = np.where(line_points[:, :, 0] > (self._tmp_x + self._tmp_width / 2)) - - # check if any of those points are below surface - nr_line_points_below_surface_after_hole = np.sum(line_points[r, c, 1] < 0) - - if nr_line_points_below_surface_after_hole > 0: - return True - - # all points that are above the hole - r, c = np.where((line_points[:, :, 0] > (self._tmp_x - self._tmp_width / 2)) & ( - line_points[:, :, 0] < (self._tmp_x + self._tmp_width / 2))) - - # check if any of those points are below surface - nr_line_points_below_surface_in_hole = np.sum(line_points[r, c, 1] < -self._tmp_depth) - - if nr_line_points_below_surface_in_hole > 0: - return True - - return False - - def render(self, mode='human'): - if self.fig is None: - # Create base figure once on the beginning. 
Afterwards only update - plt.ion() - self.fig = plt.figure() - ax = self.fig.add_subplot(1, 1, 1) - - # limits - lim = np.sum(self.link_lengths) + 0.5 - ax.set_xlim([-lim, lim]) - ax.set_ylim([-1.1, lim]) - - self.line, = ax.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k') - self._set_patches() - self.fig.show() - - self.fig.gca().set_title( - f"Iteration: {self._steps}, distance: {np.linalg.norm(self.end_effector - self._goal) ** 2}") - - if mode == "human": - - # arm - self.line.set_data(self._joints[:, 0], self._joints[:, 1]) - - self.fig.canvas.draw() - self.fig.canvas.flush_events() - - elif mode == "partial": - if self._steps % 20 == 0 or self._steps in [1, 199] or self._is_collided: - # Arm - plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k', - alpha=self._steps / 200) - - def _set_patches(self): - if self.fig is not None: - self.fig.gca().patches = [] - left_block = patches.Rectangle((-self.n_links, -self._tmp_depth), - self.n_links + self._tmp_x - self._tmp_width / 2, - self._tmp_depth, - fill=True, edgecolor='k', facecolor='k') - right_block = patches.Rectangle((self._tmp_x + self._tmp_width / 2, -self._tmp_depth), - self.n_links - self._tmp_x + self._tmp_width / 2, - self._tmp_depth, - fill=True, edgecolor='k', facecolor='k') - hole_floor = patches.Rectangle((self._tmp_x - self._tmp_width / 2, -self._tmp_depth), - self._tmp_width, - 1 - self._tmp_depth, - fill=True, edgecolor='k', facecolor='k') - - # Add the patch to the Axes - self.fig.gca().add_patch(left_block) - self.fig.gca().add_patch(right_block) - self.fig.gca().add_patch(hole_floor) - - def seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] - - @property - def end_effector(self): - return self._joints[self.n_links].T - - def close(self): - super().close() - if self.fig is not None: - plt.close(self.fig) \ No newline at end of file diff --git a/alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py b/alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py index 7fadad9..1d75b66 100644 --- a/alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py +++ b/alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py @@ -11,10 +11,11 @@ class HolereacherReward: self.collision_penalty = collision_penalty self._is_collided = False - self.reward_factors = np.array((-1, -1e-4, -1e-6, -collision_penalty, -1)) + self.reward_factors = np.array((-1, -1e-4, -1e-6, -collision_penalty, 0)) def reset(self): self._is_collided = False + self.collision_dist = 0 def get_reward(self, env): dist_cost = 0 @@ -25,22 +26,25 @@ class HolereacherReward: self_collision = False wall_collision = False - if not self.allow_self_collision: - self_collision = env._check_self_collision() + if not self._is_collided: + if not self.allow_self_collision: + self_collision = env._check_self_collision() - if not self.allow_wall_collision: - wall_collision = env.check_wall_collision() + if not self.allow_wall_collision: + wall_collision = env.check_wall_collision() - self._is_collided = self_collision or wall_collision + self._is_collided = self_collision or wall_collision - if env._steps == 199 or self._is_collided: + self.collision_dist = np.linalg.norm(env.end_effector - env._goal) + + if env._steps == 199: # or self._is_collided: # return reward only in last time step # Episode also terminates when colliding, hence return reward dist = np.linalg.norm(env.end_effector - env._goal) success = dist < 0.005 and not self._is_collided - 
dist_cost = int(not self._is_collided) * dist ** 2 - collision_cost = self._is_collided * dist ** 2 + dist_cost = dist ** 2 + collision_cost = self._is_collided * self.collision_dist ** 2 time_cost = 199 - env._steps info = {"is_success": success, From cfa49a04ba80804a078b2b6d6f6813e3083e2be2 Mon Sep 17 00:00:00 2001 From: Maximilian Huettenrauch Date: Tue, 30 Nov 2021 10:33:04 +0100 Subject: [PATCH 13/15] added ABC to reacher base envs --- .../alr/classic_control/base_reacher/base_reacher.py | 4 ++-- .../classic_control/base_reacher/base_reacher_direct.py | 6 ++++-- .../classic_control/base_reacher/base_reacher_torque.py | 9 ++++----- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher.py b/alr_envs/alr/classic_control/base_reacher/base_reacher.py index 66b234f..dd8c64b 100644 --- a/alr_envs/alr/classic_control/base_reacher/base_reacher.py +++ b/alr_envs/alr/classic_control/base_reacher/base_reacher.py @@ -1,5 +1,5 @@ from typing import Iterable, Union -from abc import ABCMeta, abstractmethod +from abc import ABC, abstractmethod import gym import matplotlib.pyplot as plt import numpy as np @@ -8,7 +8,7 @@ from gym.utils import seeding from alr_envs.alr.classic_control.utils import intersect -class BaseReacherEnv(gym.Env): +class BaseReacherEnv(gym.Env, ABC): """ Base class for all reaching environments. """ diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py b/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py index 4f7b6f1..dc79827 100644 --- a/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py +++ b/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py @@ -1,9 +1,11 @@ +from abc import ABC + from gym import spaces import numpy as np from alr_envs.alr.classic_control.base_reacher.base_reacher import BaseReacherEnv -class BaseReacherDirectEnv(BaseReacherEnv): +class BaseReacherDirectEnv(BaseReacherEnv, ABC): """ Base class for directly controlled reaching environments """ @@ -11,7 +13,7 @@ class BaseReacherDirectEnv(BaseReacherEnv): allow_self_collision: bool = False): super().__init__(n_links, random_start, allow_self_collision) - self.max_vel = 10 * np.pi + self.max_vel = 2 * np.pi action_bound = np.ones((self.n_links,)) * self.max_vel self.action_space = spaces.Box(low=-action_bound, high=action_bound, shape=action_bound.shape) diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py b/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py index 7789965..094f632 100644 --- a/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py +++ b/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py @@ -1,9 +1,11 @@ +from abc import ABC + from gym import spaces import numpy as np from alr_envs.alr.classic_control.base_reacher.base_reacher import BaseReacherEnv -class BaseReacherTorqueEnv(BaseReacherEnv): +class BaseReacherTorqueEnv(BaseReacherEnv, ABC): """ Base class for torque controlled reaching environments """ @@ -20,10 +22,7 @@ class BaseReacherTorqueEnv(BaseReacherEnv): A single step with action in torque space """ - # action = self._add_action_noise(action) - ac = np.clip(action, -self.max_torque, self.max_torque) - - self._angle_velocity = self._angle_velocity + self.dt * ac + self._angle_velocity = self._angle_velocity + self.dt * action self._joint_angles = self._joint_angles + self.dt * self._angle_velocity self._update_joints() From ca90f257d458cbd4820bacfe72a8bc037c200874 Mon Sep 17 00:00:00 2001 From: 
Maximilian Huettenrauch Date: Tue, 30 Nov 2021 12:05:19 +0100 Subject: [PATCH 14/15] reacher envs pass unittests --- alr_envs/alr/__init__.py | 4 ++-- .../alr/classic_control/base_reacher/base_reacher.py | 5 +++++ .../alr/classic_control/hole_reacher/hole_reacher.py | 5 ++++- .../classic_control/simple_reacher/simple_reacher.py | 3 +++ .../viapoint_reacher/viapoint_reacher.py | 6 ++++-- alr_envs/alr/mujoco/reacher/alr_reacher.py | 1 + alr_envs/examples/examples_motion_primitives.py | 10 +++++----- 7 files changed, 24 insertions(+), 10 deletions(-) diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py index 18e30c8..7521ccf 100644 --- a/alr_envs/alr/__init__.py +++ b/alr_envs/alr/__init__.py @@ -237,7 +237,7 @@ for _v in _versions: "mp_kwargs": { "num_dof": 2 if "long" not in _v.lower() else 5, "num_basis": 5, - "duration": 20, + "duration": 2, "alpha_phase": 2, "learn_goal": True, "policy_type": "velocity", @@ -277,7 +277,7 @@ for _v in _versions: "mp_kwargs": { "num_dof": 2 if "long" not in _v.lower() else 5, "num_basis": 5, - "duration": 20, + "duration": 2, "width": 0.025, "policy_type": "velocity", "weights_scale": 0.2, diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher.py b/alr_envs/alr/classic_control/base_reacher/base_reacher.py index dd8c64b..1b1ad19 100644 --- a/alr_envs/alr/classic_control/base_reacher/base_reacher.py +++ b/alr_envs/alr/classic_control/base_reacher/base_reacher.py @@ -22,6 +22,8 @@ class BaseReacherEnv(gym.Env, ABC): self.random_start = random_start + self.allow_self_collision = allow_self_collision + # state self._joints = None self._joint_angles = None @@ -103,6 +105,9 @@ class BaseReacherEnv(gym.Env, ABC): def _check_self_collision(self): """Checks whether line segments intersect""" + if self.allow_self_collision: + return False + if np.any(self._joint_angles > self.j_max) or np.any(self._joint_angles < self.j_min): return True diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py index 03ceee2..dd7321a 100644 --- a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py +++ b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py @@ -127,6 +127,9 @@ class HoleReacherEnv(BaseReacherDirectEnv): return np.squeeze(end_effector + self._joints[0, :]) + def _check_collisions(self) -> bool: + return self._check_self_collision() or self.check_wall_collision() + def check_wall_collision(self): line_points = self._get_line_points(num_points_per_link=100) @@ -223,6 +226,6 @@ if __name__ == "__main__": for i in range(10000): ac = env.action_space.sample() obs, rew, done, info = env.step(ac) - # env.render() + env.render() if done: env.reset() diff --git a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py index 40a4d95..758f824 100644 --- a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py +++ b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py @@ -88,6 +88,9 @@ class SimpleReacherEnv(BaseReacherTorqueEnv): self._goal = goal + def _check_collisions(self) -> bool: + return self._check_self_collision() + def render(self, mode='human'): # pragma: no cover if self.fig is None: # Create base figure once on the beginning. 
diff --git a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
index 3b47969..748eb99 100644
--- a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
+++ b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
@@ -25,7 +25,6 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
         self._goal = np.array((n_links, 0))
 
         # collision
-        self.allow_self_collision = allow_self_collision
         self.collision_penalty = collision_penalty
 
         state_bound = np.hstack([
@@ -96,7 +95,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
             reward -= 5e-8 * np.sum(acc ** 2)
 
         info = {"is_success": success,
                 "is_collided": self._is_collided,
-                "end_effector": np.copy(env.end_effector)}
+                "end_effector": np.copy(self.end_effector)}
 
         return reward, info
@@ -114,6 +113,9 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
             self._steps
         ])
 
+    def _check_collisions(self) -> bool:
+        return self._check_self_collision()
+
     def render(self, mode='human'):
         goal_pos = self._goal.T
         via_pos = self._via_point.T
diff --git a/alr_envs/alr/mujoco/reacher/alr_reacher.py b/alr_envs/alr/mujoco/reacher/alr_reacher.py
index 6ca66d1..2d122d2 100644
--- a/alr_envs/alr/mujoco/reacher/alr_reacher.py
+++ b/alr_envs/alr/mujoco/reacher/alr_reacher.py
@@ -87,6 +87,7 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
             [self._steps],
         ])
 
+
 if __name__ == '__main__':
     nl = 5
     render_mode = "human"  # "human" or "partial" or "final"
diff --git a/alr_envs/examples/examples_motion_primitives.py b/alr_envs/examples/examples_motion_primitives.py
index e84da2f..0df05c1 100644
--- a/alr_envs/examples/examples_motion_primitives.py
+++ b/alr_envs/examples/examples_motion_primitives.py
@@ -112,7 +112,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
 
     # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
    # You can also add other gym.Wrappers in case they are needed.
-    wrappers = [alr_envs.classic_control.hole_reacher.MPWrapper]
+    wrappers = [alr_envs.alr.classic_control.hole_reacher.MPWrapper]
     mp_kwargs = {
         "num_dof": 5,
         "num_basis": 5,
@@ -147,15 +147,15 @@
 
 if __name__ == '__main__':
-    render = False
+    render = True
 
     # DMP
-    example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=10, render=render)
+    example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
 
     # ProMP
-    example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=100, render=render)
+    example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=1, render=render)
 
     # DetProMP
-    example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=100, render=render)
+    example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render)
 
     # Altered basis functions
     example_custom_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
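For context on the _check_collisions hook that PATCH 14 fills in per environment (again only an illustrative sketch with made-up Sketch* names, not repository code): the base class updates self._is_collided once per step from this hook, and each subclass composes only the tests it needs, self-collision alone for the simple and via-point reachers, self-collision or wall collision for the hole reacher.

from abc import ABC, abstractmethod


class SketchReacher(ABC):
    def __init__(self):
        self._is_collided = False

    def step(self):
        # the real BaseReacherEnv updates this flag inside step() after integrating the action
        self._is_collided = self._check_collisions()

    def _check_self_collision(self) -> bool:
        return False  # placeholder for the link-segment intersection test

    @abstractmethod
    def _check_collisions(self) -> bool:
        raise NotImplementedError


class SketchSimpleReacher(SketchReacher):
    def _check_collisions(self) -> bool:
        return self._check_self_collision()


class SketchHoleReacher(SketchReacher):
    def check_wall_collision(self) -> bool:
        return False  # placeholder for the wall/hole geometry test

    def _check_collisions(self) -> bool:
        return self._check_self_collision() or self.check_wall_collision()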
From f9e7a34edae81833024727ac3224c764218698ac Mon Sep 17 00:00:00 2001
From: Maximilian Huettenrauch
Date: Wed, 1 Dec 2021 15:55:38 +0100
Subject: [PATCH 15/15] added dtype in observation spaces

---
 alr_envs/alr/classic_control/hole_reacher/hole_reacher.py | 2 +-
 .../alr/classic_control/simple_reacher/simple_reacher.py | 2 +-
 .../classic_control/viapoint_reacher/viapoint_reacher.py | 2 +-
 alr_envs/dmc/dmc_wrapper.py | 8 ++++----
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py
index dd7321a..208f005 100644
--- a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py
+++ b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py
@@ -106,7 +106,7 @@ class HoleReacherEnv(BaseReacherDirectEnv):
             # self._tmp_hole_depth,
             self.end_effector - self._goal,
             self._steps
-        ])
+        ]).astype(np.float32)
 
     def _get_line_points(self, num_points_per_link=1):
         theta = self._joint_angles[:, None]
diff --git a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py
index 758f824..dac06a3 100644
--- a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py
+++ b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py
@@ -73,7 +73,7 @@ class SimpleReacherEnv(BaseReacherTorqueEnv):
             self._angle_velocity,
             self.end_effector - self._goal,
             self._steps
-        ])
+        ]).astype(np.float32)
 
     def _generate_goal(self):
 
diff --git a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
index 748eb99..b44647e 100644
--- a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
+++ b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
@@ -111,7 +111,7 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
             self.end_effector - self._via_point,
             self.end_effector - self._goal,
             self._steps
-        ])
+        ]).astype(np.float32)
 
     def _check_collisions(self) -> bool:
         return self._check_self_collision()
diff --git a/alr_envs/dmc/dmc_wrapper.py b/alr_envs/dmc/dmc_wrapper.py
index 10f1af9..aa6c7aa 100644
--- a/alr_envs/dmc/dmc_wrapper.py
+++ b/alr_envs/dmc/dmc_wrapper.py
@@ -15,10 +15,10 @@ def _spec_to_box(spec):
         assert s.dtype == np.float64 or s.dtype == np.float32, f"Only float64 and float32 types are allowed, instead {s.dtype} was found"
         dim = int(np.prod(s.shape))
         if type(s) == specs.Array:
-            bound = np.inf * np.ones(dim, dtype=np.float32)
+            bound = np.inf * np.ones(dim, dtype=s.dtype)
             return -bound, bound
         elif type(s) == specs.BoundedArray:
-            zeros = np.zeros(dim, dtype=np.float32)
+            zeros = np.zeros(dim, dtype=s.dtype)
             return s.minimum + zeros, s.maximum + zeros
 
     mins, maxs = [], []
@@ -29,7 +29,7 @@
     low = np.concatenate(mins, axis=0)
     high = np.concatenate(maxs, axis=0)
     assert low.shape == high.shape
-    return spaces.Box(low, high, dtype=np.float32)
+    return spaces.Box(low, high, dtype=s.dtype)
 
 
 def _flatten_obs(obs: collections.MutableMapping):
@@ -113,7 +113,7 @@ class DMCWrapper(core.Env):
         if self._channels_first:
             obs = obs.transpose(2, 0, 1).copy()
         else:
-            obs = _flatten_obs(time_step.observation)
+            obs = _flatten_obs(time_step.observation).astype(self.observation_space.dtype)
         return obs
 
     @property