From 58131ef470bfe855a5fc169ad15ae8c2c54325ec Mon Sep 17 00:00:00 2001
From: ottofabian
Date: Mon, 7 Dec 2020 11:13:27 +0100
Subject: [PATCH] Added balancing reacher task and stochastic search task
 interface

---
 alr_envs/__init__.py                        | 104 ++++++++----------
 alr_envs/classic_control/simple_reacher.py  |   4 -
 alr_envs/mujoco/alr_reacher.py              |  20 +++-
 alr_envs/stochastic_search/__init__.py      |   1 +
 .../stochastic_search/functions/__init__.py |   0
 .../stochastic_search/functions/f_base.py   |  76 +++++++++++++
 .../functions/f_rosenbrock.py               |  56 ++++++++++
 .../stochastic_search/stochastic_search.py  |  22 ++++
 alr_envs/utils/__init__.py                  |   0
 alr_envs/utils/utils.py                     |  20 ++++
 10 files changed, 236 insertions(+), 67 deletions(-)
 create mode 100644 alr_envs/stochastic_search/__init__.py
 create mode 100644 alr_envs/stochastic_search/functions/__init__.py
 create mode 100644 alr_envs/stochastic_search/functions/f_base.py
 create mode 100644 alr_envs/stochastic_search/functions/f_rosenbrock.py
 create mode 100644 alr_envs/stochastic_search/stochastic_search.py
 create mode 100644 alr_envs/utils/__init__.py
 create mode 100644 alr_envs/utils/utils.py

diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py
index 8f0264c..2193db9 100644
--- a/alr_envs/__init__.py
+++ b/alr_envs/__init__.py
@@ -12,6 +12,37 @@ register(
     }
 )
 
+register(
+    id='ALRReacherSparse-v0',
+    entry_point='alr_envs.mujoco:ALRReacherEnv',
+    max_episode_steps=200,
+    kwargs={
+        "steps_before_reward": 200,
+        "n_links": 5,
+    }
+)
+
+register(
+    id='ALRReacherSparseBalanced-v0',
+    entry_point='alr_envs.mujoco:ALRReacherEnv',
+    max_episode_steps=200,
+    kwargs={
+        "steps_before_reward": 200,
+        "n_links": 5,
+        "balance": True,
+    }
+)
+
+register(
+    id='ALRReacherShort-v0',
+    entry_point='alr_envs.mujoco:ALRReacherEnv',
+    max_episode_steps=50,
+    kwargs={
+        "steps_before_reward": 0,
+        "n_links": 5,
+    }
+)
+
 register(
     id='ALRReacherShortSparse-v0',
     entry_point='alr_envs.mujoco:ALRReacherEnv',
@@ -22,46 +53,6 @@ register(
     }
 )
 
-register(
-    id='ALRReacherShort-v0',
-    entry_point='alr_envs.mujoco:ALRReacherEnv',
-    max_episode_steps=50,
-    kwargs={
-        "steps_before_reward": 40,
-        "n_links": 5,
-    }
-)
-
-register(
-    id='ALRReacherSparse-v0',
-    entry_point='alr_envs.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 200,
-        "n_links": 5,
-    }
-)
-
-register(
-    id='ALRReacher100-v0',
-    entry_point='alr_envs.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 100,
-        "n_links": 5,
-    }
-)
-
-register(
-    id='ALRReacher180-v0',
-    entry_point='alr_envs.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 180,
-        "n_links": 5,
-    }
-)
-
 register(
     id='ALRReacher7-v0',
     entry_point='alr_envs.mujoco:ALRReacherEnv',
@@ -73,21 +64,31 @@ register(
 )
 
 register(
-    id='ALRReacher100_7-v0',
+    id='ALRReacher7Sparse-v0',
     entry_point='alr_envs.mujoco:ALRReacherEnv',
     max_episode_steps=200,
     kwargs={
-        "steps_before_reward": 100,
+        "steps_before_reward": 200,
         "n_links": 7,
     }
 )
 
 register(
-    id='ALRReacher180_7-v0',
+    id='ALRReacher7Short-v0',
     entry_point='alr_envs.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
+    max_episode_steps=50,
     kwargs={
-        "steps_before_reward": 180,
+        "steps_before_reward": 0,
+        "n_links": 7,
+    }
+)
+
+register(
+    id='ALRReacher7ShortSparse-v0',
+    entry_point='alr_envs.mujoco:ALRReacherEnv',
+    max_episode_steps=50,
+    kwargs={
+        "steps_before_reward": 50,
         "n_links": 7,
     }
 )
@@ -101,16 +102,6 @@ register(
     }
 )
 
-register(
-    id='SimpleReacher5-v0',
-    entry_point='alr_envs.classic_control:SimpleReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "n_links": 5,
-    }
-)
-
-
 register(
     id='SimpleReacher5-v0',
     entry_point='alr_envs.classic_control:SimpleReacherEnv',
     max_episode_steps=200,
     kwargs={
         "n_links": 5,
     }
 )
 
@@ -121,7 +112,6 @@ register(
 )
 
 for dim in [5, 10, 25, 50, 100]:
-
     register(
         id=f'Rosenbrock{dim}-v0',
         entry_point='alr_envs.stochastic_search:StochasticSearchEnv',
@@ -129,4 +119,4 @@ for dim in [5, 10, 25, 50, 100]:
         kwargs={
             "cost_f": Rosenbrock,
         }
-    )
\ No newline at end of file
+    )
diff --git a/alr_envs/classic_control/simple_reacher.py b/alr_envs/classic_control/simple_reacher.py
index 3a54432..042b207 100644
--- a/alr_envs/classic_control/simple_reacher.py
+++ b/alr_envs/classic_control/simple_reacher.py
@@ -171,7 +171,3 @@ class SimpleReacherEnv(gym.Env):
     @property
     def end_effector(self):
         return self._joints[self.n_links].T
-
-
-def angle_normalize(x):
-    return ((x + np.pi) % (2 * np.pi)) - np.pi
diff --git a/alr_envs/mujoco/alr_reacher.py b/alr_envs/mujoco/alr_reacher.py
index a7e4e18..7ae28da 100644
--- a/alr_envs/mujoco/alr_reacher.py
+++ b/alr_envs/mujoco/alr_reacher.py
@@ -1,15 +1,21 @@
-import numpy as np
 import os
+
+import numpy as np
 from gym import utils
 from gym.envs.mujoco import mujoco_env
 
+from alr_envs.utils.utils import angle_normalize
+
 
 class ALRReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
-    def __init__(self, steps_before_reward=200, n_links=5):
+    def __init__(self, steps_before_reward=200, n_links=5, balance=False):
         self._steps = 0
         self.steps_before_reward = steps_before_reward
         self.n_links = n_links
+        self.balance = balance
+        self.balance_weight = 1.0
+
         self.reward_weight = 1
         if steps_before_reward == 200:
             self.reward_weight = 200
@@ -29,20 +35,22 @@ class ALRReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
     def step(self, a):
         self._steps += 1
 
-        reward_dist = 0
-        angular_vel = 0
+        reward_dist = 0.0
+        angular_vel = 0.0
         if self._steps >= self.steps_before_reward:
             vec = self.get_body_com("fingertip") - self.get_body_com("target")
             reward_dist -= self.reward_weight * np.linalg.norm(vec)
             angular_vel -= np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
         reward_ctrl = - np.square(a).sum()
+        reward_balance = - self.balance * self.balance_weight * np.abs(  # zero unless balance=True
+            angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad"))
 
-        reward = reward_dist + reward_ctrl + angular_vel
+        reward = reward_dist + reward_ctrl + angular_vel + reward_balance
         self.do_simulation(a, self.frame_skip)
         ob = self._get_obs()
         done = False
         return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl,
-                                      velocity=angular_vel,
+                                      velocity=angular_vel, reward_balance=reward_balance,
                                       end_effector=self.get_body_com("fingertip").copy(),
                                       goal=self.goal if hasattr(self, "goal") else None)
diff --git a/alr_envs/stochastic_search/__init__.py b/alr_envs/stochastic_search/__init__.py
new file mode 100644
index 0000000..257680f
--- /dev/null
+++ b/alr_envs/stochastic_search/__init__.py
@@ -0,0 +1 @@
+from alr_envs.stochastic_search.stochastic_search import StochasticSearchEnv
diff --git a/alr_envs/stochastic_search/functions/__init__.py b/alr_envs/stochastic_search/functions/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/alr_envs/stochastic_search/functions/f_base.py b/alr_envs/stochastic_search/functions/f_base.py
new file mode 100644
index 0000000..31b4323
--- /dev/null
+++ b/alr_envs/stochastic_search/functions/f_base.py
@@ -0,0 +1,76 @@
+import numpy as np
+import scipy.stats as scistats
+
+np.seterr(divide='ignore', invalid='ignore')
+
+
+class BaseObjective(object):
+    def __init__(self, dim, int_opt=None, val_opt=None, alpha=None, beta=None):
+        self.dim = dim
+        self.alpha = alpha
+        self.beta = beta
+        # sample the optimal parameter uniformly from an interval ...
+        if int_opt is not None:
+            self.x_opt = np.random.uniform(int_opt[0], int_opt[1], size=(1, dim))
+        # ... or build it from a single value with random signs
+        elif val_opt is not None:
+            self.one_pm = np.where(np.random.rand(1, dim) > 0.5, 1, -1)
+            self.x_opt = val_opt * self.one_pm
+        else:
+            raise ValueError("Optimal value or interval has to be defined")
+        self.f_opt = np.round(np.clip(scistats.cauchy.rvs(loc=0, scale=100, size=1)[0], -1000, 1000), decimals=2)
+        self.i = np.arange(self.dim)
+        self._lambda_alpha = None
+        self._q = None
+        self._r = None
+
+    def __call__(self, x):
+        return self.evaluate_full(x)
+
+    def evaluate_full(self, x):
+        raise NotImplementedError("Subclasses should implement this!")
+
+    def gs(self):
+        # random orthonormal matrix via QR decomposition (Gram-Schmidt)
+        a = np.random.randn(self.dim, self.dim)
+        b, _ = np.linalg.qr(a)
+        return b
+
+    # TODO: property probably unnecessary
+    @property
+    def q(self):
+        if self._q is None:
+            self._q = self.gs()
+        return self._q
+
+    @property
+    def r(self):
+        if self._r is None:
+            self._r = self.gs()
+        return self._r
+
+    @property
+    def lambda_alpha(self):
+        if self._lambda_alpha is None:
+            if np.isscalar(self.alpha):  # also covers float alphas, not just int
+                lambda_ii = np.power(self.alpha, 1 / 2 * self.i / (self.dim - 1))
+                self._lambda_alpha = np.diag(lambda_ii)
+            else:
+                lambda_ii = np.power(self.alpha[:, None], 1 / 2 * self.i[None, :] / (self.dim - 1))
+                self._lambda_alpha = np.stack([np.diag(l_ii) for l_ii in lambda_ii])
+        return self._lambda_alpha
+
+    @staticmethod
+    def f_pen(x):
+        return np.sum(np.maximum(0, np.abs(x) - 5), axis=1)
+
+    def t_asy_beta(self, x):
+        # exp = np.power(x, 1 + self.beta * self.i[:, None] / (self.dim - 1) * np.sqrt(x))
+        # return np.where(x > 0, exp, x)
+        return x
+
+    def t_osz(self, x):
+        x_hat = np.where(x != 0, np.log(np.abs(x)), 0)
+        c_1 = np.where(x > 0, 10, 5.5)
+        c_2 = np.where(x > 0, 7.9, 3.1)
+        return np.sign(x) * np.exp(x_hat + 0.049 * (np.sin(c_1 * x_hat) + np.sin(c_2 * x_hat)))
diff --git a/alr_envs/stochastic_search/functions/f_rosenbrock.py b/alr_envs/stochastic_search/functions/f_rosenbrock.py
new file mode 100644
index 0000000..a0f6bc4
--- /dev/null
+++ b/alr_envs/stochastic_search/functions/f_rosenbrock.py
@@ -0,0 +1,56 @@
+import numpy as np
+
+from alr_envs.stochastic_search.functions.f_base import BaseObjective
+
+
+class Rosenbrock(BaseObjective):
+    def __init__(self, dim, int_opt=(-3., 3.)):
+        super(Rosenbrock, self).__init__(dim, int_opt=int_opt)
+        self.c = np.maximum(1, np.sqrt(self.dim) / 8)
+
+    def evaluate_full(self, x):
+        x = np.atleast_2d(x)
+        assert x.shape[1] == self.dim
+
+        z = self.c * (x - self.x_opt) + 1
+        z_end = z[:, 1:]
+        z_begin = z[:, :-1]
+
+        a = z_begin ** 2 - z_end
+        b = z_begin - 1
+
+        return np.sum(100 * a ** 2 + b ** 2, axis=1) + self.f_opt
+
+
+class RosenbrockRotated(BaseObjective):
+    def __init__(self, dim, int_opt=(-3., 3.)):
+        super(RosenbrockRotated, self).__init__(dim, int_opt=int_opt)
+        self.c = np.maximum(1, np.sqrt(self.dim) / 8)
+
+    def evaluate_full(self, x):
+        x = np.atleast_2d(x)
+        assert x.shape[1] == self.dim
+
+        z = (self.c * self.r @ x.T + 1 / 2).T
+        a = z[:, :-1] ** 2 - z[:, 1:]
+        b = z[:, :-1] - 1
+
+        return np.sum(100 * a ** 2 + b ** 2, axis=1) + self.f_opt
+
+
+class RosenbrockRaw(BaseObjective):
+    def __init__(self, dim, int_opt=(-3., 3.)):
+        super(RosenbrockRaw, self).__init__(dim, int_opt=int_opt)
+        self.x_opt = np.ones((1, dim))
+        self.f_opt = 0
+
+    def evaluate_full(self, x):
+        x = np.atleast_2d(x)
+        assert x.shape[1] == self.dim
+
+        a = x[:, :-1] ** 2 - x[:, 1:]
+        b = x[:, :-1] - 1
+
+        out = np.sum(100 * a ** 2 + b ** 2, axis=1)
+
+        return out
diff --git a/alr_envs/stochastic_search/stochastic_search.py b/alr_envs/stochastic_search/stochastic_search.py
new file mode 100644
index 0000000..fd9af8d
--- /dev/null
+++ b/alr_envs/stochastic_search/stochastic_search.py
@@ -0,0 +1,22 @@
+import gym
+import numpy as np
+
+from alr_envs.stochastic_search.functions.f_base import BaseObjective
+
+
+class StochasticSearchEnv(gym.Env):
+
+    def __init__(self, cost_f: BaseObjective):
+        self.cost_f = cost_f
+
+        self.action_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.cost_f.dim,), dtype=np.float64)
+        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(0,), dtype=np.float64)
+
+    def step(self, action):
+        return np.zeros(self.observation_space.shape), np.squeeze(-self.cost_f(action)), True, {}
+
+    def reset(self):
+        return np.zeros(self.observation_space.shape)
+
+    def render(self, mode='human'):
+        pass
diff --git a/alr_envs/utils/__init__.py b/alr_envs/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/alr_envs/utils/utils.py b/alr_envs/utils/utils.py
new file mode 100644
index 0000000..0bca03e
--- /dev/null
+++ b/alr_envs/utils/utils.py
@@ -0,0 +1,20 @@
+import numpy as np
+
+
+def angle_normalize(x, type="deg"):
+    """
+    Normalize angle x to [-pi, pi].
+    Args:
+        x: Angle in radians (both variants expect radians; neither converts degrees)
+        type: one of "deg" or "rad", selecting one of two equivalent formulas
+
+    Returns:
+        The normalized angle in [-pi, pi].
+    """
+    if type == "deg":
+        return ((x + np.pi) % (2 * np.pi)) - np.pi
+    elif type == "rad":
+        two_pi = 2 * np.pi
+        return x - two_pi * np.floor((x + np.pi) / two_pi)
+    else:
+        raise ValueError(f"Invalid type {type}. Choose one of 'deg' or 'rad'.")
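-- 
Reviewer note: a minimal smoke-test sketch of the new pieces, not part of the patch itself. It assumes the package, gym, and mujoco-py are installed and that the env ids register as above. Rosenbrock is constructed directly here because the pre-existing Rosenbrock{dim}-v0 registrations pass the class itself as cost_f rather than an instance.

import gym
import numpy as np

import alr_envs  # noqa: F401 -- importing executes the register() calls above

from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock
from alr_envs.utils.utils import angle_normalize

# Balancing reacher: the info dict now reports the balance penalty.
env = gym.make("ALRReacherSparseBalanced-v0")
env.reset()
ob, reward, done, info = env.step(env.action_space.sample())
assert "reward_balance" in info

# Black-box objective: evaluate the 5-dim Rosenbrock on a batch of points.
rosen = Rosenbrock(dim=5)
costs = rosen(np.zeros((2, 5)))  # shape (2,), offset by the randomly sampled f_opt
print(costs)

# Both angle_normalize branches reduce a radian angle to [-pi, pi].
assert np.isclose(angle_normalize(3 * np.pi, type="rad"), -np.pi)
assert np.isclose(angle_normalize(3 * np.pi, type="deg"), -np.pi)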