From 58131ef470bfe855a5fc169ad15ae8c2c54325ec Mon Sep 17 00:00:00 2001
From: ottofabian
Date: Mon, 7 Dec 2020 11:13:27 +0100
Subject: [PATCH] Added balancing reacher task and stochastic search task
 interface

---
 alr_envs/__init__.py                        | 104 ++++++++----------
 alr_envs/classic_control/simple_reacher.py  |   4 -
 alr_envs/mujoco/alr_reacher.py              |  20 +++-
 alr_envs/stochastic_search/__init__.py      |   1 +
 .../stochastic_search/functions/__init__.py |   0
 .../stochastic_search/functions/f_base.py   |  76 +++++++++++++
 .../functions/f_rosenbrock.py               |  56 ++++++++++
 .../stochastic_search/stochastic_search.py  |  22 ++++
 alr_envs/utils/__init__.py                  |   0
 alr_envs/utils/utils.py                     |  20 ++++
 10 files changed, 236 insertions(+), 67 deletions(-)
 create mode 100644 alr_envs/stochastic_search/__init__.py
 create mode 100644 alr_envs/stochastic_search/functions/__init__.py
 create mode 100644 alr_envs/stochastic_search/functions/f_base.py
 create mode 100644 alr_envs/stochastic_search/functions/f_rosenbrock.py
 create mode 100644 alr_envs/stochastic_search/stochastic_search.py
 create mode 100644 alr_envs/utils/__init__.py
 create mode 100644 alr_envs/utils/utils.py

diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py
index 8f0264c..2193db9 100644
--- a/alr_envs/__init__.py
+++ b/alr_envs/__init__.py
@@ -12,6 +12,37 @@ register(
     }
 )
 
+register(
+    id='ALRReacherSparse-v0',
+    entry_point='alr_envs.mujoco:ALRReacherEnv',
+    max_episode_steps=200,
+    kwargs={
+        "steps_before_reward": 200,
+        "n_links": 5,
+    }
+)
+
+register(
+    id='ALRReacherSparseBalanced-v0',
+    entry_point='alr_envs.mujoco:ALRReacherEnv',
+    max_episode_steps=200,
+    kwargs={
+        "steps_before_reward": 200,
+        "n_links": 5,
+        "balance": True,
+    }
+)
+
+register(
+    id='ALRReacherShort-v0',
+    entry_point='alr_envs.mujoco:ALRReacherEnv',
+    max_episode_steps=50,
+    kwargs={
+        "steps_before_reward": 0,
+        "n_links": 5,
+    }
+)
+
 register(
     id='ALRReacherShortSparse-v0',
     entry_point='alr_envs.mujoco:ALRReacherEnv',
@@ -22,46 +53,6 @@ register(
     }
 )
 
-register(
-    id='ALRReacherShort-v0',
-    entry_point='alr_envs.mujoco:ALRReacherEnv',
-    max_episode_steps=50,
-    kwargs={
-        "steps_before_reward": 40,
-        "n_links": 5,
-    }
-)
-
-register(
-    id='ALRReacherSparse-v0',
-    entry_point='alr_envs.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 200,
-        "n_links": 5,
-    }
-)
-
-register(
-    id='ALRReacher100-v0',
-    entry_point='alr_envs.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 100,
-        "n_links": 5,
-    }
-)
-
-register(
-    id='ALRReacher180-v0',
-    entry_point='alr_envs.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 180,
-        "n_links": 5,
-    }
-)
-
 register(
     id='ALRReacher7-v0',
     entry_point='alr_envs.mujoco:ALRReacherEnv',
@@ -73,21 +64,31 @@ register(
 )
 
 register(
-    id='ALRReacher100_7-v0',
+    id='ALRReacher7Sparse-v0',
     entry_point='alr_envs.mujoco:ALRReacherEnv',
     max_episode_steps=200,
     kwargs={
-        "steps_before_reward": 100,
+        "steps_before_reward": 200,
         "n_links": 7,
     }
 )
 
 register(
-    id='ALRReacher180_7-v0',
+    id='ALRReacher7Short-v0',
     entry_point='alr_envs.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
+    max_episode_steps=50,
     kwargs={
-        "steps_before_reward": 180,
+        "steps_before_reward": 0,
+        "n_links": 7,
+    }
+)
+
+register(
+    id='ALRReacher7ShortSparse-v0',
+    entry_point='alr_envs.mujoco:ALRReacherEnv',
+    max_episode_steps=50,
+    kwargs={
+        "steps_before_reward": 50,
         "n_links": 7,
     }
 )
@@ -101,16 +102,6 @@ register(
     }
 )
 
-register(
-    id='SimpleReacher5-v0',
-    entry_point='alr_envs.classic_control:SimpleReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "n_links": 5,
-    }
-)
-
-
 register(
     id='SimpleReacher5-v0',
     entry_point='alr_envs.classic_control:SimpleReacherEnv',
     max_episode_steps=200,
     kwargs={
         "n_links": 5,
     }
 )
 
@@ -121,7 +112,6 @@ register(
 )
 
 for dim in [5, 10, 25, 50, 100]:
-
     register(
         id=f'Rosenbrock{dim}-v0',
         entry_point='alr_envs.stochastic_search:StochasticSearchEnv',
@@ -129,4 +119,4 @@ for dim in [5, 10, 25, 50, 100]:
         kwargs={
             "cost_f": Rosenbrock,
         }
-    )
\ No newline at end of file
+    )
diff --git a/alr_envs/classic_control/simple_reacher.py b/alr_envs/classic_control/simple_reacher.py
index 3a54432..042b207 100644
--- a/alr_envs/classic_control/simple_reacher.py
+++ b/alr_envs/classic_control/simple_reacher.py
@@ -171,7 +171,3 @@ class SimpleReacherEnv(gym.Env):
     @property
     def end_effector(self):
         return self._joints[self.n_links].T
-
-
-def angle_normalize(x):
-    return ((x + np.pi) % (2 * np.pi)) - np.pi
diff --git a/alr_envs/mujoco/alr_reacher.py b/alr_envs/mujoco/alr_reacher.py
index a7e4e18..7ae28da 100644
--- a/alr_envs/mujoco/alr_reacher.py
+++ b/alr_envs/mujoco/alr_reacher.py
@@ -1,15 +1,21 @@
-import numpy as np
 import os
+
+import numpy as np
 from gym import utils
 from gym.envs.mujoco import mujoco_env
 
+from alr_envs.utils.utils import angle_normalize
+
 
 class ALRReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
-    def __init__(self, steps_before_reward=200, n_links=5):
+    def __init__(self, steps_before_reward=200, n_links=5, balance=False):
         self._steps = 0
         self.steps_before_reward = steps_before_reward
         self.n_links = n_links
+        self.balance = balance
+        self.balance_weight = 1.0
+
         self.reward_weight = 1
         if steps_before_reward == 200:
             self.reward_weight = 200
@@ -29,20 +35,22 @@ class ALRReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
     def step(self, a):
         self._steps += 1
 
-        reward_dist = 0
-        angular_vel = 0
+        reward_dist = 0.0
+        angular_vel = 0.0
         if self._steps >= self.steps_before_reward:
             vec = self.get_body_com("fingertip") - self.get_body_com("target")
             reward_dist -= self.reward_weight * np.linalg.norm(vec)
             angular_vel -= np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
         reward_ctrl = - np.square(a).sum()
+        reward_balance = - self.balance * self.balance_weight * np.abs(  # zero unless balance=True
+            angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad"))
 
-        reward = reward_dist + reward_ctrl + angular_vel
+        reward = reward_dist + reward_ctrl + angular_vel + reward_balance
         self.do_simulation(a, self.frame_skip)
         ob = self._get_obs()
         done = False
         return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl,
-                                      velocity=angular_vel,
+                                      velocity=angular_vel, reward_balance=reward_balance,
                                       end_effector=self.get_body_com("fingertip").copy(),
                                       goal=self.goal if hasattr(self, "goal") else None)
diff --git a/alr_envs/stochastic_search/__init__.py b/alr_envs/stochastic_search/__init__.py
new file mode 100644
index 0000000..257680f
--- /dev/null
+++ b/alr_envs/stochastic_search/__init__.py
@@ -0,0 +1 @@
+from alr_envs.stochastic_search.stochastic_search import StochasticSearchEnv
diff --git a/alr_envs/stochastic_search/functions/__init__.py b/alr_envs/stochastic_search/functions/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/alr_envs/stochastic_search/functions/f_base.py b/alr_envs/stochastic_search/functions/f_base.py
new file mode 100644
index 0000000..31b4323
--- /dev/null
+++ b/alr_envs/stochastic_search/functions/f_base.py
@@ -0,0 +1,76 @@
+import numpy as np
+import scipy.stats as scistats
+
+np.seterr(divide='ignore', invalid='ignore')
+
+
+class BaseObjective(object):
+    def __init__(self, dim, int_opt=None, val_opt=None, alpha=None, beta=None):
+        self.dim = dim
+        self.alpha = alpha
+        self.beta = beta
+        # sample the optimal parameter uniformly from an interval ...
+        if int_opt is not None:
+            self.x_opt = np.random.uniform(int_opt[0], int_opt[1], size=(1, dim))
+        # ... or build it from a single value with random signs
+        elif val_opt is not None:
+            self.one_pm = np.where(np.random.rand(1, dim) > 0.5, 1, -1)
+            self.x_opt = val_opt * self.one_pm
+        else:
+            raise ValueError("Optimal value or interval has to be defined")
+        self.f_opt = np.round(np.clip(scistats.cauchy.rvs(loc=0, scale=100, size=1)[0], -1000, 1000), decimals=2)
+        self.i = np.arange(self.dim)
+        self._lambda_alpha = None
+        self._q = None
+        self._r = None
+
+    def __call__(self, x):
+        return self.evaluate_full(x)
+
+    def evaluate_full(self, x):
+        raise NotImplementedError("Subclasses should implement this!")
+
+    def gs(self):
+        # random orthonormal matrix via QR decomposition (Gram-Schmidt)
+        a = np.random.randn(self.dim, self.dim)
+        b, _ = np.linalg.qr(a)
+        return b
+
+    # TODO: property probably unnecessary
+    @property
+    def q(self):
+        if self._q is None:
+            self._q = self.gs()
+        return self._q
+
+    @property
+    def r(self):
+        if self._r is None:
+            self._r = self.gs()
+        return self._r
+
+    @property
+    def lambda_alpha(self):
+        if self._lambda_alpha is None:
+            if np.isscalar(self.alpha):  # also covers float alphas, not just int
+                lambda_ii = np.power(self.alpha, 1 / 2 * self.i / (self.dim - 1))
+                self._lambda_alpha = np.diag(lambda_ii)
+            else:
+                lambda_ii = np.power(self.alpha[:, None], 1 / 2 * self.i[None, :] / (self.dim - 1))
+                self._lambda_alpha = np.stack([np.diag(l_ii) for l_ii in lambda_ii])
+        return self._lambda_alpha
+
+    @staticmethod
+    def f_pen(x):
+        return np.sum(np.maximum(0, np.abs(x) - 5), axis=1)
+
+    def t_asy_beta(self, x):
+        # exp = np.power(x, 1 + self.beta * self.i[:, None] / (self.dim - 1) * np.sqrt(x))
+        # return np.where(x > 0, exp, x)
+        return x
+
+    def t_osz(self, x):
+        x_hat = np.where(x != 0, np.log(np.abs(x)), 0)
+        c_1 = np.where(x > 0, 10, 5.5)
+        c_2 = np.where(x > 0, 7.9, 3.1)
+        return np.sign(x) * np.exp(x_hat + 0.049 * (np.sin(c_1 * x_hat) + np.sin(c_2 * x_hat)))
diff --git a/alr_envs/stochastic_search/functions/f_rosenbrock.py b/alr_envs/stochastic_search/functions/f_rosenbrock.py
new file mode 100644
index 0000000..a0f6bc4
--- /dev/null
+++ b/alr_envs/stochastic_search/functions/f_rosenbrock.py
@@ -0,0 +1,56 @@
+import numpy as np
+
+from alr_envs.stochastic_search.functions.f_base import BaseObjective
+
+
+class Rosenbrock(BaseObjective):
+    def __init__(self, dim, int_opt=(-3., 3.)):
+        super(Rosenbrock, self).__init__(dim, int_opt=int_opt)
+        self.c = np.maximum(1, np.sqrt(self.dim) / 8)
+
+    def evaluate_full(self, x):
+        x = np.atleast_2d(x)
+        assert x.shape[1] == self.dim
+
+        z = self.c * (x - self.x_opt) + 1
+        z_end = z[:, 1:]
+        z_begin = z[:, :-1]
+
+        a = z_begin ** 2 - z_end
+        b = z_begin - 1
+
+        return np.sum(100 * a ** 2 + b ** 2, axis=1) + self.f_opt
+
+
+class RosenbrockRotated(BaseObjective):
+    def __init__(self, dim, int_opt=(-3., 3.)):
+        super(RosenbrockRotated, self).__init__(dim, int_opt=int_opt)
+        self.c = np.maximum(1, np.sqrt(self.dim) / 8)
+
+    def evaluate_full(self, x):
+        x = np.atleast_2d(x)
+        assert x.shape[1] == self.dim
+
+        z = (self.c * self.r @ x.T + 1 / 2).T
+        a = z[:, :-1] ** 2 - z[:, 1:]
+        b = z[:, :-1] - 1
+
+        return np.sum(100 * a ** 2 + b ** 2, axis=1) + self.f_opt
+
+
+class RosenbrockRaw(BaseObjective):
+    def __init__(self, dim, int_opt=(-3., 3.)):
+        super(RosenbrockRaw, self).__init__(dim, int_opt=int_opt)
+        self.x_opt = np.ones((1, dim))
+        self.f_opt = 0
+
+    def evaluate_full(self, x):
+        x = np.atleast_2d(x)
+        assert x.shape[1] == self.dim
+
+        a = x[:, :-1] ** 2 - x[:, 1:]
+        b = x[:, :-1] - 1
+
+        out = np.sum(100 * a ** 2 + b ** 2, axis=1)
+
+        return out
diff --git a/alr_envs/stochastic_search/stochastic_search.py b/alr_envs/stochastic_search/stochastic_search.py
new file mode 100644
index 0000000..fd9af8d
--- /dev/null
+++ b/alr_envs/stochastic_search/stochastic_search.py
@@ -0,0 +1,22 @@
+import gym
+import numpy as np
+
+from alr_envs.stochastic_search.functions.f_base import BaseObjective
+
+
+class StochasticSearchEnv(gym.Env):
+
+    def __init__(self, cost_f: BaseObjective):
+        self.cost_f = cost_f
+
+        self.action_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.cost_f.dim,), dtype=np.float64)
+        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(0,), dtype=np.float64)
+
+    def step(self, action):
+        return np.zeros(self.observation_space.shape), np.squeeze(-self.cost_f(action)), True, {}
+
+    def reset(self):
+        return np.zeros(self.observation_space.shape)
+
+    def render(self, mode='human'):
+        pass
diff --git a/alr_envs/utils/__init__.py b/alr_envs/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/alr_envs/utils/utils.py b/alr_envs/utils/utils.py
new file mode 100644
index 0000000..0bca03e
--- /dev/null
+++ b/alr_envs/utils/utils.py
@@ -0,0 +1,20 @@
+import numpy as np
+
+
+def angle_normalize(x, type="deg"):
+    """
+    Normalize angle x to [-pi, pi].
+    Args:
+        x: Angle in radians (both variants expect radians; neither converts degrees)
+        type: one of "deg" or "rad", selecting one of two equivalent formulas
+
+    Returns:
+        The normalized angle in [-pi, pi].
+    """
+    if type == "deg":
+        return ((x + np.pi) % (2 * np.pi)) - np.pi
+    elif type == "rad":
+        two_pi = 2 * np.pi
+        return x - two_pi * np.floor((x + np.pi) / two_pi)
+    else:
+        raise ValueError(f"Invalid type {type}. Choose one of 'deg' or 'rad'.")
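-- 
Reviewer note: a minimal smoke-test sketch of the new pieces, not part of the patch itself. It assumes the package, gym, and mujoco-py are installed and that the env ids register as above. Rosenbrock is constructed directly here because the pre-existing Rosenbrock{dim}-v0 registrations pass the class itself as cost_f rather than an instance.

import gym
import numpy as np

import alr_envs  # noqa: F401 -- importing executes the register() calls above

from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock
from alr_envs.utils.utils import angle_normalize

# Balancing reacher: the info dict now reports the balance penalty.
env = gym.make("ALRReacherSparseBalanced-v0")
env.reset()
ob, reward, done, info = env.step(env.action_space.sample())
assert "reward_balance" in info

# Black-box objective: evaluate the 5-dim Rosenbrock on a batch of points.
rosen = Rosenbrock(dim=5)
costs = rosen(np.zeros((2, 5)))  # shape (2,), offset by the randomly sampled f_opt
print(costs)

# Both angle_normalize branches reduce a radian angle to [-pi, pi].
assert np.isclose(angle_normalize(3 * np.pi, type="rad"), -np.pi)
assert np.isclose(angle_normalize(3 * np.pi, type="deg"), -np.pi)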