Added balancing reacher task and stochastic search task interface
commit 58131ef470 (parent 741f1cb636)
@@ -12,6 +12,37 @@ register(
     }
 )
 
+register(
+    id='ALRReacherSparse-v0',
+    entry_point='alr_envs.mujoco:ALRReacherEnv',
+    max_episode_steps=200,
+    kwargs={
+        "steps_before_reward": 200,
+        "n_links": 5,
+    }
+)
+
+register(
+    id='ALRReacherSparseBalanced-v0',
+    entry_point='alr_envs.mujoco:ALRReacherEnv',
+    max_episode_steps=200,
+    kwargs={
+        "steps_before_reward": 200,
+        "n_links": 5,
+        "balance": True,
+    }
+)
+
+register(
+    id='ALRReacherShort-v0',
+    entry_point='alr_envs.mujoco:ALRReacherEnv',
+    max_episode_steps=50,
+    kwargs={
+        "steps_before_reward": 0,
+        "n_links": 5,
+    }
+)
+
 register(
     id='ALRReacherShortSparse-v0',
     entry_point='alr_envs.mujoco:ALRReacherEnv',
@@ -22,46 +53,6 @@ register(
     }
 )
 
-register(
-    id='ALRReacherShort-v0',
-    entry_point='alr_envs.mujoco:ALRReacherEnv',
-    max_episode_steps=50,
-    kwargs={
-        "steps_before_reward": 40,
-        "n_links": 5,
-    }
-)
-
-register(
-    id='ALRReacherSparse-v0',
-    entry_point='alr_envs.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 200,
-        "n_links": 5,
-    }
-)
-
-register(
-    id='ALRReacher100-v0',
-    entry_point='alr_envs.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 100,
-        "n_links": 5,
-    }
-)
-
-register(
-    id='ALRReacher180-v0',
-    entry_point='alr_envs.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 180,
-        "n_links": 5,
-    }
-)
-
 register(
     id='ALRReacher7-v0',
     entry_point='alr_envs.mujoco:ALRReacherEnv',
@@ -73,21 +64,31 @@ register(
 )
 
 register(
-    id='ALRReacher100_7-v0',
+    id='ALRReacherSparse-v0',
     entry_point='alr_envs.mujoco:ALRReacherEnv',
     max_episode_steps=200,
     kwargs={
-        "steps_before_reward": 100,
+        "steps_before_reward": 200,
         "n_links": 7,
     }
 )
 
 register(
-    id='ALRReacher180_7-v0',
+    id='ALRReacher7Short-v0',
     entry_point='alr_envs.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
+    max_episode_steps=50,
     kwargs={
-        "steps_before_reward": 180,
+        "steps_before_reward": 0,
+        "n_links": 7,
+    }
+)
+
+register(
+    id='ALRReacher7ShortSparse-v0',
+    entry_point='alr_envs.mujoco:ALRReacherEnv',
+    max_episode_steps=50,
+    kwargs={
+        "steps_before_reward": 50,
         "n_links": 7,
     }
 )
@@ -101,16 +102,6 @@ register(
     }
 )
 
-register(
-    id='SimpleReacher5-v0',
-    entry_point='alr_envs.classic_control:SimpleReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "n_links": 5,
-    }
-)
-
-
 register(
     id='SimpleReacher5-v0',
     entry_point='alr_envs.classic_control:SimpleReacherEnv',
@@ -121,7 +112,6 @@ register(
 )
 
-
 for dim in [5, 10, 25, 50, 100]:
     register(
         id=f'Rosenbrock{dim}-v0',
         entry_point='alr_envs.stochastic_search:StochasticSearchEnv',
@@ -129,4 +119,4 @@ for dim in [5, 10, 25, 50, 100]:
         kwargs={
             "cost_f": Rosenbrock,
         }
     )
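For reference, a minimal usage sketch of the newly registered IDs (a sketch, not part of the commit; it assumes that importing the alr_envs package executes the register() calls above and that the classic gym 0.x API is in use):

    import gym
    import alr_envs  # noqa: F401  -- importing is assumed to run the register() calls above

    env = gym.make('ALRReacherSparseBalanced-v0')   # 5-link reacher, sparse reward, balance penalty
    ob = env.reset()
    ob, reward, done, info = env.step(env.action_space.sample())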
@@ -171,7 +171,3 @@ class SimpleReacherEnv(gym.Env):
     @property
     def end_effector(self):
         return self._joints[self.n_links].T
-
-
-def angle_normalize(x):
-    return ((x + np.pi) % (2 * np.pi)) - np.pi
@@ -1,15 +1,21 @@
-import numpy as np
 import os
 
+import numpy as np
 from gym import utils
 from gym.envs.mujoco import mujoco_env
 
+from alr_envs.utils.utils import angle_normalize
+
+
 class ALRReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
-    def __init__(self, steps_before_reward=200, n_links=5):
+    def __init__(self, steps_before_reward=200, n_links=5, balance=False):
         self._steps = 0
         self.steps_before_reward = steps_before_reward
         self.n_links = n_links
 
+        self.balance = balance
+        self.balance_weight = 1.0
+
         self.reward_weight = 1
         if steps_before_reward == 200:
             self.reward_weight = 200
@@ -29,20 +35,22 @@ class ALRReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
     def step(self, a):
         self._steps += 1
 
-        reward_dist = 0
-        angular_vel = 0
+        reward_dist = 0.0
+        angular_vel = 0.0
         if self._steps >= self.steps_before_reward:
             vec = self.get_body_com("fingertip") - self.get_body_com("target")
             reward_dist -= self.reward_weight * np.linalg.norm(vec)
             angular_vel -= np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
         reward_ctrl = - np.square(a).sum()
+        reward_balance = - self.balance_weight * np.abs(
+            angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad"))
+
-        reward = reward_dist + reward_ctrl + angular_vel
+        reward = reward_dist + reward_ctrl + angular_vel + reward_balance
         self.do_simulation(a, self.frame_skip)
         ob = self._get_obs()
         done = False
         return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl,
-                                      velocity=angular_vel,
+                                      velocity=angular_vel, reward_balance=reward_balance,
                                       end_effector=self.get_body_com("fingertip").copy(),
                                       goal=self.goal if hasattr(self, "goal") else None)
 
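The new reward_balance term penalises the absolute orientation of the distal link, i.e. the sum of all joint angles wrapped to [-pi, pi). A standalone sketch with made-up joint angles, mirroring the expression added to step() above:

    import numpy as np
    from alr_envs.utils.utils import angle_normalize

    qpos = np.array([0.3, -0.1, 0.2, 0.0, 0.1])   # hypothetical joint angles for n_links = 5
    balance_weight = 1.0
    reward_balance = -balance_weight * np.abs(angle_normalize(np.sum(qpos), type="rad"))
    # reward_balance == -0.5 here; it is 0 only when the summed angles wrap to 0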
alr_envs/stochastic_search/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
+from alr_envs.stochastic_search.stochastic_search import StochasticSearchEnv

alr_envs/stochastic_search/functions/__init__.py (new file, empty)
alr_envs/stochastic_search/functions/f_base.py (new file, 76 lines)
@@ -0,0 +1,76 @@
+import numpy as np
+import scipy.stats as scistats
+
+np.seterr(divide='ignore', invalid='ignore')
+
+
+class BaseObjective(object):
+    def __init__(self, dim, int_opt=None, val_opt=None, alpha=None, beta=None):
+        self.dim = dim
+        self.alpha = alpha
+        self.beta = beta
+        # check if optimal parameter is in interval...
+        if int_opt is not None:
+            self.x_opt = np.random.uniform(int_opt[0], int_opt[1], size=(1, dim))
+        # ... or based on a single value
+        elif val_opt is not None:
+            self.one_pm = np.where(np.random.rand(1, dim) > 0.5, 1, -1)
+            self.x_opt = val_opt * self.one_pm
+        else:
+            raise ValueError("Optimal value or interval has to be defined")
+        self.f_opt = np.round(np.clip(scistats.cauchy.rvs(loc=0, scale=100, size=1)[0], -1000, 1000), decimals=2)
+        self.i = np.arange(self.dim)
+        self._lambda_alpha = None
+        self._q = None
+        self._r = None
+
+    def __call__(self, x):
+        return self.evaluate_full(x)
+
+    def evaluate_full(self, x):
+        raise NotImplementedError("Subclasses should implement this!")
+
+    def gs(self):
+        # Gram Schmidt ortho-normalization
+        a = np.random.randn(self.dim, self.dim)
+        b, _ = np.linalg.qr(a)
+        return b
+
+    # TODO: property probably unnecessary
+    @property
+    def q(self):
+        if self._q is None:
+            self._q = self.gs()
+        return self._q
+
+    @property
+    def r(self):
+        if self._r is None:
+            self._r = self.gs()
+        return self._r
+
+    @property
+    def lambda_alpha(self):
+        if self._lambda_alpha is None:
+            if isinstance(self.alpha, int):
+                lambda_ii = np.power(self.alpha, 1 / 2 * self.i / (self.dim - 1))
+                self._lambda_alpha = np.diag(lambda_ii)
+            else:
+                lambda_ii = np.power(self.alpha[:, None], 1 / 2 * self.i[None, :] / (self.dim - 1))
+                self._lambda_alpha = np.stack([np.diag(l_ii) for l_ii in lambda_ii])
+        return self._lambda_alpha
+
+    @staticmethod
+    def f_pen(x):
+        return np.sum(np.maximum(0, np.abs(x) - 5), axis=1)
+
+    def t_asy_beta(self, x):
+        # exp = np.power(x, 1 + self.beta * self.i[:, None] / (self.input_dim - 1) * np.sqrt(x))
+        # return np.where(x > 0, exp, x)
+        return x
+
+    def t_osz(self, x):
+        x_hat = np.where(x != 0, np.log(np.abs(x)), 0)
+        c_1 = np.where(x > 0, 10, 5.5)
+        c_2 = np.where(x > 0, 7.9, 3.1)
+        return np.sign(x) * np.exp(x_hat + 0.049 * (np.sin(c_1 * x_hat) + np.sin(c_2 * x_hat)))
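To illustrate the intended interface (an example sketch, not part of the commit): a concrete objective only needs to implement evaluate_full() on a batch of shape (n, dim); the shift x_opt and offset f_opt are drawn in BaseObjective.__init__:

    import numpy as np
    from alr_envs.stochastic_search.functions.f_base import BaseObjective

    class Sphere(BaseObjective):   # hypothetical subclass for illustration
        def __init__(self, dim, int_opt=(-3., 3.)):
            super(Sphere, self).__init__(dim, int_opt=int_opt)

        def evaluate_full(self, x):
            x = np.atleast_2d(x)
            assert x.shape[1] == self.dim
            z = x - self.x_opt
            return np.sum(z ** 2, axis=1) + self.f_opt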
alr_envs/stochastic_search/functions/f_rosenbrock.py (new file, 56 lines)
@@ -0,0 +1,56 @@
+import numpy as np
+
+from alr_envs.stochastic_search.functions.f_base import BaseObjective
+
+
+class Rosenbrock(BaseObjective):
+    def __init__(self, dim, int_opt=(-3., 3.)):
+        super(Rosenbrock, self).__init__(dim, int_opt=int_opt)
+        self.c = np.maximum(1, np.sqrt(self.dim) / 8)
+
+    def evaluate_full(self, x):
+        x = np.atleast_2d(x)
+        assert x.shape[1] == self.dim
+
+        z = self.c * (x - self.x_opt) + 1
+        z_end = z[:, 1:]
+        z_begin = z[:, :-1]
+
+        a = z_begin ** 2 - z_end
+        b = z_begin - 1
+
+        return np.sum(100 * a ** 2 + b ** 2, axis=1) + self.f_opt
+
+
+class RosenbrockRotated(BaseObjective):
+    def __init__(self, dim, int_opt=(-3., 3.)):
+        super(RosenbrockRotated, self).__init__(dim, int_opt=int_opt)
+        self.c = np.maximum(1, np.sqrt(self.dim) / 8)
+
+    def evaluate_full(self, x):
+        x = np.atleast_2d(x)
+        assert x.shape[1] == self.dim
+
+        z = (self.c * self.r @ x.T + 1 / 2).T
+        a = z[:, :-1] ** 2 - z[:, 1:]
+        b = z[:, :-1] - 1
+
+        return np.sum(100 * a ** 2 + b ** 2, axis=1) + self.f_opt
+
+
+class RosenbrockRaw(BaseObjective):
+    def __init__(self, dim, int_opt=(-3., 3.)):
+        super(RosenbrockRaw, self).__init__(dim, int_opt=int_opt)
+        self.x_opt = np.ones((1, dim))
+        self.f_opt = 0
+
+    def evaluate_full(self, x):
+        x = np.atleast_2d(x)
+        assert x.shape[1] == self.dim
+
+        a = x[:, :-1] ** 2 - x[:, 1:]
+        b = x[:, :-1] - 1
+
+        out = np.sum(100 * a ** 2 + b ** 2, axis=1)
+
+        return out
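A quick sanity-check sketch (hypothetical usage, not part of the commit): RosenbrockRaw fixes its optimum at the all-ones point with f_opt = 0, while the shifted variant attains its sampled f_opt at x_opt:

    import numpy as np
    from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock, RosenbrockRaw

    raw = RosenbrockRaw(dim=5)
    print(raw(np.ones((1, 5))))        # [0.]

    rb = Rosenbrock(dim=5)
    print(rb(rb.x_opt) - rb.f_opt)     # [0.]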
alr_envs/stochastic_search/stochastic_search.py (new file, 22 lines)
@@ -0,0 +1,22 @@
+import gym
+import numpy as np
+
+from alr_envs.stochastic_search.functions.f_base import BaseObjective
+
+
+class StochasticSearchEnv(gym.Env):
+
+    def __init__(self, cost_f: BaseObjective):
+        self.cost_f = cost_f
+
+        self.action_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.cost_f.dim,), dtype=np.float64)
+        self.observation_space = gym.spaces.Box(low=(), high=(), shape=(), dtype=np.float64)
+
+    def step(self, action):
+        return np.zeros(self.observation_space.shape), np.squeeze(-self.cost_f(action)), True, {}
+
+    def reset(self):
+        return np.zeros(self.observation_space.shape)
+
+    def render(self, mode='human'):
+        pass
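Putting the pieces together, a rough sketch of how the stochastic search interface is meant to be driven (assuming the installed gym version accepts the empty observation space defined above): every episode is a single step, the action is the candidate parameter vector, and the reward is the negated cost:

    import numpy as np
    from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock
    from alr_envs.stochastic_search.stochastic_search import StochasticSearchEnv

    env = StochasticSearchEnv(Rosenbrock(dim=5))
    ob = env.reset()
    x = np.random.randn(5)                # candidate drawn from some search distribution
    ob, reward, done, info = env.step(x)  # done is always True: one evaluation per episode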
alr_envs/utils/__init__.py (new file, empty)
alr_envs/utils/utils.py (new file, 20 lines)
@@ -0,0 +1,20 @@
+import numpy as np
+
+
+def angle_normalize(x, type="deg"):
+    """
+    normalize angle x to [-pi,pi].
+    Args:
+        x: Angle in either degrees or radians
+        type: one of "deg" or "rad" for x being in degrees or radians
+
+    Returns:
+
+    """
+    if type == "deg":
+        return ((x + np.pi) % (2 * np.pi)) - np.pi
+    elif type == "rad":
+        two_pi = 2 * np.pi
+        return x - two_pi * np.floor((x + np.pi) / two_pi)
+    else:
+        raise ValueError(f"Invalid type {type}. Choose on of 'deg' or 'rad'.")
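A short sketch of the "rad" branch, which is the one used by the new balance term (values are exact):

    import numpy as np
    from alr_envs.utils.utils import angle_normalize

    print(angle_normalize(3 * np.pi / 2, type="rad"))    # -> -pi/2
    print(angle_normalize(-3 * np.pi / 2, type="rad"))   # -> pi/2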