2023-04-19 19:07:17 +02:00
|
|
|
from enum import Enum
|
|
|
|
import numpy as np
|
|
|
|
import torch as th
|
|
|
|
import scipy.spatial
|
|
|
|
from torch import nn
|
|
|
|
from stable_baselines3.common.distributions import Distribution as SB3_Distribution
|
|
|
|
from stable_baselines3.common.distributions import sum_independent_dims
|
|
|
|
from torch.distributions import Normal
|
2023-05-03 15:10:25 +02:00
|
|
|
import torch.nn.functional as F
|
2023-04-19 19:07:17 +02:00
|
|
|
|
2023-05-04 19:29:40 +02:00
|
|
|
from priorConditionedAnnealing import noise, kernel
|
2023-05-03 23:17:19 +02:00
|
|
|
|
2023-04-19 19:07:17 +02:00
|
|
|
|
|
|
|
class Par_Strength(Enum):
    """Selects how the standard deviation of the policy is parameterised.

    Each member's value equals its name, so a member can be looked up from
    a plain string via ``Par_Strength[name]`` (see ``cast_to_enum``).
    The per-member notes describe how ``StdNet`` treats each option.
    """

    # One learnable parameter, broadcast over all action dimensions.
    SCALAR = 'SCALAR'
    # One learnable parameter per action dimension.
    DIAG = 'DIAG'
    # A single linear-layer output, broadcast over all action dimensions.
    CONT_SCALAR = 'CONT_SCALAR'
    # One linear-layer output per action dimension.
    CONT_DIAG = 'CONT_DIAG'
    # Per-dimension learnable parameter multiplied by a single linear-layer output.
    CONT_HYBRID = 'CONT_HYBRID'
|
|
|
|
|
|
|
|
|
|
|
|
class EnforcePositiveType(Enum):
    """Transformations that can be applied to a raw value to constrain its sign.

    The members deliberately carry plain integer values and the actual
    callables are only created inside ``apply``: cloudpickle does not cope
    well with enums whose values are more complex objects.
    """

    NONE = 0
    SOFTPLUS = 1
    ABS = 2
    RELU = 3
    LOG = 4

    def apply(self, x):
        """Apply the transformation selected by this member to ``x``.

        :param x: value handed to the selected torch callable
        :return: the transformed value
        """
        # The position in this table must match the integer value of the member.
        transforms = (
            nn.Identity(),
            nn.Softplus(beta=1, threshold=20),
            th.abs,
            nn.ReLU(inplace=False),
            th.log,
        )
        selected = transforms[self.value]
        return selected(x)
|
|
|
|
|
|
|
|
|
|
|
|
class Avaible_Kernel_Funcs(Enum):
    """Names of the kernel constructors that can be selected by string.

    As with ``EnforcePositiveType``, the members only hold integers and the
    real objects are resolved lazily inside ``get_func``.
    """

    RBF = 0
    SE = 1
    BROWN = 2
    PINK = 3

    def get_func(self):
        """Return the attribute of the ``kernel`` module that belongs to this member.

        ``cast_to_kernel`` calls the returned object with the numeric
        parameters parsed from the specification string.
        """
        # The position in this table must match the integer value of the member.
        constructors = (kernel.rbf, kernel.se, kernel.brown, kernel.pink)
        return constructors[self.value]
|
2023-04-19 19:07:17 +02:00
|
|
|
|
|
|
|
|
2023-05-21 16:18:42 +02:00
|
|
|
class Avaible_Noise_Funcs(Enum):
    """Names of the noise classes that can be selected by string.

    The members only hold integers; the classes themselves are looked up
    in the ``noise`` module inside ``get_func``.
    """

    WHITE = 0
    PINK = 1
    COLOR = 2
    PERLIN = 3
    HARMONICPERLIN = 4
    DIRTYPERLIN = 5
    SDE = 6

    def get_func(self):
        """Return the attribute of the ``noise`` module that belongs to this member.

        ``cast_to_Noise`` calls the returned object with the known shape
        followed by the numeric parameters parsed from the specification string.
        """
        # The position in this table must match the integer value of the member.
        noise_classes = (
            noise.White_Noise,
            noise.Pink_Noise,
            noise.Colored_Noise,
            noise.Perlin_Noise,
            noise.Harmonic_Perlin_Noise,
            noise.Dirty_Perlin_Noise,
            noise.SDE_Noise,
        )
        return noise_classes[self.value]
|
2023-05-21 16:18:42 +02:00
|
|
|
|
|
|
|
|
2023-04-19 19:07:17 +02:00
|
|
|
def cast_to_enum(inp, Class):
    """Turn ``inp`` into an enum member.

    :param inp: either an enum member or the name of a member of ``Class``
    :param Class: enum class used for the lookup by name
    :return: ``inp`` itself if it already is an ``Enum`` instance (of any
        enum class, no check against ``Class`` is made), otherwise
        ``Class[inp]``
    :raises KeyError: if ``inp`` is not the name of a member of ``Class``
    """
    return inp if isinstance(inp, Enum) else Class[inp]
|
|
|
|
|
|
|
|
|
|
|
|
def cast_to_kernel(inp):
    """Turn a kernel specification into a kernel callable.

    :param inp: either a callable, which is returned unchanged, or a string
        of the form ``NAME_par1_par2_...`` where ``NAME`` is a member name of
        ``Avaible_Kernel_Funcs`` and every ``par`` is parsed as a float
    :return: the callable itself, or the result of calling the selected
        kernel constructor with the parsed parameters
    """
    if callable(inp):
        return inp

    name, *raw_pars = inp.split('_')
    # Parse the numbers before resolving the name, as the order decides
    # which error surfaces for a specification that is wrong in both parts.
    numbers = tuple(float(raw) for raw in raw_pars)
    constructor = Avaible_Kernel_Funcs[name].get_func()
    return constructor(*numbers)
|
2023-04-19 19:07:17 +02:00
|
|
|
|
|
|
|
|
2023-05-21 16:18:42 +02:00
|
|
|
def cast_to_Noise(Inp, known_shape):
    """Instantiate a noise source from a specification.

    :param Inp: either a callable (e.g. a noise class), which is called with
        ``known_shape``, or a string of the form ``NAME_par1_par2_...`` where
        ``NAME`` is a member name of ``Avaible_Noise_Funcs`` and every ``par``
        is parsed as a float
    :param known_shape: shape handed to the noise constructor as first argument
    :return: whatever the selected constructor returns
    """
    # TODO: Allow instantiated?
    if callable(Inp):
        return Inp(known_shape)

    name, *raw_pars = Inp.split('_')
    # Parse the numbers before resolving the name, as the order decides
    # which error surfaces for a specification that is wrong in both parts.
    numbers = tuple(float(raw) for raw in raw_pars)
    constructor = Avaible_Noise_Funcs[name].get_func()
    return constructor(known_shape, *numbers)
|
|
|
|
|
|
|
|
|
2023-04-19 19:07:17 +02:00
|
|
|
class PCA_Distribution(SB3_Distribution):
    """Diagonal Gaussian policy distribution with prior-conditioned sampling.

    The distribution itself (``log_prob``, ``entropy``, ``mode``) is a plain
    ``Normal``. Only sampling differs: the noise used in the
    reparameterisation is "rigged" so that the sampled action follows a
    second Gaussian ``rho`` that is obtained by conditioning on a window of
    past actions through a kernel (see ``_conditioning_engine``).

    :param action_dim: number of action dimensions
    :param par_strength: parameterisation of the standard deviation, either a
        ``Par_Strength`` member or its name
    :param kernel_func: kernel callable, or a string understood by
        ``cast_to_kernel``
    :param init_std: initial value forwarded to ``StdNet``
    :param cond_noise: its square is added to the diagonal of the kernel
        matrix for the past time steps
    :param window: number of past time steps used for conditioning
    :param epsilon: constant forwarded to ``StdNet``
    :param skip_conditioning: if set, no conditioning is performed and the
        base noise is used directly
    :param Base_Noise: noise class/callable, or a string understood by
        ``cast_to_Noise``; it is instantiated with the shape ``(1, action_dim)``
    """

    def __init__(
        self,
        action_dim: int,
        par_strength: Par_Strength = Par_Strength.CONT_DIAG,
        kernel_func=kernel.rbf(),
        init_std: float = 1,
        cond_noise: float = 0,
        window: int = 64,
        epsilon: float = 1e-6,
        skip_conditioning: bool = False,
        Base_Noise=noise.White_Noise,
    ):
        super().__init__()

        self.action_dim = action_dim
        self.kernel_func = cast_to_kernel(kernel_func)
        self.par_strength = cast_to_enum(par_strength, Par_Strength)
        self.init_std = init_std
        self.cond_noise = cond_noise
        self.window = window
        self.epsilon = epsilon
        self.skip_conditioning = skip_conditioning

        self.base_noise = cast_to_Noise(Base_Noise, (1, action_dim))

        # Premature optimization is the root of all evil
        self._build_conditioner()
        # *Optimizes it anyways*

    def proba_distribution_net(self, latent_dim: int):
        """Create the networks that produce mean and standard deviation.

        :param latent_dim: size of the latent features fed into both networks
        :return: tuple ``(mu_net, std_net)``
        """
        mu_net = nn.Linear(latent_dim, self.action_dim)
        std_net = StdNet(latent_dim, self.action_dim, self.init_std, self.par_strength, self.epsilon)

        return mu_net, std_net

    def proba_distribution(
            self, mean_actions: th.Tensor, std_actions: th.Tensor) -> SB3_Distribution:
        """Set the parameters of the underlying ``Normal`` and return ``self``.

        :param mean_actions: mean of the Gaussian
        :param std_actions: standard deviation of the Gaussian
        """
        self.distribution = Normal(mean_actions, std_actions)
        return self

    def log_prob(self, actions: th.Tensor) -> th.Tensor:
        """Log-probability of ``actions`` under the (unconditioned) ``Normal``.

        The actions are moved to the device of the distribution's mean first.
        """
        return sum_independent_dims(self.distribution.log_prob(actions.to(self.distribution.mean.device)))

    def entropy(self) -> th.Tensor:
        """Entropy of the (unconditioned) ``Normal``."""
        return sum_independent_dims(self.distribution.entropy())

    def get_actions(self, deterministic: bool = False, trajectory: th.Tensor = None) -> th.Tensor:
        """
        Return actions according to the probability distribution.

        :param deterministic: if set, return the mode instead of a sample
        :param trajectory: past actions forwarded to ``sample``; may only be
            ``None`` if ``deterministic`` or ``skip_conditioning`` is set
        :return: the mode or a conditioned sample
        """
        if deterministic:
            return self.mode()
        return self.sample(traj=trajectory)

    def sample(self, traj: th.Tensor, f_sigma: int = 1, epsilon=None) -> th.Tensor:
        """Draw an action via the reparameterisation trick with rigged noise.

        :param traj: past actions to condition on
        :param f_sigma: factor applied to the standard deviation of ``rho``
        :param epsilon: optional pre-drawn base noise; drawn from
            ``self.base_noise`` if ``None``
        :return: the sampled actions (also stored in ``self.gaussian_actions``)
        """
        # Identity check instead of comparing types.
        assert self.skip_conditioning or traj is not None, 'A past trajectory has to be supplied if conditinoning is performed'
        pi_mean, pi_std = self.distribution.mean.cpu(), self.distribution.scale.cpu()
        rho_mean, rho_std = self._conditioning_engine(traj, pi_mean, pi_std)
        # NOTE(review): this is an in-place multiplication. With
        # skip_conditioning set, _conditioning_engine hands back pi_std itself,
        # so pi_std (and possibly the distribution's scale, if .cpu() did not
        # copy) is rescaled as well - confirm this is intended.
        rho_std *= f_sigma
        eta = self._get_rigged(pi_mean, pi_std,
                               rho_mean, rho_std,
                               epsilon)
        # reparameterization with rigged samples
        actions = pi_mean + pi_std * eta

        self.gaussian_actions = actions
        return actions

    def is_contextual(self):
        """Tell whether the standard deviation depends on the observation.

        Currently hard-wired to ``True``; the intended expression is kept
        below (unreachable) until the non-contextual path works.
        """
        return True  # TODO: Remove, when bug for non-contextual is fixed
        # Always returning True will merely waste cpu cycles
        return self.par_strength not in [Par_Strength.SCALAR, Par_Strength.DIAG]

    def _get_rigged(self, pi_mean, pi_std, rho_mean, rho_std, epsilon=None):
        """Compute the noise ``eta`` for which ``pi_mean + pi_std * eta`` follows ``rho``.

        :param epsilon: optional base noise; drawn from ``self.base_noise``
            with the shape of ``pi_mean`` if ``None``
        :return: detached noise tensor; just the base noise if
            ``skip_conditioning`` is set
        """
        with th.no_grad():
            # Identity check: '== None' would be evaluated elementwise (or
            # ambiguously) for array-like noise.
            if epsilon is None:
                epsilon = self.base_noise(pi_mean.shape)

            if self.skip_conditioning:
                return epsilon.detach()

            Delta = rho_mean - pi_mean
            Pi_mu = 1 / pi_std
            Pi_sigma = rho_std / pi_std

            # pi_mean + pi_std * eta == rho_mean + rho_std * epsilon
            eta = Pi_mu * Delta + Pi_sigma * epsilon

            return eta.detach()

    def _pad_and_cut_trajectory(self, traj, value=0):
        """Bring the trajectory to exactly ``self.window`` entries along dim -2.

        Shorter trajectories are padded at the front of dim -2 with ``value``,
        longer ones are cut down to their last ``self.window`` entries.
        """
        if traj.shape[-2] < self.window:
            if traj.shape[-2] == 0:
                # Replace an empty trajectory by a single filler step before padding.
                shape = list(traj.shape)
                shape[-2] = 1
                traj = th.ones(shape)*value
            missing = self.window - traj.shape[-2]
            return F.pad(input=traj, pad=(0, 0, missing, 0, 0, 0), value=value)
        return traj[:, -self.window:, :]

    def _conditioning_engine(self, trajectory, pi_mean, pi_std):
        """Condition on the past trajectory and return the parameters of ``rho``.

        :return: tuple ``(rho_mean, rho_std)``; ``(pi_mean, pi_std)`` unchanged
            if ``skip_conditioning`` is set
        """
        if self.skip_conditioning:
            return pi_mean, pi_std

        traj = self._pad_and_cut_trajectory(trajectory)

        # Numpy is fun
        # Swap the last two axes of the trajectory and append the current
        # mean as one more entry along the last axis.
        y_np = np.append(np.swapaxes(traj, -1, -2), np.repeat(np.expand_dims(pi_mean, -1), traj.shape[0], 0), -1)

        with th.no_grad():
            conditioners = th.Tensor(self._adapt_conditioner(pi_std))
            y = th.Tensor(y_np)

            # Solve against the (adapted) Cholesky factors instead of inverting.
            S = th.cholesky_solve(self.Sig12.expand(conditioners.shape[:-1]).unsqueeze(-1), conditioners).squeeze(-1)

            rho_mean = th.einsum('bai,bai->ba', S, y)
            # NOTE(review): no square root is taken here although the result
            # is used as a standard deviation - confirm.
            rho_std = self.Sig22 - (S @ self.Sig12)

        return rho_mean, rho_std

    def _build_conditioner(self):
        """Precompute the kernel quantities that do not depend on the policy."""
        # Precomputes the Cholesky decomp of the cov matrix to be used as a pseudoinverse.
        # Also precomputes some auxilary stuff for _adapt_conditioner.
        w = self.window
        # w + 1 points 0..w; X is the single point w.
        Z = np.linspace(0, w, w+1).reshape(-1, 1)
        X = np.array([w]).reshape(-1, 1)

        # cond_noise**2 is only added for the first w diagonal entries.
        Sig11 = self.kernel_func(
            Z, Z) + np.diag(np.hstack((np.repeat(self.cond_noise**2, w), 0)))
        self.Sig12 = th.Tensor(self.kernel_func(Z, X)).squeeze(-1)
        self.Sig22 = th.Tensor(self.kernel_func(
            X, X)).squeeze(-1).squeeze(-1)
        self.conditioner = np.linalg.cholesky(Sig11)
        # Squared norm of the last row of L without its diagonal element.
        self.adapt_norm = np.linalg.norm(
            self.conditioner[-1, :][:-1], axis=-1)**2

    def _adapt_conditioner(self, pi_std):
        """Return the Cholesky factor(s) adapted to the current ``pi_std``."""
        # We can not actually precompute the cov inverse completely,
        # since it also depends on the current policies sigma.
        # But, because of the way the Cholesky Decomp works,
        # we can use the precomputed L (conditioner)
        # (which is computed by an efficient LAPACK implementation)
        # and adapt it for our new k(x_w+1,x_w+1) value (in python)
        # (Which is dependent on pi)
        # S_{ij} = \frac{1}{D_j} \left( A_{ij} - \sum_{k=1}^{j-1} S_{ik} S_{jk} D_k \right), \qquad\text{for } i>j
        # https://martin-thoma.com/images/2012/07/cholesky-zerlegung-numerik.png
        # This way conditioning of the GP can be done in O(dim(A)) time.
        if not self.is_contextual():
            # TODO: fix, this does not work
            # safe inplace
            self.conditioner[-1, -1] = np.sqrt(pi_std**2 + self.Sig22 - self.adapt_norm)
            return np.expand_dims(np.expand_dims(self.conditioner, 0), 0)
        else:
            # One copy of L per leading index pair of pi_std; only the last
            # diagonal element differs between the copies.
            conditioner = np.zeros(
                (pi_std.shape[0], pi_std.shape[1]) + self.conditioner.shape)
            conditioner[:, :] = self.conditioner
            conditioner[:, :, -1, -1] = np.sqrt(pi_std**2 + self.Sig22 - self.adapt_norm)
            return conditioner

    def mode(self) -> th.Tensor:
        """Return the mean of the underlying ``Normal``."""
        return self.distribution.mean

    def actions_from_params(
        self, mean: th.Tensor, std: th.Tensor, deterministic: bool = False
    ) -> th.Tensor:
        """Set the distribution parameters and return actions.

        No trajectory is passed on, so a non-deterministic call only works
        when ``skip_conditioning`` is set.
        """
        self.proba_distribution(mean, std)
        return self.get_actions(deterministic=deterministic)

    def log_prob_from_params(self, mean: th.Tensor, std: th.Tensor):
        """Return actions for the given parameters and their log-probability.

        :return: tuple ``(actions, log_prob)``
        """
        actions = self.actions_from_params(mean, std)
        log_prob = self.log_prob(actions)
        return actions, log_prob

    def print_info(self, traj: th.Tensor):
        """Print the parameters of ``pi`` and of the conditioned ``rho``."""
        pi_mean, pi_std = self.distribution.mean, self.distribution.scale
        rho_mean, rho_std = self._conditioning_engine(traj, pi_mean, pi_std)
        # The result is not used; the call is kept because it draws from
        # self.base_noise, exactly as before.
        eta = self._get_rigged(pi_mean, pi_std,
                               rho_mean, rho_std)
        print('pi ~ N('+str(pi_mean)+','+str(pi_std)+')')
        print('rho ~ N('+str(rho_mean)+','+str(rho_std)+')')
|
|
|
|
|
|
|
|
|
|
|
|
class StdNet(nn.Module):
    """Module producing the (positive) standard deviation of the policy.

    Depending on ``par_strength`` the standard deviation is a learnable
    parameter, the output of a linear layer on the latent features, or the
    product of both. The raw value is passed through softplus and
    ``epsilon`` is added.

    :param latent_dim: size of the latent features
    :param action_dim: number of action dimensions
    :param std_init: initial value of the learnable parameter(s), respectively
        the factor applied to the linear layer's output
    :param par_strength: ``Par_Strength`` member selecting the parameterisation
    :param epsilon: constant added after enforcing positivity
    """

    def __init__(self, latent_dim: int, action_dim: int, std_init: float, par_strength: Par_Strength, epsilon: float):
        super().__init__()
        self.action_dim = action_dim
        self.latent_dim = latent_dim
        self.std_init = std_init
        self.par_strength = par_strength
        self.enforce_positive_type = EnforcePositiveType.SOFTPLUS

        self.epsilon = epsilon

        if self.par_strength == Par_Strength.SCALAR:
            self.param = nn.Parameter(
                th.Tensor([std_init]), requires_grad=True)
        elif self.par_strength == Par_Strength.DIAG:
            self.param = nn.Parameter(
                th.Tensor(th.ones(action_dim)*std_init), requires_grad=True)
        elif self.par_strength == Par_Strength.CONT_SCALAR:
            self.net = nn.Linear(latent_dim, 1)
        elif self.par_strength == Par_Strength.CONT_HYBRID:
            self.net = nn.Linear(latent_dim, 1)
            self.param = nn.Parameter(
                th.Tensor(th.ones(action_dim)*std_init), requires_grad=True)
        elif self.par_strength == Par_Strength.CONT_DIAG:
            self.net = nn.Linear(latent_dim, self.action_dim)

    def forward(self, x: th.Tensor) -> th.Tensor:
        """Compute the standard deviation for the latent features ``x``.

        ``x`` is ignored for the non-contextual options ``SCALAR`` and ``DIAG``.

        :raises ValueError: if ``par_strength`` is none of the known options
        """
        if self.par_strength == Par_Strength.SCALAR:
            # Create the ones on the parameter's device, as the CONT_SCALAR
            # branch does, so the module also works after being moved.
            ones = th.ones(self.action_dim, device=self.param.device)
            return self._ensure_positive_func(ones * self.param[0])
        elif self.par_strength == Par_Strength.DIAG:
            return self._ensure_positive_func(self.param)
        elif self.par_strength == Par_Strength.CONT_SCALAR:
            cont = self.net(x)
            diag_chol = th.ones(self.action_dim, device=cont.device) * cont * self.std_init
            return self._ensure_positive_func(diag_chol)
        elif self.par_strength == Par_Strength.CONT_HYBRID:
            cont = self.net(x)
            return self._ensure_positive_func(self.param * cont)
        elif self.par_strength == Par_Strength.CONT_DIAG:
            cont = self.net(x)
            diag_chol = cont * self.std_init
            return self._ensure_positive_func(diag_chol)

        # ValueError is a subclass of Exception, so existing handlers still match.
        raise ValueError('Unsupported par_strength: ' + repr(self.par_strength))

    def _ensure_positive_func(self, x):
        """Apply the positivity transformation and add ``epsilon``."""
        return self.enforce_positive_type.apply(x) + self.epsilon

    def string(self):
        """Return a short textual tag for this module."""
        return '<StdNet />'
|
|
|
|
|
|
|
|
|
|
|
|
def test():
    """Smoke test: build a 2-D distribution, print its info and one sample.

    :return: the ``PCA_Distribution`` instance that was created
    """
    mean = th.Tensor([[0.0, 0.0]])
    std = th.Tensor([[0.9, 0.1]])
    # One batch entry with three past actions, matching window=3 below.
    history = th.Tensor([[[-1.0, -1.0], [-0.4, -0.4], [0.3, 0.3]]])

    dist = PCA_Distribution(2, window=3)
    dist.proba_distribution(mean, std)
    dist.print_info(history)
    sampled = dist.sample(history)
    print(sampled)

    return dist
|
|
|
|
|
|
|
|
|
|
|
|
# Run the smoke test only when this file is executed as a script.
if __name__ == '__main__':
    test()
|