New SDE feature: softmax activation of latent
This commit is contained in:
parent
ffbf2b3fe5
commit
e985d892ca
@ -136,7 +136,7 @@ class UniversalGaussianDistribution(SB3_Distribution):
|
|||||||
:param action_dim: Dimension of the action space.
|
:param action_dim: Dimension of the action space.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, action_dim: int, use_sde: bool = False, neural_strength: Strength = Strength.DIAG, cov_strength: Strength = Strength.DIAG, parameterization_type: ParametrizationType = ParametrizationType.NONE, enforce_positive_type: EnforcePositiveType = EnforcePositiveType.ABS, prob_squashing_type: ProbSquashingType = ProbSquashingType.NONE, epsilon=1e-3, sde_learn_features=False):
|
def __init__(self, action_dim: int, use_sde: bool = False, neural_strength: Strength = Strength.DIAG, cov_strength: Strength = Strength.DIAG, parameterization_type: ParametrizationType = ParametrizationType.NONE, enforce_positive_type: EnforcePositiveType = EnforcePositiveType.ABS, prob_squashing_type: ProbSquashingType = ProbSquashingType.NONE, epsilon=1e-3, sde_learn_features=False, sde_latent_softmax=False):
|
||||||
super(UniversalGaussianDistribution, self).__init__()
|
super(UniversalGaussianDistribution, self).__init__()
|
||||||
self.action_dim = action_dim
|
self.action_dim = action_dim
|
||||||
self.par_strength = cast_to_enum(neural_strength, Strength)
|
self.par_strength = cast_to_enum(neural_strength, Strength)
|
||||||
@ -155,6 +155,7 @@ class UniversalGaussianDistribution(SB3_Distribution):
|
|||||||
|
|
||||||
self.use_sde = use_sde
|
self.use_sde = use_sde
|
||||||
self.learn_features = sde_learn_features
|
self.learn_features = sde_learn_features
|
||||||
|
self.sde_latent_softmax = sde_latent_softmax
|
||||||
|
|
||||||
assert (self.par_type != ParametrizationType.NONE) == (
|
assert (self.par_type != ParametrizationType.NONE) == (
|
||||||
self.cov_strength == Strength.FULL), 'You should set an ParameterizationType iff the cov-strength is full'
|
self.cov_strength == Strength.FULL), 'You should set an ParameterizationType iff the cov-strength is full'
|
||||||
@ -349,6 +350,8 @@ class UniversalGaussianDistribution(SB3_Distribution):
|
|||||||
def get_noise(self, latent_sde: th.Tensor) -> th.Tensor:
|
def get_noise(self, latent_sde: th.Tensor) -> th.Tensor:
|
||||||
latent_sde = latent_sde if self.learn_features else latent_sde.detach()
|
latent_sde = latent_sde if self.learn_features else latent_sde.detach()
|
||||||
latent_sde = latent_sde[..., -self.latent_sde_dim:]
|
latent_sde = latent_sde[..., -self.latent_sde_dim:]
|
||||||
|
if self.sde_latent_softmax:
|
||||||
|
latent_sde = th.softmax(latent_sde, dim=-1)
|
||||||
latent_sde = th.nn.functional.normalize(latent_sde, dim=-1)
|
latent_sde = th.nn.functional.normalize(latent_sde, dim=-1)
|
||||||
# Default case: only one exploration matrix
|
# Default case: only one exploration matrix
|
||||||
if len(latent_sde) == 1 or len(latent_sde) != len(self.exploration_matrices):
|
if len(latent_sde) == 1 or len(latent_sde) != len(self.exploration_matrices):
|
||||||
|
Loading…
Reference in New Issue
Block a user