add minor docs to diffusion classes and clean up some args

2024-09-17 16:26:25 -04:00 · 2024-09-17 16:26:25 -04:00 · bc52beca1e
commit bc52beca1e
parent ef5b14f820
8 changed files with 33 additions and 98 deletions
--- a/model/common/gaussian.py
+++ b/model/common/gaussian.py
@ -18,6 +18,7 @@ class GaussianModel(torch.nn.Module):
        horizon_steps,
        network_path=None,
        device="cuda:0",
        randn_clip_value=10,
    ):
        super().__init__()
        self.device = device
@ -36,6 +37,9 @@ class GaussianModel(torch.nn.Module):
        )
        self.horizon_steps = horizon_steps
        # Clip sampled randn (from standard deviation) such that the sampled action is not too far away from mean
        self.randn_clip_value = randn_clip_value
    def loss(
        self,
        true_action,
@ -75,7 +79,6 @@ class GaussianModel(torch.nn.Module):
        self,
        cond,
        deterministic=False,
        randn_clip_value=10,
        network_override=None,
    ):
        B = len(cond["state"]) if "state" in cond else len(cond["rgb"])
@ -87,7 +90,7 @@ class GaussianModel(torch.nn.Module):
        )
        sampled_action = dist.sample()
        sampled_action.clamp_(
-            dist.loc - randn_clip_value * dist.scale,
+            dist.loc - self.randn_clip_value * dist.scale,
-            dist.loc + randn_clip_value * dist.scale,
+            dist.loc + self.randn_clip_value * dist.scale,
        )
        return sampled_action.view(B, T, -1)
--- a/model/diffusion/diffusion_dipo.py
+++ b/model/diffusion/diffusion_dipo.py
@ -19,8 +19,7 @@ class DIPODiffusion(DiffusionModel):
        actor,
        critic,
        use_ddim=False,
-        randn_clip_value=10,
+        # modifying denoising schedule
        clamp_action=False,
        min_sampling_denoising_std=0.1,
        **kwargs,
    ):
@ -34,12 +33,6 @@ class DIPODiffusion(DiffusionModel):
        # Minimum std used in denoising process when sampling action - helps exploration
        self.min_sampling_denoising_std = min_sampling_denoising_std
        # For each denoising step, we clip sampled randn (from standard deviation) such that the sampled action is not too far away from mean
        self.randn_clip_value = randn_clip_value
        # Whether to clamp sampled action between [-1, 1]
        self.clamp_action = clamp_action
    # ---------- RL training ----------#
    def loss_critic(self, obs, next_obs, actions, rewards, dones, gamma):
@ -110,6 +103,8 @@ class DIPODiffusion(DiffusionModel):
            x = mean + std * noise
            # clamp action at final step
-            if self.clamp_action and i == len(t_all) - 1:
+            if self.final_action_clip_value is not None and i == len(t_all) - 1:
-                x = torch.clamp(x, -1, 1)
+                x = torch.clamp(
                    x, -self.final_action_clip_value, self.final_action_clip_value
                )
        return x
--- a/model/diffusion/diffusion_dql.py
+++ b/model/diffusion/diffusion_dql.py
@ -20,8 +20,7 @@ class DQLDiffusion(DiffusionModel):
        actor,
        critic,
        use_ddim=False,
-        randn_clip_value=10,
+        # modifying denoising schedule
        clamp_action=False,
        min_sampling_denoising_std=0.1,
        **kwargs,
    ):
@ -35,12 +34,6 @@ class DQLDiffusion(DiffusionModel):
        # Minimum std used in denoising process when sampling action - helps exploration
        self.min_sampling_denoising_std = min_sampling_denoising_std
        # For each denoising step, we clip sampled randn (from standard deviation) such that the sampled action is not too far away from mean
        self.randn_clip_value = randn_clip_value
        # Whether to clamp sampled action between [-1, 1]
        self.clamp_action = clamp_action
    # ---------- RL training ----------#
    def loss_critic(self, obs, next_obs, actions, rewards, dones, gamma):
@ -110,7 +103,7 @@ class DQLDiffusion(DiffusionModel):
            # Determine the noise level
            if deterministic and t == 0:
                std = torch.zeros_like(std)
-            elif deterministic:  # For DDPM, sample with noise
+            elif deterministic:
                std = torch.clip(std, min=1e-3)
            else:
                std = torch.clip(std, min=self.min_sampling_denoising_std)
@ -120,8 +113,10 @@ class DQLDiffusion(DiffusionModel):
            x = mean + std * noise
            # clamp action at final step
-            if self.clamp_action and i == len(t_all) - 1:
+            if self.final_action_clip_value is not None and i == len(t_all) - 1:
-                x = torch.clamp(x, -1, 1)
+                x = torch.clamp(
                    x, -self.final_action_clip_value, self.final_action_clip_value
                )
        return x
    def forward_train(
@ -160,6 +155,6 @@ class DQLDiffusion(DiffusionModel):
            x = mean + std * noise
            # clamp action at final step
-            if self.clamp_action and i == len(t_all) - 1:
+            if self.final_action_clip_value and i == len(t_all) - 1:
                x = torch.clamp(x, -1, 1)
        return x
--- a/model/diffusion/diffusion_idql.py
+++ b/model/diffusion/diffusion_idql.py
@ -104,6 +104,7 @@ class IDQLDiffusion(RWRDiffusion):
        cond,
        t,
    ):
        """not reward-weighted, same as diffusion.py"""
        device = x_start.device
        # Forward process
--- a/model/diffusion/diffusion_rwr.py
+++ b/model/diffusion/diffusion_rwr.py
@ -11,7 +11,7 @@ log = logging.getLogger(__name__)
 import torch.nn.functional as F
 from model.diffusion.diffusion import DiffusionModel
-from model.diffusion.sampling import make_timesteps, extract
+from model.diffusion.sampling import make_timesteps
 class RWRDiffusion(DiffusionModel):
@ -19,21 +19,13 @@ class RWRDiffusion(DiffusionModel):
    def __init__(
        self,
        use_ddim=False,
-        # various clipping
+        # modifying denoising schedule
        randn_clip_value=10,
        clamp_action=None,
        min_sampling_denoising_std=0.1,
        **kwargs,
    ):
        super().__init__(use_ddim=use_ddim, **kwargs)
        assert not self.use_ddim, "RWR does not support DDIM"
        # For each denoising step, we clip sampled randn (from standard deviation) such that the sampled action is not too far away from mean
        self.randn_clip_value = randn_clip_value
        # Action clamp range
        self.clamp_action = clamp_action
        # Minimum std used in denoising process when sampling action - helps exploration
        self.min_sampling_denoising_std = min_sampling_denoising_std
@ -47,6 +39,7 @@ class RWRDiffusion(DiffusionModel):
        rewards,
        t,
    ):
        """reward-weighted"""
        device = x_start.device
        # Forward process
@ -67,40 +60,6 @@ class RWRDiffusion(DiffusionModel):
    # ---------- Sampling ----------#
    # override
    def p_mean_var(
        self,
        x,
        t,
        cond,
    ):
        noise = self.network(x, t, cond=cond)
        # Predict x_0
        if self.predict_epsilon:
            """
            x₀ = √ 1\α̅ₜ xₜ - √ 1\α̅ₜ-1 ε
            """
            x_recon = (
                extract(self.sqrt_recip_alphas_cumprod, t, x.shape) * x
                - extract(self.sqrt_recipm1_alphas_cumprod, t, x.shape) * noise
            )
        else:  # directly predicting x₀
            x_recon = noise
        if self.denoised_clip_value is not None:
            x_recon.clamp_(-self.denoised_clip_value, self.denoised_clip_value)
        # Get mu
        """
        μₜ = β̃ₜ √ α̅ₜ₋₁/(1-α̅ₜ)x₀ + √ αₜ (1-α̅ₜ₋₁)/(1-α̅ₜ)xₜ
        """
        mu = (
            extract(self.ddpm_mu_coef1, t, x.shape) * x_recon
            + extract(self.ddpm_mu_coef2, t, x.shape) * x
        )
        logvar = extract(self.ddpm_logvar_clipped, t, x.shape)
        return mu, logvar
    # override
    @torch.no_grad()
    def forward(
@ -108,6 +67,7 @@ class RWRDiffusion(DiffusionModel):
        cond,
        deterministic=False,
    ):
        """Modifying denoising schedule"""
        device = self.betas.device
        B = len(cond["state"])
@ -126,7 +86,7 @@ class RWRDiffusion(DiffusionModel):
            # Determine noise level
            if deterministic and t == 0:
                std = torch.zeros_like(std)
-            elif deterministic:  # For DDPM, sample with noise
+            elif deterministic:
                std = torch.clip(std, min=1e-3)
            else:
                std = torch.clip(std, min=self.min_sampling_denoising_std)
@ -136,6 +96,8 @@ class RWRDiffusion(DiffusionModel):
            x = mean + std * noise
            # clamp action at final step
-            if self.clamp_action is not None and i == len(t_all) - 1:
+            if self.final_action_clip_value is not None and i == len(t_all) - 1:
-                x = torch.clamp(x, -self.clamp_action, self.clamp_action)
+                x = torch.clamp(
                    x, -self.final_action_clip_value, self.final_action_clip_value
                )
        return x
--- a/model/diffusion/diffusion_vpg.py
+++ b/model/diffusion/diffusion_vpg.py
@ -34,13 +34,10 @@ class VPGDiffusion(DiffusionModel):
        ft_denoising_steps_d=0,
        ft_denoising_steps_t=0,
        network_path=None,
-        # various clipping
+        # modifying denoising schedule
        randn_clip_value=10,
        clamp_action=False,
        min_sampling_denoising_std=0.1,
-        min_logprob_denoising_std=0.1,  # or the scheduler class
+        min_logprob_denoising_std=0.1,
-        eps_clip_value=None,  # only used with DDIM
+        # eta in DDIM
        # DDIM related
        eta=None,
        learn_eta=False,
        **kwargs,
@ -60,15 +57,6 @@ class VPGDiffusion(DiffusionModel):
        self.ft_denoising_steps_t = ft_denoising_steps_t  # annealing interval
        self.ft_denoising_steps_cnt = 0
        # Clip noise for numerical stability in policy gradient
        self.eps_clip_value = eps_clip_value
        # For each denoising step, we clip sampled randn (from standard deviation) such that the sampled action is not too far away from mean
        self.randn_clip_value = randn_clip_value
        # Whether to clamp sampled action between [-1, 1]
        self.clamp_action = clamp_action
        # Minimum std used in denoising process when sampling action - helps exploration
        self.min_sampling_denoising_std = min_sampling_denoising_std
@ -214,7 +202,6 @@ class VPGDiffusion(DiffusionModel):
            if deterministic:
                etas = torch.zeros((x.shape[0], 1, 1)).to(x.device)
            else:
                # TODO: eta cond
                etas = self.eta(cond).unsqueeze(1)  # B x 1 x (Da or 1)
            sigma = (
                etas
@ -310,8 +297,8 @@ class VPGDiffusion(DiffusionModel):
            x = mean + std * noise
            # clamp action at final step
-            if self.clamp_action and i == len(t_all) - 1:
+            if self.final_action_clip_value is not None and i == len(t_all) - 1:
-                x = torch.clamp(x, -1, 1)
+                x = torch.clamp(x, -self.final_action_clip_value, self.final_action_clip_value)
            if return_chain:
                if not self.use_ddim and t <= self.ft_denoising_steps:
--- a/model/rl/gaussian_rwr.py
+++ b/model/rl/gaussian_rwr.py
@ -16,7 +16,6 @@ class RWR_Gaussian(GaussianModel):
    def __init__(
        self,
        actor,
        randn_clip_value=10,
        **kwargs,
    ):
        super().__init__(network=actor, **kwargs)
@ -24,9 +23,6 @@ class RWR_Gaussian(GaussianModel):
        # assign actor
        self.actor = self.network
        # Clip sampled randn (from standard deviation) such that the sampled action is not too far away from mean
        self.randn_clip_value = randn_clip_value
    # override
    def loss(self, actions, obs, reward_weights):
        B = len(obs)
@ -44,6 +40,5 @@ class RWR_Gaussian(GaussianModel):
        actions = super().forward(
            cond=cond,
            deterministic=deterministic,
            randn_clip_value=self.randn_clip_value,
        )
        return actions
--- a/model/rl/gaussian_vpg.py
+++ b/model/rl/gaussian_vpg.py
@ -15,11 +15,9 @@ class VPG_Gaussian(GaussianModel):
        self,
        actor,
        critic,
        randn_clip_value=10,
        **kwargs,
    ):
        super().__init__(network=actor, **kwargs)
        self.randn_clip_value = randn_clip_value
        # Value function for obs - simple MLP
        self.critic = critic.to(self.device)
@ -44,7 +42,6 @@ class VPG_Gaussian(GaussianModel):
        return super().forward(
            cond=cond,
            deterministic=deterministic,
            randn_clip_value=self.randn_clip_value,
            network_override=self.actor if use_base_policy else None,
        )