Fixed Bug: Wrong dimensions for action_loss

2022-09-03 11:44:01 +02:00 · 2022-09-03 11:44:01 +02:00 · 0aeea4e2e5
commit 0aeea4e2e5
parent 4bb772a251
1 changed file with 3 additions and 1 deletion
--- a/metastable_baselines/ppo/ppo.py
+++ b/metastable_baselines/ppo/ppo.py
@@ -330,12 +330,14 @@ class PPO(GaussianRolloutCollectorAuxclass, OnPolicyAlgorithm):
                trust_region_losses.append(trust_region_loss.item())
                # 'Principle of least action'
-                action_loss = th.square(actions)
+                action_loss = th.mean(th.square(actions))
                action_losses.append(action_loss)
                policy_loss = surrogate_loss + self.ent_coef * entropy_loss + \
                    trust_region_loss + self.action_coef * action_loss
                import pdb
                pdb.set_trace()
                pg_losses.append(policy_loss.item())
                loss = policy_loss + self.vf_coef * value_loss