Fixed bug: action_loss had the wrong dimensions (it is now reduced to a scalar with th.mean)
This commit is contained in:
parent
4bb772a251
commit
0aeea4e2e5
@ -330,12 +330,14 @@ class PPO(GaussianRolloutCollectorAuxclass, OnPolicyAlgorithm):
|
|||||||
trust_region_losses.append(trust_region_loss.item())
|
trust_region_losses.append(trust_region_loss.item())
|
||||||
|
|
||||||
# 'Principle of least action'
|
# 'Principle of least action'
|
||||||
action_loss = th.square(actions)
|
action_loss = th.mean(th.square(actions))
|
||||||
|
|
||||||
action_losses.append(action_loss)
|
action_losses.append(action_loss)
|
||||||
|
|
||||||
policy_loss = surrogate_loss + self.ent_coef * entropy_loss + \
|
policy_loss = surrogate_loss + self.ent_coef * entropy_loss + \
|
||||||
trust_region_loss + self.action_coef * action_loss
|
trust_region_loss + self.action_coef * action_loss
|
||||||
|
import pdb
|
||||||
|
pdb.set_trace()
|
||||||
pg_losses.append(policy_loss.item())
|
pg_losses.append(policy_loss.item())
|
||||||
|
|
||||||
loss = policy_loss + self.vf_coef * value_loss
|
loss = policy_loss + self.vf_coef * value_loss
|
||||||
|
Loading…
Reference in New Issue
Block a user