Fixed bug: wrong dimensions for action_loss (now reduced to a scalar with th.mean)
This commit is contained in:
		
							parent
							
								
									4bb772a251
								
							
						
					
					
						commit
						0aeea4e2e5
					
				| @@ -330,12 +330,14 @@ class PPO(GaussianRolloutCollectorAuxclass, OnPolicyAlgorithm): | |||||||
|                 trust_region_losses.append(trust_region_loss.item()) |                 trust_region_losses.append(trust_region_loss.item()) | ||||||
| 
 | 
 | ||||||
|                 # 'Principle of least action' |                 # 'Principle of least action' | ||||||
|                 action_loss = th.square(actions) |                 action_loss = th.mean(th.square(actions)) | ||||||
| 
 | 
 | ||||||
|                 action_losses.append(action_loss) |                 action_losses.append(action_loss) | ||||||
| 
 | 
 | ||||||
|                 policy_loss = surrogate_loss + self.ent_coef * entropy_loss + \ |                 policy_loss = surrogate_loss + self.ent_coef * entropy_loss + \ | ||||||
|                     trust_region_loss + self.action_coef * action_loss |                     trust_region_loss + self.action_coef * action_loss | ||||||
|  |                 import pdb | ||||||
|  |                 pdb.set_trace() | ||||||
|                 pg_losses.append(policy_loss.item()) |                 pg_losses.append(policy_loss.item()) | ||||||
| 
 | 
 | ||||||
|                 loss = policy_loss + self.vf_coef * value_loss |                 loss = policy_loss + self.vf_coef * value_loss | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user