started cleaning up init. DMP envs are still not transferred. Wrappers for various environments still missing
This commit is contained in:
		
							parent
							
								
									9ad6fbe712
								
							
						
					
					
						commit
						8fe6a83271
					
				
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -178,8 +178,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): | |||||||
|             [self._steps], |             [self._steps], | ||||||
|             ]) |             ]) | ||||||
| 
 | 
 | ||||||
|     def compute_reward(self): |  | ||||||
| 
 |  | ||||||
|     @property |     @property | ||||||
|     def dt(self): |     def dt(self): | ||||||
|         return super(ALRBeerBongEnv, self).dt * self.repeat_action |         return super(ALRBeerBongEnv, self).dt * self.repeat_action | ||||||
| @ -213,37 +211,37 @@ class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv): | |||||||
|         return ob, reward, done, infos |         return ob, reward, done, infos | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class ALRBeerBongEnvStepBased(ALRBeerBongEnv): | # class ALRBeerBongEnvStepBased(ALRBeerBongEnv): | ||||||
|     def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None): | #     def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None): | ||||||
|         super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos) | #         super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos) | ||||||
|         self.release_step = 62  # empirically evaluated for frame_skip=2! | #         self.release_step = 62  # empirically evaluated for frame_skip=2! | ||||||
| 
 | # | ||||||
|     def step(self, a): | #     def step(self, a): | ||||||
|         if self._steps < self.release_step: | #         if self._steps < self.release_step: | ||||||
|             return super(ALRBeerBongEnvStepBased, self).step(a) | #             return super(ALRBeerBongEnvStepBased, self).step(a) | ||||||
|         else: | #         else: | ||||||
|             reward = 0 | #             reward = 0 | ||||||
|             done = False | #             done = False | ||||||
|             while not done: | #             while not done: | ||||||
|                 sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBased, self).step(np.zeros(a.shape)) | #                 sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBased, self).step(np.zeros(a.shape)) | ||||||
|                 if not done or sub_infos['sim_crash']: | #                 if not done or sub_infos['sim_crash']: | ||||||
|                     reward += sub_reward | #                     reward += sub_reward | ||||||
|                 else: | #                 else: | ||||||
|                     ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy() | #                     ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy() | ||||||
|                     cup_goal_dist_final = np.linalg.norm(ball_pos - self.sim.data.site_xpos[ | #                     cup_goal_dist_final = np.linalg.norm(ball_pos - self.sim.data.site_xpos[ | ||||||
|                         self.sim.model._site_name2id["cup_goal_final_table"]].copy()) | #                         self.sim.model._site_name2id["cup_goal_final_table"]].copy()) | ||||||
|                     cup_goal_dist_top = np.linalg.norm(ball_pos - self.sim.data.site_xpos[ | #                     cup_goal_dist_top = np.linalg.norm(ball_pos - self.sim.data.site_xpos[ | ||||||
|                         self.sim.model._site_name2id["cup_goal_table"]].copy()) | #                         self.sim.model._site_name2id["cup_goal_table"]].copy()) | ||||||
|                     if sub_infos['success']: | #                     if sub_infos['success']: | ||||||
|                         dist_rew = -cup_goal_dist_final ** 2 | #                         dist_rew = -cup_goal_dist_final ** 2 | ||||||
|                     else: | #                     else: | ||||||
|                         dist_rew = -0.5 * cup_goal_dist_final ** 2 - cup_goal_dist_top ** 2 | #                         dist_rew = -0.5 * cup_goal_dist_final ** 2 - cup_goal_dist_top ** 2 | ||||||
|                     reward = reward - sub_infos['action_cost'] + dist_rew | #                     reward = reward - sub_infos['action_cost'] + dist_rew | ||||||
|             infos = sub_infos | #             infos = sub_infos | ||||||
|             ob = sub_ob | #             ob = sub_ob | ||||||
|             ob[-1] = self.release_step + 1  # Since we simulate until the end of the episode, PPO does not see the | #             ob[-1] = self.release_step + 1  # Since we simulate until the end of the episode, PPO does not see the | ||||||
|             # internal steps and thus, the observation also needs to be set correctly | #             # internal steps and thus, the observation also needs to be set correctly | ||||||
|         return ob, reward, done, infos | #         return ob, reward, done, infos | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| if __name__ == "__main__": | if __name__ == "__main__": | ||||||
|  | |||||||
| @ -298,76 +298,6 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv): | |||||||
|         return observation, reward, done, info |         return observation, reward, done, info | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv): |  | ||||||
|     def __init__(self, max_episode_steps=250): |  | ||||||
|         super(ALRHopperJumpRndmPosEnv, self).__init__(exclude_current_positions_from_observation=False, |  | ||||||
|                                                       reset_noise_scale=5e-1, |  | ||||||
|                                                       max_episode_steps=max_episode_steps) |  | ||||||
| 
 |  | ||||||
|     def reset_model(self): |  | ||||||
|         self._floor_geom_id = self.model.geom_name2id('floor') |  | ||||||
|         self._foot_geom_id = self.model.geom_name2id('foot_geom') |  | ||||||
|         noise_low = -np.ones(self.model.nq) * self._reset_noise_scale |  | ||||||
|         noise_low[1] = 0 |  | ||||||
|         noise_low[2] = 0 |  | ||||||
|         noise_low[3] = -0.2 |  | ||||||
|         noise_low[4] = -0.2 |  | ||||||
|         noise_low[5] = -0.1 |  | ||||||
| 
 |  | ||||||
|         noise_high = np.ones(self.model.nq) * self._reset_noise_scale |  | ||||||
|         noise_high[1] = 0 |  | ||||||
|         noise_high[2] = 0 |  | ||||||
|         noise_high[3] = 0 |  | ||||||
|         noise_high[4] = 0 |  | ||||||
|         noise_high[5] = 0.1 |  | ||||||
| 
 |  | ||||||
|         rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq) |  | ||||||
|         # rnd_vec[2] *= 0.05  # the angle around the y axis shouldn't be too high as the agent then falls down quickly and |  | ||||||
|         # can not recover |  | ||||||
|         # rnd_vec[1] = np.clip(rnd_vec[1], 0, 0.3) |  | ||||||
|         qpos = self.init_qpos + rnd_vec |  | ||||||
|         qvel = self.init_qvel |  | ||||||
| 
 |  | ||||||
|         self.set_state(qpos, qvel) |  | ||||||
| 
 |  | ||||||
|         observation = self._get_obs() |  | ||||||
|         return observation |  | ||||||
| 
 |  | ||||||
|     def step(self, action): |  | ||||||
| 
 |  | ||||||
|         self.current_step += 1 |  | ||||||
|         self.do_simulation(action, self.frame_skip) |  | ||||||
| 
 |  | ||||||
|         self.contact_with_floor = self._contact_checker(self._floor_geom_id, self._foot_geom_id) if not \ |  | ||||||
|             self.contact_with_floor else True |  | ||||||
| 
 |  | ||||||
|         height_after = self.get_body_com("torso")[2] |  | ||||||
|         self.max_height = max(height_after, self.max_height) if self.contact_with_floor else 0 |  | ||||||
| 
 |  | ||||||
|         ctrl_cost = self.control_cost(action) |  | ||||||
|         costs = ctrl_cost |  | ||||||
|         done = False |  | ||||||
| 
 |  | ||||||
|         if self.current_step >= self.max_episode_steps: |  | ||||||
|             healthy_reward = 0 |  | ||||||
|             height_reward = self._forward_reward_weight * self.max_height  # maybe move reward calculation into if structure and define two different _forward_reward_weight variables for context and episodic seperatley |  | ||||||
|             rewards = height_reward + healthy_reward |  | ||||||
| 
 |  | ||||||
|         else: |  | ||||||
|             # penalty for wrong start direction of first two joints; not needed, could be removed |  | ||||||
|             rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0 |  | ||||||
| 
 |  | ||||||
|         observation = self._get_obs() |  | ||||||
|         reward = rewards - costs |  | ||||||
|         info = { |  | ||||||
|             'height': height_after, |  | ||||||
|             'max_height': self.max_height, |  | ||||||
|             'goal': self.goal |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|         return observation, reward, done, info |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     render_mode = "human"  # "human" or "partial" or "final" |     render_mode = "human"  # "human" or "partial" or "final" | ||||||
|     # env = ALRHopperJumpEnv() |     # env = ALRHopperJumpEnv() | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user