Ported new HopperJump reward (HopperJumpMarkovRew) to master

2024-01-28 12:32:52 +01:00 · 2024-01-28 12:32:52 +01:00 · 9fce6fff42
commit 9fce6fff42
parent 1372a596b5
1 changed file with 97 additions and 73 deletions
--- a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py
+++ b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py
@@ -262,76 +262,100 @@ class HopperJumpEnv(HopperEnvCustomXML):
                return True
        return False
-# # TODO is that needed? if so test it
+class HopperJumpMarkovRew(HopperJumpEnv):
-# class HopperJumpStepEnv(HopperJumpEnv):
+    def step(self, action):
-#
+        self._steps += 1
-#     def __init__(self,
+
-#                  xml_file='hopper_jump.xml',
+        self.do_simulation(action, self.frame_skip)
-#                  forward_reward_weight=1.0,
+
-#                  ctrl_cost_weight=1e-3,
+        height_after = self.get_body_com("torso")[2]
-#                  healthy_reward=1.0,
+        # site_pos_after = self.data.get_site_xpos('foot_site')
-#                  height_weight=3,
+        site_pos_after = self.data.site('foot_site').xpos
-#                  dist_weight=3,
+        self.max_height = max(height_after, self.max_height)
-#                  terminate_when_unhealthy=False,
+
-#                  healthy_state_range=(-100.0, 100.0),
+        has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False
-#                  healthy_z_range=(0.5, float('inf')),
+
-#                  healthy_angle_range=(-float('inf'), float('inf')),
+        if not self.init_floor_contact:
-#                  reset_noise_scale=5e-3,
+            self.init_floor_contact = has_floor_contact
-#                  exclude_current_positions_from_observation=False
+        if self.init_floor_contact and not self.has_left_floor:
-#                  ):
+            self.has_left_floor = not has_floor_contact
-#
+        if not self.contact_with_floor and self.has_left_floor:
-#         self._height_weight = height_weight
+            self.contact_with_floor = has_floor_contact
-#         self._dist_weight = dist_weight
+
-#         super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
+        ctrl_cost = self.control_cost(action)
-#                          healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
+        costs = ctrl_cost
-#                          exclude_current_positions_from_observation)
+        terminated = False
-#
+        truncated = False
-#     def step(self, action):
+
-#         self._steps += 1
+        goal_dist = np.linalg.norm(site_pos_after - self.goal)
-#
+        if self.contact_dist is None and self.contact_with_floor:
-#         self.do_simulation(action, self.frame_skip)
+            self.contact_dist = goal_dist
-#
+
-#         height_after = self.get_body_com("torso")[2]
+        rewards = 0
-#         site_pos_after = self.data.site('foot_site').xpos.copy()
+        if not self.sparse or (self.sparse and self._steps >= MAX_EPISODE_STEPS_HOPPERJUMP):
-#         self.max_height = max(height_after, self.max_height)
+            healthy_reward = self.healthy_reward
-#
+            distance_reward = -goal_dist * self._dist_weight
-#         ctrl_cost = self.control_cost(action)
+            height_reward = (self.max_height if self.sparse else height_after) * self._height_weight
-#         healthy_reward = self.healthy_reward
+            contact_reward = -(self.contact_dist or 5) * self._contact_weight
-#         height_reward = self._height_weight * height_after
+            rewards = self._forward_reward_weight * (distance_reward + height_reward + contact_reward + healthy_reward)
-#         goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0]))
+
-#         goal_dist_reward = -self._dist_weight * goal_dist
+        observation = self._get_obs()
-#         dist_reward = self._forward_reward_weight * (goal_dist_reward + height_reward)
+
-#
+        # While loop to simulate the process after jump to make the task Markovian
-#         rewards = dist_reward + healthy_reward
+        if self.sparse and self.has_left_floor:
-#         costs = ctrl_cost
+            while self._steps < MAX_EPISODE_STEPS_HOPPERJUMP:
-#         done = False
+                # Simulate to the end of the episode
-#
+                self._steps += 1
-#         # This is only for logging the distance to goal when first having the contact
+
-#         has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False
+                try:
-#
+                    self.do_simulation(np.zeros_like(action), self.frame_skip)
-#         if not self.init_floor_contact:
+                except Exception as e:
-#             self.init_floor_contact = has_floor_contact
+                    print(e)
-#         if self.init_floor_contact and not self.has_left_floor:
+
-#             self.has_left_floor = not has_floor_contact
+                height_after = self.get_body_com("torso")[2]
-#         if not self.contact_with_floor and self.has_left_floor:
+                #site_pos_after = self.data.get_site_xpos('foot_site')
-#             self.contact_with_floor = has_floor_contact
+                site_pos_after = self.data.site('foot_site').xpos
-#
+                self.max_height = max(height_after, self.max_height)
-#         if self.contact_dist is None and self.contact_with_floor:
+
-#             self.contact_dist = goal_dist
+                has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False
-#
+
-#         ##############################################################
+                if not self.init_floor_contact:
-#
+                    self.init_floor_contact = has_floor_contact
-#         observation = self._get_obs()
+                if self.init_floor_contact and not self.has_left_floor:
-#         reward = rewards - costs
+                    self.has_left_floor = not has_floor_contact
-#         info = {
+                if not self.contact_with_floor and self.has_left_floor:
-#             'height': height_after,
+                    self.contact_with_floor = has_floor_contact
-#             'x_pos': site_pos_after,
+
-#             'max_height': copy.copy(self.max_height),
+                ctrl_cost = self.control_cost(action)
-#             'goal': copy.copy(self.goal),
+                costs = ctrl_cost
-#             'goal_dist': goal_dist,
+                done = False
-#             'height_rew': height_reward,
+
-#             'healthy_reward': healthy_reward,
+                goal_dist = np.linalg.norm(site_pos_after - self.goal)
-#             'healthy': copy.copy(self.is_healthy),
+                if self.contact_dist is None and self.contact_with_floor:
-#             'contact_dist': copy.copy(self.contact_dist) or 0
+                    self.contact_dist = goal_dist
-#         }
+
-#         return observation, reward, done, info
+                rewards = 0
            # Task has reached the end, compute the sparse reward
            done = True
            healthy_reward = self.healthy_reward
            distance_reward = -goal_dist * self._dist_weight
            height_reward = (self.max_height if self.sparse else height_after) * self._height_weight
            contact_reward = -(self.contact_dist or 5) * self._contact_weight
            rewards = self._forward_reward_weight * (distance_reward + height_reward + contact_reward + healthy_reward)
        reward = rewards - costs
        info = dict(
            height=height_after,
            x_pos=site_pos_after,
            max_height=self.max_height,
            goal=self.goal[:1],
            goal_dist=goal_dist,
            height_rew=self.max_height,
            healthy_reward=self.healthy_reward,
            healthy=self.is_healthy,
            contact_dist=self.contact_dist or 0,
            num_steps=self._steps,
            has_left_floor=self.has_left_floor
       )
        return observation, reward, terminated, truncated, info