Ported new HopperJump Rew to master

This commit is contained in:
Dominik Moritz Roth 2024-01-28 12:32:52 +01:00
parent 1372a596b5
commit 9fce6fff42

View File

@ -262,76 +262,100 @@ class HopperJumpEnv(HopperEnvCustomXML):
return True return True
return False return False
class HopperJumpMarkovRew(HopperJumpEnv):
    """HopperJump variant that fast-forwards the episode after take-off.

    In sparse mode, once the hopper has left the floor, the rest of the
    episode is simulated inside a single ``step`` call with a zero action and
    the sparse reward is returned immediately. This makes the reward depend
    only on the current transition (Markovian).
    """

    def _markov_update_state(self):
        """Refresh height, contact flags and goal distance after a sim step.

        Updates ``max_height``, ``init_floor_contact``, ``has_left_floor``,
        ``contact_with_floor`` and ``contact_dist`` on ``self``.

        Returns:
            Tuple ``(height_after, site_pos_after, goal_dist)``.
        """
        height_after = self.get_body_com("torso")[2]
        # Copy so the value stored in `info` does not alias simulator state
        # (assumes `xpos` is a view into MuJoCo data -- TODO confirm).
        site_pos_after = self.data.site('foot_site').xpos.copy()
        self.max_height = max(height_after, self.max_height)

        # Once the landing contact has been registered it is never re-queried.
        has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False

        # Contact state machine: initial contact -> left the floor -> landed.
        if not self.init_floor_contact:
            self.init_floor_contact = has_floor_contact
        if self.init_floor_contact and not self.has_left_floor:
            self.has_left_floor = not has_floor_contact
        if not self.contact_with_floor and self.has_left_floor:
            self.contact_with_floor = has_floor_contact

        goal_dist = np.linalg.norm(site_pos_after - self.goal)
        # Record the goal distance only at the first contact after the jump.
        if self.contact_dist is None and self.contact_with_floor:
            self.contact_dist = goal_dist

        return height_after, site_pos_after, goal_dist

    def _markov_rewards(self, height_after, goal_dist):
        """Return the weighted sum of distance, height, contact and healthy rewards."""
        healthy_reward = self.healthy_reward
        distance_reward = -goal_dist * self._dist_weight
        # Sparse mode rewards the best height reached, dense mode the current one.
        height_reward = (self.max_height if self.sparse else height_after) * self._height_weight
        # A fixed penalty distance of 5 is used when no landing contact was recorded.
        contact_reward = -(self.contact_dist or 5) * self._contact_weight
        return self._forward_reward_weight * (distance_reward + height_reward + contact_reward + healthy_reward)

    def step(self, action):
        """Advance the simulation by one step, or to the episode end after take-off.

        Args:
            action: Control input passed to ``do_simulation``.

        Returns:
            Tuple ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` is ``True`` when the episode was fast-forwarded to
            ``MAX_EPISODE_STEPS_HOPPERJUMP`` inside this call.
        """
        self._steps += 1
        self.do_simulation(action, self.frame_skip)
        height_after, site_pos_after, goal_dist = self._markov_update_state()

        # Only the commanded action is charged, so the cost is computed once
        # and is not affected by the zero-action fast-forward below.
        costs = self.control_cost(action)
        terminated = False
        truncated = False

        rewards = 0
        if not self.sparse or self._steps >= MAX_EPISODE_STEPS_HOPPERJUMP:
            rewards = self._markov_rewards(height_after, goal_dist)

        # NOTE(review): the observation is taken before the fast-forward, so
        # it does not reflect the final simulated state -- confirm intended.
        observation = self._get_obs()

        # Simulate the process after the jump to make the task Markovian.
        if self.sparse and self.has_left_floor:
            idle_action = np.zeros_like(action)
            while self._steps < MAX_EPISODE_STEPS_HOPPERJUMP:
                self._steps += 1
                try:
                    self.do_simulation(idle_action, self.frame_skip)
                except Exception as e:
                    # Best effort: report the simulation error and keep
                    # stepping so the episode still reaches its end.
                    print(e)
                height_after, site_pos_after, goal_dist = self._markov_update_state()

            # The task has reached its end: signal it to the caller and
            # compute the sparse reward from the final state.
            terminated = True
            rewards = self._markov_rewards(height_after, goal_dist)

        reward = rewards - costs
        info = dict(
            height=height_after,
            x_pos=site_pos_after,
            max_height=self.max_height,
            goal=self.goal[:1],
            goal_dist=goal_dist,
            height_rew=self.max_height,
            healthy_reward=self.healthy_reward,
            healthy=self.is_healthy,
            contact_dist=self.contact_dist or 0,
            num_steps=self._steps,
            has_left_floor=self.has_left_floor,
        )
        return observation, reward, terminated, truncated, info