Ported new HopperJump Rew to master
This commit is contained in:
parent
1372a596b5
commit
9fce6fff42
@ -262,76 +262,100 @@ class HopperJumpEnv(HopperEnvCustomXML):
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# # TODO is that needed? if so test it
|
class HopperJumpMarkovRew(HopperJumpEnv):
    """Hopper jump task whose sparse reward is delivered in a Markovian way.

    In sparse mode, once the foot has left the floor the remainder of the
    episode is simulated inside a single ``step`` call with zero actions, so
    the final reward is returned by the very transition that caused the jump.
    """

    def step(self, action):
        """Advance the simulation by one control step (or to the episode end).

        Args:
            action: Control vector forwarded to ``do_simulation``.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` is ``True`` only when the sparse fast-forward branch
            ran, i.e. the hopper has left the floor and the episode was
            simulated up to ``MAX_EPISODE_STEPS_HOPPERJUMP``.
        """
        self._steps += 1
        self.do_simulation(action, self.frame_skip)
        height_after, site_pos_after, goal_dist = self._markov_track_jump()

        # Only the agent's own action is charged; the zero actions used while
        # fast-forwarding below add no extra cost.
        costs = self.control_cost(action)
        terminated = False
        truncated = False

        rewards = 0
        # Dense mode is rewarded on every step, sparse mode only once the step
        # budget is exhausted.
        if not self.sparse or self._steps >= MAX_EPISODE_STEPS_HOPPERJUMP:
            rewards = self._markov_jump_reward(goal_dist, height_after)

        # NOTE(review): the observation is taken *before* the fast-forward
        # below, so in that branch it does not reflect the final simulator
        # state. Kept as in the original -- confirm whether this is intended.
        observation = self._get_obs()

        # Simulate the process after the jump to make the task Markovian.
        if self.sparse and self.has_left_floor:
            while self._steps < MAX_EPISODE_STEPS_HOPPERJUMP:
                self._steps += 1
                try:
                    self.do_simulation(np.zeros_like(action), self.frame_skip)
                except Exception as e:
                    # Best effort: report the failure and keep going so the
                    # episode still ends with a reward.
                    print(e)
                height_after, site_pos_after, goal_dist = self._markov_track_jump()

            # Task has reached the end, compute the sparse reward. The original
            # code assigned an unused local `done = True` here while returning
            # `terminated=False`, so the end of the episode was never reported.
            terminated = True
            rewards = self._markov_jump_reward(goal_dist, height_after)

        reward = rewards - costs
        info = dict(
            height=height_after,
            x_pos=site_pos_after,
            max_height=self.max_height,
            goal=self.goal[:1],
            goal_dist=goal_dist,
            height_rew=self.max_height,
            healthy_reward=self.healthy_reward,
            healthy=self.is_healthy,
            contact_dist=self.contact_dist or 0,
            num_steps=self._steps,
            has_left_floor=self.has_left_floor,
        )
        return observation, reward, terminated, truncated, info

    def _markov_track_jump(self):
        """Update height/contact bookkeeping from the current simulator state.

        Returns:
            ``(height_after, site_pos_after, goal_dist)`` for the current state.
        """
        height_after = self.get_body_com("torso")[2]
        # Copy so that the value stored in `info` cannot alias simulator memory
        # that later simulation steps overwrite (assumes `xpos` is an array
        # view, as in the MuJoCo Python bindings).
        site_pos_after = self.data.site('foot_site').xpos.copy()
        self.max_height = max(height_after, self.max_height)

        # Once the landing contact has been registered, stop querying contacts.
        has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False

        # Phases: initial floor contact -> left the floor -> landed again.
        if not self.init_floor_contact:
            self.init_floor_contact = has_floor_contact
        if self.init_floor_contact and not self.has_left_floor:
            self.has_left_floor = not has_floor_contact
        if not self.contact_with_floor and self.has_left_floor:
            self.contact_with_floor = has_floor_contact

        goal_dist = np.linalg.norm(site_pos_after - self.goal)
        # Remember the goal distance at the moment of the first landing contact.
        if self.contact_dist is None and self.contact_with_floor:
            self.contact_dist = goal_dist

        return height_after, site_pos_after, goal_dist

    def _markov_jump_reward(self, goal_dist, height_after):
        """Return the weighted sum of distance, height, contact and healthy rewards."""
        healthy_reward = self.healthy_reward
        distance_reward = -goal_dist * self._dist_weight
        # Sparse mode rewards the best height reached, dense mode the current one.
        height_reward = (self.max_height if self.sparse else height_after) * self._height_weight
        # A missing (or zero) landing distance falls back to a penalty distance of 5.
        contact_reward = -(self.contact_dist or 5) * self._contact_weight
        return self._forward_reward_weight * (distance_reward + height_reward + contact_reward + healthy_reward)
|
||||||
|
Loading…
Reference in New Issue
Block a user