Fix seeding in the hopper throw environments (sample with self.np_random instead of the global np.random)
This commit is contained in:
parent
5d4fc4d52f
commit
993df10fad
@ -27,7 +27,7 @@ class ALRHopperThrowEnv(HopperEnv):
|
|||||||
healthy_z_range=(0.7, float('inf')),
|
healthy_z_range=(0.7, float('inf')),
|
||||||
healthy_angle_range=(-float('inf'), float('inf')),
|
healthy_angle_range=(-float('inf'), float('inf')),
|
||||||
reset_noise_scale=5e-3,
|
reset_noise_scale=5e-3,
|
||||||
context = True,
|
context=True,
|
||||||
exclude_current_positions_from_observation=True,
|
exclude_current_positions_from_observation=True,
|
||||||
max_episode_steps=250):
|
max_episode_steps=250):
|
||||||
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
|
xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
|
||||||
@ -40,10 +40,10 @@ class ALRHopperThrowEnv(HopperEnv):
|
|||||||
exclude_current_positions_from_observation)
|
exclude_current_positions_from_observation)
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action):
|
||||||
|
|
||||||
self.current_step += 1
|
self.current_step += 1
|
||||||
self.do_simulation(action, self.frame_skip)
|
self.do_simulation(action, self.frame_skip)
|
||||||
ball_pos_after = self.get_body_com("ball")[0] #abs(self.get_body_com("ball")[0]) # use x and y to get point and use euclid distance as reward?
|
ball_pos_after = self.get_body_com("ball")[
|
||||||
|
0] # abs(self.get_body_com("ball")[0]) # use x and y to get point and use euclid distance as reward?
|
||||||
ball_pos_after_y = self.get_body_com("ball")[2]
|
ball_pos_after_y = self.get_body_com("ball")[2]
|
||||||
|
|
||||||
# done = self.done TODO We should use this, not sure why there is no other termination; ball_landed should be enough, because we only look at the throw itself? - Paul and Marc
|
# done = self.done TODO We should use this, not sure why there is no other termination; ball_landed should be enough, because we only look at the throw itself? - Paul and Marc
|
||||||
@ -57,7 +57,7 @@ class ALRHopperThrowEnv(HopperEnv):
|
|||||||
|
|
||||||
if self.current_step >= self.max_episode_steps or done:
|
if self.current_step >= self.max_episode_steps or done:
|
||||||
distance_reward = -np.linalg.norm(ball_pos_after - self.goal) if self.context else \
|
distance_reward = -np.linalg.norm(ball_pos_after - self.goal) if self.context else \
|
||||||
self._forward_reward_weight * ball_pos_after
|
self._forward_reward_weight * ball_pos_after
|
||||||
healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
|
healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
|
||||||
|
|
||||||
rewards = distance_reward + healthy_reward
|
rewards = distance_reward + healthy_reward
|
||||||
@ -67,8 +67,8 @@ class ALRHopperThrowEnv(HopperEnv):
|
|||||||
info = {
|
info = {
|
||||||
'ball_pos': ball_pos_after,
|
'ball_pos': ball_pos_after,
|
||||||
'ball_pos_y': ball_pos_after_y,
|
'ball_pos_y': ball_pos_after_y,
|
||||||
'_steps' : self.current_step,
|
'_steps': self.current_step,
|
||||||
'goal' : self.goal,
|
'goal': self.goal,
|
||||||
}
|
}
|
||||||
|
|
||||||
return observation, reward, done, info
|
return observation, reward, done, info
|
||||||
@ -78,7 +78,7 @@ class ALRHopperThrowEnv(HopperEnv):
|
|||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
self.current_step = 0
|
self.current_step = 0
|
||||||
self.goal = self.goal = np.random.uniform(2.0, 6.0, 1) # 0.5 8.0
|
self.goal = self.goal = self.np_random.uniform(2.0, 6.0, 1) # 0.5 8.0
|
||||||
return super().reset()
|
return super().reset()
|
||||||
|
|
||||||
# overwrite reset_model to make it deterministic
|
# overwrite reset_model to make it deterministic
|
||||||
@ -86,14 +86,15 @@ class ALRHopperThrowEnv(HopperEnv):
|
|||||||
noise_low = -self._reset_noise_scale
|
noise_low = -self._reset_noise_scale
|
||||||
noise_high = self._reset_noise_scale
|
noise_high = self._reset_noise_scale
|
||||||
|
|
||||||
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
|
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
|
||||||
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
|
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
|
||||||
|
|
||||||
self.set_state(qpos, qvel)
|
self.set_state(qpos, qvel)
|
||||||
|
|
||||||
observation = self._get_obs()
|
observation = self._get_obs()
|
||||||
return observation
|
return observation
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
render_mode = "human" # "human" or "partial" or "final"
|
render_mode = "human" # "human" or "partial" or "final"
|
||||||
env = ALRHopperThrowEnv()
|
env = ALRHopperThrowEnv()
|
||||||
@ -110,4 +111,4 @@ if __name__ == '__main__':
|
|||||||
print('After ', i, ' steps, done: ', d)
|
print('After ', i, ' steps, done: ', d)
|
||||||
env.reset()
|
env.reset()
|
||||||
|
|
||||||
env.close()
|
env.close()
|
||||||
|
@ -3,7 +3,6 @@ from gym.envs.mujoco.hopper_v3 import HopperEnv
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
MAX_EPISODE_STEPS_HOPPERTHROWINBASKET = 250
|
MAX_EPISODE_STEPS_HOPPERTHROWINBASKET = 250
|
||||||
|
|
||||||
|
|
||||||
@ -33,7 +32,7 @@ class ALRHopperThrowInBasketEnv(HopperEnv):
|
|||||||
context=True,
|
context=True,
|
||||||
penalty=0.0,
|
penalty=0.0,
|
||||||
exclude_current_positions_from_observation=True,
|
exclude_current_positions_from_observation=True,
|
||||||
max_episode_steps = 250):
|
max_episode_steps=250):
|
||||||
self.hit_basket_reward = hit_basket_reward
|
self.hit_basket_reward = hit_basket_reward
|
||||||
self.current_step = 0
|
self.current_step = 0
|
||||||
self.max_episode_steps = max_episode_steps
|
self.max_episode_steps = max_episode_steps
|
||||||
@ -57,7 +56,8 @@ class ALRHopperThrowInBasketEnv(HopperEnv):
|
|||||||
basket_center = (basket_pos[0] + 0.5, basket_pos[1], basket_pos[2])
|
basket_center = (basket_pos[0] + 0.5, basket_pos[1], basket_pos[2])
|
||||||
|
|
||||||
is_in_basket_x = ball_pos[0] >= basket_pos[0] and ball_pos[0] <= basket_pos[0] + self.basket_size
|
is_in_basket_x = ball_pos[0] >= basket_pos[0] and ball_pos[0] <= basket_pos[0] + self.basket_size
|
||||||
is_in_basket_y = ball_pos[1] >= basket_pos[1] - (self.basket_size/2) and ball_pos[1] <= basket_pos[1] + (self.basket_size/2)
|
is_in_basket_y = ball_pos[1] >= basket_pos[1] - (self.basket_size / 2) and ball_pos[1] <= basket_pos[1] + (
|
||||||
|
self.basket_size / 2)
|
||||||
is_in_basket_z = ball_pos[2] < 0.1
|
is_in_basket_z = ball_pos[2] < 0.1
|
||||||
is_in_basket = is_in_basket_x and is_in_basket_y and is_in_basket_z
|
is_in_basket = is_in_basket_x and is_in_basket_y and is_in_basket_z
|
||||||
if is_in_basket: self.ball_in_basket = True
|
if is_in_basket: self.ball_in_basket = True
|
||||||
@ -77,15 +77,16 @@ class ALRHopperThrowInBasketEnv(HopperEnv):
|
|||||||
if not self.context:
|
if not self.context:
|
||||||
rewards += self.hit_basket_reward
|
rewards += self.hit_basket_reward
|
||||||
else:
|
else:
|
||||||
dist = np.linalg.norm(ball_pos-basket_center)
|
dist = np.linalg.norm(ball_pos - basket_center)
|
||||||
if self.context:
|
if self.context:
|
||||||
rewards = -10 * dist
|
rewards = -10 * dist
|
||||||
else:
|
else:
|
||||||
rewards -= (dist*dist)
|
rewards -= (dist * dist)
|
||||||
else:
|
else:
|
||||||
# penalty not needed
|
# penalty not needed
|
||||||
rewards += ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0 #too much of a penalty?
|
rewards += ((action[
|
||||||
|
:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0 # too much of a penalty?
|
||||||
|
|
||||||
observation = self._get_obs()
|
observation = self._get_obs()
|
||||||
reward = rewards - costs
|
reward = rewards - costs
|
||||||
info = {
|
info = {
|
||||||
@ -106,7 +107,7 @@ class ALRHopperThrowInBasketEnv(HopperEnv):
|
|||||||
self.ball_in_basket = False
|
self.ball_in_basket = False
|
||||||
if self.context:
|
if self.context:
|
||||||
basket_id = self.sim.model.body_name2id("basket_ground")
|
basket_id = self.sim.model.body_name2id("basket_ground")
|
||||||
self.basket_x = np.random.uniform(3, 7, 1)
|
self.basket_x = self.np_random.uniform(3, 7, 1)
|
||||||
self.sim.model.body_pos[basket_id] = [self.basket_x, 0, 0]
|
self.sim.model.body_pos[basket_id] = [self.basket_x, 0, 0]
|
||||||
return super().reset()
|
return super().reset()
|
||||||
|
|
||||||
@ -115,8 +116,8 @@ class ALRHopperThrowInBasketEnv(HopperEnv):
|
|||||||
noise_low = -self._reset_noise_scale
|
noise_low = -self._reset_noise_scale
|
||||||
noise_high = self._reset_noise_scale
|
noise_high = self._reset_noise_scale
|
||||||
|
|
||||||
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
|
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
|
||||||
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
|
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
|
||||||
|
|
||||||
self.set_state(qpos, qvel)
|
self.set_state(qpos, qvel)
|
||||||
|
|
||||||
@ -140,4 +141,4 @@ if __name__ == '__main__':
|
|||||||
print('After ', i, ' steps, done: ', d)
|
print('After ', i, ' steps, done: ', d)
|
||||||
env.reset()
|
env.reset()
|
||||||
|
|
||||||
env.close()
|
env.close()
|
||||||
|
Loading…
Reference in New Issue
Block a user