Started cleaning up init. DMP envs are still not transferred. Wrappers for various environments are still missing.
parent 9ad6fbe712
commit 8fe6a83271
File diff suppressed because it is too large
@@ -178,8 +178,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
             [self._steps],
         ])
 
-    def compute_reward(self):
-
     @property
     def dt(self):
         return super(ALRBeerBongEnv, self).dt * self.repeat_action
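Note on the `dt` property kept as context above: it scales the simulator timestep by the action repeat, so the time between two agent decisions stays consistent when each action is applied several times. A minimal, repo-independent sketch of that pattern, assuming a generic env with its own `dt` and `step` (names here are illustrative, not taken from this repository):

class ActionRepeat:
    """Illustrative wrapper: repeat each action `repeat` times."""

    def __init__(self, env, repeat=2):
        self.env = env
        self.repeat = repeat

    @property
    def dt(self):
        # effective control timestep seen by the agent
        return self.env.dt * self.repeat

    def step(self, action):
        total_reward, done, info, obs = 0.0, False, {}, None
        for _ in range(self.repeat):
            obs, reward, done, info = self.env.step(action)
            total_reward += reward
            if done:
                break
        return obs, total_reward, done, info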
@@ -213,37 +211,37 @@ class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
         return ob, reward, done, infos
 
 
-class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
-    def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
-        super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
-        self.release_step = 62 # empirically evaluated for frame_skip=2!
-
-    def step(self, a):
-        if self._steps < self.release_step:
-            return super(ALRBeerBongEnvStepBased, self).step(a)
-        else:
-            reward = 0
-            done = False
-            while not done:
-                sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBased, self).step(np.zeros(a.shape))
-                if not done or sub_infos['sim_crash']:
-                    reward += sub_reward
-                else:
-                    ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy()
-                    cup_goal_dist_final = np.linalg.norm(ball_pos - self.sim.data.site_xpos[
-                        self.sim.model._site_name2id["cup_goal_final_table"]].copy())
-                    cup_goal_dist_top = np.linalg.norm(ball_pos - self.sim.data.site_xpos[
-                        self.sim.model._site_name2id["cup_goal_table"]].copy())
-                    if sub_infos['success']:
-                        dist_rew = -cup_goal_dist_final ** 2
-                    else:
-                        dist_rew = -0.5 * cup_goal_dist_final ** 2 - cup_goal_dist_top ** 2
-                    reward = reward - sub_infos['action_cost'] + dist_rew
-            infos = sub_infos
-            ob = sub_ob
-            ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the
-            # internal steps and thus, the observation also needs to be set correctly
-        return ob, reward, done, infos
+# class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
+#     def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
+#         super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
+#         self.release_step = 62 # empirically evaluated for frame_skip=2!
+#
+#     def step(self, a):
+#         if self._steps < self.release_step:
+#             return super(ALRBeerBongEnvStepBased, self).step(a)
+#         else:
+#             reward = 0
+#             done = False
+#             while not done:
+#                 sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBased, self).step(np.zeros(a.shape))
+#                 if not done or sub_infos['sim_crash']:
+#                     reward += sub_reward
+#                 else:
+#                     ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy()
+#                     cup_goal_dist_final = np.linalg.norm(ball_pos - self.sim.data.site_xpos[
+#                         self.sim.model._site_name2id["cup_goal_final_table"]].copy())
+#                     cup_goal_dist_top = np.linalg.norm(ball_pos - self.sim.data.site_xpos[
+#                         self.sim.model._site_name2id["cup_goal_table"]].copy())
+#                     if sub_infos['success']:
+#                         dist_rew = -cup_goal_dist_final ** 2
+#                     else:
+#                         dist_rew = -0.5 * cup_goal_dist_final ** 2 - cup_goal_dist_top ** 2
+#                     reward = reward - sub_infos['action_cost'] + dist_rew
+#             infos = sub_infos
+#             ob = sub_ob
+#             ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the
+#             # internal steps and thus, the observation also needs to be set correctly
+#         return ob, reward, done, infos
 
 
 if __name__ == "__main__":
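For orientation: the class commented out above acts normally until a fixed release step and then rolls the simulation to the end of the episode with zero actions, scoring the final ball-to-cup distance once. A rough, repo-independent sketch of that control flow, assuming a generic env whose step() returns an info dict with 'sim_crash' and 'action_cost' keys; `env._steps`, `release_step`, and `final_distance` are placeholders mirroring the code above, not a guaranteed API:

import numpy as np


def step_then_rollout(env, action, release_step, final_distance):
    """Illustrative only: act until release_step, then roll out with zero actions."""
    if env._steps < release_step:
        return env.step(action)

    reward, done = 0.0, False
    ob, infos = None, {}
    while not done:
        ob, sub_reward, done, infos = env.step(np.zeros_like(action))
        if not done or infos.get('sim_crash', False):
            reward += sub_reward
        else:
            # terminal shaping: penalise the squared distance of the ball to the cup
            reward += -final_distance(env) ** 2 - infos.get('action_cost', 0.0)
    # hide the internal rollout steps from the policy by resetting the step counter
    # in the observation (mirrors the ob[-1] correction above)
    ob[-1] = release_step + 1
    return ob, reward, done, infos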
@@ -298,76 +298,6 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
         return observation, reward, done, info
 
 
-class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
-    def __init__(self, max_episode_steps=250):
-        super(ALRHopperJumpRndmPosEnv, self).__init__(exclude_current_positions_from_observation=False,
-                                                      reset_noise_scale=5e-1,
-                                                      max_episode_steps=max_episode_steps)
-
-    def reset_model(self):
-        self._floor_geom_id = self.model.geom_name2id('floor')
-        self._foot_geom_id = self.model.geom_name2id('foot_geom')
-        noise_low = -np.ones(self.model.nq) * self._reset_noise_scale
-        noise_low[1] = 0
-        noise_low[2] = 0
-        noise_low[3] = -0.2
-        noise_low[4] = -0.2
-        noise_low[5] = -0.1
-
-        noise_high = np.ones(self.model.nq) * self._reset_noise_scale
-        noise_high[1] = 0
-        noise_high[2] = 0
-        noise_high[3] = 0
-        noise_high[4] = 0
-        noise_high[5] = 0.1
-
-        rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
-        # rnd_vec[2] *= 0.05 # the angle around the y axis shouldn't be too high as the agent then falls down quickly and
-        # can not recover
-        # rnd_vec[1] = np.clip(rnd_vec[1], 0, 0.3)
-        qpos = self.init_qpos + rnd_vec
-        qvel = self.init_qvel
-
-        self.set_state(qpos, qvel)
-
-        observation = self._get_obs()
-        return observation
-
-    def step(self, action):
-
-        self.current_step += 1
-        self.do_simulation(action, self.frame_skip)
-
-        self.contact_with_floor = self._contact_checker(self._floor_geom_id, self._foot_geom_id) if not \
-            self.contact_with_floor else True
-
-        height_after = self.get_body_com("torso")[2]
-        self.max_height = max(height_after, self.max_height) if self.contact_with_floor else 0
-
-        ctrl_cost = self.control_cost(action)
-        costs = ctrl_cost
-        done = False
-
-        if self.current_step >= self.max_episode_steps:
-            healthy_reward = 0
-            height_reward = self._forward_reward_weight * self.max_height # maybe move reward calculation into if structure and define two different _forward_reward_weight variables for context and episodic seperatley
-            rewards = height_reward + healthy_reward
-
-        else:
-            # penalty for wrong start direction of first two joints; not needed, could be removed
-            rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0
-
-        observation = self._get_obs()
-        reward = rewards - costs
-        info = {
-            'height': height_after,
-            'max_height': self.max_height,
-            'goal': self.goal
-        }
-
-        return observation, reward, done, info
-
-
 if __name__ == '__main__':
     render_mode = "human" # "human" or "partial" or "final"
     # env = ALRHopperJumpEnv()
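The removed ALRHopperJumpRndmPosEnv builds per-dimension bounds for its reset noise, pinning the root coordinates while allowing small asymmetric perturbations on selected leg joints, so the hopper never starts in an unrecoverable pose. A standalone sketch of that sampling scheme, assuming a qpos vector with at least six entries; the bounds and indices below are illustrative, not the exact values the environment requires:

import numpy as np


def sample_reset_qpos(init_qpos, rng, scale=0.5):
    """Illustrative only: per-dimension uniform reset noise with some joints pinned."""
    nq = init_qpos.shape[0]
    low = -np.ones(nq) * scale
    high = np.ones(nq) * scale
    # pin the root coordinates, allow small asymmetric noise on selected joints
    low[1:3] = 0.0
    high[1:3] = 0.0
    low[3:5], high[3:5] = -0.2, 0.0
    low[5], high[5] = -0.1, 0.1
    return init_qpos + rng.uniform(low=low, high=high, size=nq)


# example usage:
# qpos = sample_reset_qpos(np.zeros(6), np.random.default_rng(0))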