hopperjump-v2 -> init pos randomized
parent 1c9019ab08
commit 855ddcee4b
@@ -253,6 +253,15 @@ register(
        "context": True
    }
)

register(
    id='ALRHopperJump-v2',
    entry_point='alr_envs.alr.mujoco:ALRHopperJumpRndmPosEnv',
    max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
    kwargs={
        "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP
    }
)
# CtxtFree are v0, Contextual are v1
register(
    id='ALRHopperJumpOnBox-v0',
@@ -667,6 +676,31 @@ for _v in _versions:
    )
    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)

## HopperJump
register(
    id="ALRHopperJumpProMP-v2",
    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
    kwargs={
        "name": f"alr_envs:ALRHopperJump-v2",
        "wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper],
        "mp_kwargs": {
            "num_dof": 3,
            "num_basis": 5,
            "duration": 2,
            "post_traj_time": 0,
            "policy_type": "motor",
            "weights_scale": 1.0,
            "zero_start": True,
            "zero_goal": False,
            "policy_kwargs": {
                "p_gains": np.ones(3),
                "d_gains": 0.1 * np.ones(3)
            }
        }
    }
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2")

## HopperJumpOnBox
_versions = ["v0", "v1"]
for _v in _versions:
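For context, a minimal usage sketch of the two registrations added above. This is an illustration, not part of the commit; it assumes the standard `module:env-id` form that `gym.make` supports and that the `"name"` kwarg above already uses.

import gym

# gym imports the alr_envs package from the "alr_envs:" prefix, which runs the register() calls above
env = gym.make("alr_envs:ALRHopperJump-v2")          # step-based env with randomized initial qpos
obs = env.reset()

mp_env = gym.make("alr_envs:ALRHopperJumpProMP-v2")  # ProMP variant registered above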
@@ -6,7 +6,7 @@ from .table_tennis.tt_gym import TTEnvGym
from .beerpong.beerpong import ALRBeerBongEnv
from .ant_jump.ant_jump import ALRAntJumpEnv
from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
from .hopper_jump.hopper_jump import ALRHopperJumpEnv
from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv
from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
from .hopper_throw.hopper_throw import ALRHopperThrowEnv
from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
@@ -1 +1 @@
from .mp_wrapper import MPWrapper
from .mp_wrapper import MPWrapper, HighCtxtMPWrapper
@@ -91,6 +91,54 @@ class ALRHopperJumpEnv(HopperEnv):
        observation = self._get_obs()
        return observation


class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
    def __init__(self, max_episode_steps=250):
        super(ALRHopperJumpRndmPosEnv, self).__init__(exclude_current_positions_from_observation=False,
                                                      reset_noise_scale=5e-1,
                                                      max_episode_steps=max_episode_steps)

    def reset_model(self):
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
        qvel = self.init_qvel  # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)

        self.set_state(qpos, qvel)

        observation = self._get_obs()
        return observation

    def step(self, action):
        self.current_step += 1
        self.do_simulation(action, self.frame_skip)
        height_after = self.get_body_com("torso")[2]
        self.max_height = max(height_after, self.max_height)

        ctrl_cost = self.control_cost(action)
        costs = ctrl_cost
        done = False

        if self.current_step >= self.max_episode_steps:
            healthy_reward = 0
            # maybe move the reward calculation into the if structure and define two different
            # _forward_reward_weight variables for the contextual and episodic cases separately
            height_reward = self._forward_reward_weight * self.max_height
            rewards = height_reward + healthy_reward
        else:
            # penalty for wrong start direction of first two joints; not needed, could be removed
            rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0

        observation = self._get_obs()
        reward = rewards - costs
        info = {
            'height': height_after,
            'max_height': self.max_height,
            'goal': self.goal
        }

        return observation, reward, done, info


if __name__ == '__main__':
    render_mode = "human"  # "human" or "partial" or "final"
    env = ALRHopperJumpEnv()
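A rollout sketch, illustrative only and not part of the diff, of the behaviour the class above adds: reset_model perturbs the initial qpos with uniform noise of scale 5e-1, and the max-height reward is only paid on the final step of the episode.

import numpy as np

env = ALRHopperJumpRndmPosEnv()

# initial observations differ between resets because of the qpos noise
starts = np.stack([env.reset() for _ in range(3)])
print(np.ptp(starts, axis=0))  # non-zero spread in the position entries

obs, total_reward = env.reset(), 0.0
for _ in range(env.max_episode_steps):
    obs, reward, done, info = env.step(env.action_space.sample())
    total_reward += reward  # the height term only contributes on the last step
print(info['max_height'], total_reward)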
@@ -29,3 +29,29 @@ class MPWrapper(MPEnvWrapper):
    @property
    def dt(self) -> Union[float, int]:
        return self.env.dt


class HighCtxtMPWrapper(MPWrapper):
    @property
    def active_obs(self):
        return np.hstack([
            [True] * (5 + int(not self.exclude_current_positions_from_observation)),  # position
            [False] * 6,  # velocity
            [False]
        ])

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        return self.env.sim.data.qpos[3:6].copy()

    @property
    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
        return self.env.sim.data.qvel[3:6].copy()

    @property
    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
        raise ValueError("Goal position is not available and has to be learnt based on the environment.")

    @property
    def dt(self) -> Union[float, int]:
        return self.env.dt
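As a closing illustration (an assumption about how the surrounding MP framework uses active_obs, namely as a boolean mask over the flat observation), the HighCtxtMPWrapper exposes only the position part of the observation as context:

import numpy as np

# 13-dimensional stand-in for the v2 observation: 6 positions, 6 velocities, 1 extra entry
observation = np.arange(13.0)
mask = np.hstack([[True] * 6, [False] * 6, [False]])  # active_obs with positions included
context = observation[mask]
print(context)  # the six position entries: [0. 1. 2. 3. 4. 5.]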