hopperjump-v2 -> init pos randomized
parent 1c9019ab08
commit 855ddcee4b
@@ -253,6 +253,15 @@ register(
         "context": True
     }
 )
 
+register(
+    id='ALRHopperJump-v2',
+    entry_point='alr_envs.alr.mujoco:ALRHopperJumpRndmPosEnv',
+    max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
+    kwargs={
+        "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP
+    }
+)
+
 # CtxtFree are v0, Contextual are v1
 register(
     id='ALRHopperJumpOnBox-v0',
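
For orientation, a minimal usage sketch follows; it is not part of this commit. It assumes the registration above is executed when alr_envs is imported and that the environment follows the classic Gym 4-tuple step API used elsewhere in this diff.

# Minimal usage sketch (not part of this commit). Assumes importing alr_envs
# runs the registration above and that the classic gym.make / 4-tuple step API applies.
import gym
import alr_envs  # noqa: F401  # side effect: registers the ALR environments

env = gym.make("alr_envs:ALRHopperJump-v2")
obs = env.reset()  # start state is perturbed via reset_noise_scale=5e-1
for _ in range(5):
    obs, reward, done, info = env.step(env.action_space.sample())
print(info["max_height"])  # info keys come from ALRHopperJumpRndmPosEnv.step further below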
@@ -667,6 +676,31 @@ for _v in _versions:
     )
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
+## HopperJump
+register(
+    id="ALRHopperJumpProMP-v2",
+    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+    kwargs={
+        "name": f"alr_envs:ALRHopperJump-v2",
+        "wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper],
+        "mp_kwargs": {
+            "num_dof": 3,
+            "num_basis": 5,
+            "duration": 2,
+            "post_traj_time": 0,
+            "policy_type": "motor",
+            "weights_scale": 1.0,
+            "zero_start": True,
+            "zero_goal": False,
+            "policy_kwargs": {
+                "p_gains": np.ones(3),
+                "d_gains": 0.1 * np.ones(3)
+            }
+        }
+    }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2")
+
 ## HopperJumpOnBox
 _versions = ["v0", "v1"]
 for _v in _versions:
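
Again only as a sketch, not part of the commit: under the usual alr_envs motion-primitive convention, the registered ProMP variant would be created the same way, and a single step is assumed to consume one full set of basis weights rather than a per-frame torque command.

# Hypothetical usage sketch; assumes the ProMP helper follows the usual alr_envs
# convention where one env.step(...) call executes a complete trajectory.
import gym
import alr_envs  # noqa: F401

promp_env = gym.make("alr_envs:ALRHopperJumpProMP-v2")
promp_env.reset()
weights = promp_env.action_space.sample()  # ProMP basis weights (expected 3 dof * 5 basis = 15)
obs, reward, done, info = promp_env.step(weights)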
@@ -6,7 +6,7 @@ from .table_tennis.tt_gym import TTEnvGym
 from .beerpong.beerpong import ALRBeerBongEnv
 from .ant_jump.ant_jump import ALRAntJumpEnv
 from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
-from .hopper_jump.hopper_jump import ALRHopperJumpEnv
+from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv
 from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
 from .hopper_throw.hopper_throw import ALRHopperThrowEnv
 from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
@@ -1 +1 @@
-from .mp_wrapper import MPWrapper
+from .mp_wrapper import MPWrapper, HighCtxtMPWrapper
@@ -91,6 +91,54 @@ class ALRHopperJumpEnv(HopperEnv):
         observation = self._get_obs()
         return observation
 
 
+class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
+    def __init__(self, max_episode_steps=250):
+        super(ALRHopperJumpRndmPosEnv, self).__init__(exclude_current_positions_from_observation=False,
+                                                      reset_noise_scale=5e-1,
+                                                      max_episode_steps=max_episode_steps)
+
+    def reset_model(self):
+        noise_low = -self._reset_noise_scale
+        noise_high = self._reset_noise_scale
+
+        qpos = self.init_qpos + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+        qvel = self.init_qvel  # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+
+        self.set_state(qpos, qvel)
+
+        observation = self._get_obs()
+        return observation
+
+    def step(self, action):
+        self.current_step += 1
+        self.do_simulation(action, self.frame_skip)
+        height_after = self.get_body_com("torso")[2]
+        self.max_height = max(height_after, self.max_height)
+
+        ctrl_cost = self.control_cost(action)
+        costs = ctrl_cost
+        done = False
+
+        if self.current_step >= self.max_episode_steps:
+            healthy_reward = 0
+            # TODO: maybe move the reward calculation into the if branch and define two separate
+            # _forward_reward_weight variables for the contextual and episodic settings
+            height_reward = self._forward_reward_weight * self.max_height
+            rewards = height_reward + healthy_reward
+        else:
+            # penalty for a wrong start direction of the first two joints; not needed, could be removed
+            rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0
+
+        observation = self._get_obs()
+        reward = rewards - costs
+        info = {
+            'height': height_after,
+            'max_height': self.max_height,
+            'goal': self.goal
+        }
+
+        return observation, reward, done, info
+
 
 if __name__ == '__main__':
     render_mode = "human"  # "human" or "partial" or "final"
     env = ALRHopperJumpEnv()
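
To make the sparse reward structure of the new step method concrete, here is a small standalone illustration. The values of penalty and forward_reward_weight are placeholders, not taken from the commit; in the real environment they come from ALRHopperJumpEnv and the underlying Hopper defaults.

import numpy as np

# Standalone illustration of the reward logic in ALRHopperJumpRndmPosEnv.step.
# penalty and forward_reward_weight are assumed placeholder values.
max_episode_steps = 250
penalty = -0.01
forward_reward_weight = 1.0

def step_reward(current_step, action, max_height, ctrl_cost):
    if current_step >= max_episode_steps:
        # sparse reward: the maximum torso height reached, paid out once at episode end
        rewards = forward_reward_weight * max_height
    else:
        # small penalty if the first two joints start in the "wrong" direction
        rewards = ((action[:2] > 0) * penalty).sum() if current_step < 10 else 0
    return rewards - ctrl_cost

print(step_reward(5, np.array([0.3, -0.2, 0.1]), max_height=1.2, ctrl_cost=0.01))    # early step
print(step_reward(250, np.array([0.0, 0.0, 0.0]), max_height=1.8, ctrl_cost=0.005))  # final step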
@@ -29,3 +29,29 @@ class MPWrapper(MPEnvWrapper):
     @property
     def dt(self) -> Union[float, int]:
         return self.env.dt
+
+
+class HighCtxtMPWrapper(MPWrapper):
+    @property
+    def active_obs(self):
+        return np.hstack([
+            [True] * (5 + int(not self.exclude_current_positions_from_observation)),  # position
+            [False] * 6,  # velocity
+            [False]
+        ])
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray]:
+        return self.env.sim.data.qpos[3:6].copy()
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.env.sim.data.qvel[3:6].copy()
+
+    @property
+    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+    @property
+    def dt(self) -> Union[float, int]:
+        return self.env.dt