hopperjump-v2 -> init pos randomized

Onur 2022-04-20 14:50:02 +02:00
parent 1c9019ab08
commit 855ddcee4b
5 changed files with 110 additions and 2 deletions

View File

@@ -253,6 +253,15 @@ register(
        "context": True
    }
)
register(
    id='ALRHopperJump-v2',
    entry_point='alr_envs.alr.mujoco:ALRHopperJumpRndmPosEnv',
    max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
    kwargs={
        "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP
    }
)
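
A minimal usage sketch, not part of this commit: assuming the id is registered on import of alr_envs and Gym's module:EnvId syntax is available, the new step-based environment can be rolled out with random actions.

import gym

env = gym.make("alr_envs:ALRHopperJump-v2")
obs = env.reset()
for _ in range(250):  # assumes MAX_EPISODE_STEPS_HOPPERJUMP == 250; the TimeLimit wrapper ends the episode
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        break
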
# CtxtFree are v0, Contextual are v1
register(
    id='ALRHopperJumpOnBox-v0',
@@ -667,6 +676,31 @@ for _v in _versions:
    )
    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
## HopperJump
register(
    id="ALRHopperJumpProMP-v2",
    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
    kwargs={
        "name": f"alr_envs:ALRHopperJump-v2",
        "wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper],
        "mp_kwargs": {
            "num_dof": 3,
            "num_basis": 5,
            "duration": 2,
            "post_traj_time": 0,
            "policy_type": "motor",
            "weights_scale": 1.0,
            "zero_start": True,
            "zero_goal": False,
            "policy_kwargs": {
                "p_gains": np.ones(3),
                "d_gains": 0.1 * np.ones(3)
            }
        }
    }
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2")
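
A hedged sketch of how the ProMP variant would typically be used; the alr_envs.make helper and the single step call per episode are assumptions based on the motion-primitive helper referenced in entry_point above.

import alr_envs

promp_env = alr_envs.make("ALRHopperJumpProMP-v2", seed=1)
obs = promp_env.reset()
params = promp_env.action_space.sample()  # ProMP weights; the size follows from num_dof and num_basis
obs, episode_reward, done, info = promp_env.step(params)  # executes the whole 2 s trajectory at once
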
## HopperJumpOnBox
_versions = ["v0", "v1"]
for _v in _versions:

View File

@@ -6,7 +6,7 @@ from .table_tennis.tt_gym import TTEnvGym
from .beerpong.beerpong import ALRBeerBongEnv
from .ant_jump.ant_jump import ALRAntJumpEnv
from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv
from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
from .hopper_throw.hopper_throw import ALRHopperThrowEnv
from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv

View File

@@ -1 +1 @@
from .mp_wrapper import MPWrapper, HighCtxtMPWrapper

View File

@@ -91,6 +91,54 @@ class ALRHopperJumpEnv(HopperEnv):
        observation = self._get_obs()
        return observation
class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
    def __init__(self, max_episode_steps=250):
        super(ALRHopperJumpRndmPosEnv, self).__init__(exclude_current_positions_from_observation=False,
                                                      reset_noise_scale=5e-1,
                                                      max_episode_steps=max_episode_steps)

    def reset_model(self):
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale
        qpos = self.init_qpos + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
        qvel = self.init_qvel  # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
        self.set_state(qpos, qvel)
        observation = self._get_obs()
        return observation

    def step(self, action):
        self.current_step += 1
        self.do_simulation(action, self.frame_skip)
        height_after = self.get_body_com("torso")[2]
        self.max_height = max(height_after, self.max_height)

        ctrl_cost = self.control_cost(action)
        costs = ctrl_cost
        done = False

        if self.current_step >= self.max_episode_steps:
            healthy_reward = 0
            # maybe move the reward calculation into the if branches and define two separate
            # _forward_reward_weight variables for the contextual and episodic settings
            height_reward = self._forward_reward_weight * self.max_height
            rewards = height_reward + healthy_reward
        else:
            # penalty for a wrong start direction of the first two joints; not strictly needed and could be removed
            rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0

        observation = self._get_obs()
        reward = rewards - costs
        info = {
            'height': height_after,
            'max_height': self.max_height,
            'goal': self.goal
        }
        return observation, reward, done, info
if __name__ == '__main__':
    render_mode = "human"  # "human" or "partial" or "final"
    env = ALRHopperJumpEnv()

View File

@@ -29,3 +29,29 @@ class MPWrapper(MPEnvWrapper):
    @property
    def dt(self) -> Union[float, int]:
        return self.env.dt
class HighCtxtMPWrapper(MPWrapper):
    @property
    def active_obs(self):
        return np.hstack([
            [True] * (5 + int(not self.exclude_current_positions_from_observation)),  # position
            [False] * 6,  # velocity
            [False]
        ])

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        return self.env.sim.data.qpos[3:6].copy()

    @property
    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
        return self.env.sim.data.qvel[3:6].copy()

    @property
    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
        raise ValueError("Goal position is not available and has to be learnt based on the environment.")

    @property
    def dt(self) -> Union[float, int]:
        return self.env.dt
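
A small sketch, not part of the commit, of the boolean observation mask that active_obs returns for this wrapper: since ALRHopperJumpRndmPosEnv is constructed with exclude_current_positions_from_observation=False, the mask keeps the six position entries as context and hides the rest.

import numpy as np

exclude_current_positions_from_observation = False  # as set by ALRHopperJumpRndmPosEnv.__init__
mask = np.hstack([
    [True] * (5 + int(not exclude_current_positions_from_observation)),  # position entries
    [False] * 6,                                                         # velocity entries
    [False]                                                              # remaining entry
])
print(mask)        # [ True  True  True  True  True  True False ... False]
print(mask.shape)  # (13,)
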