diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index dd45698..8315a09 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -285,6 +285,20 @@ register(
         "healthy_reward": 1.0
     }
 )
+
+##### Hopper Jump step based reward
+register(
+    id='ALRHopperJump-v4',
+    entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnvStepBased',
+    max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
+    kwargs={
+        "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
+        "context": True,
+        "healthy_reward": 1.0
+    }
+)
+
+
 # CtxtFree are v0, Contextual are v1
 register(
     id='ALRHopperJumpOnBox-v0',
@@ -925,7 +939,7 @@ register(
         "basis_generator_kwargs": {
             'basis_generator_type': 'zero_rbf',
             'num_basis': 5,
-            'num_basis_zero_start': 2
+            'num_basis_zero_start': 1
         }
     }
 )
@@ -961,12 +975,48 @@ register(
         "basis_generator_kwargs": {
             'basis_generator_type': 'zero_rbf',
             'num_basis': 5,
-            'num_basis_zero_start': 2
+            'num_basis_zero_start': 1
         }
     }
 )
 
 ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v3")
+
+## HopperJump
+register(
+    id="ALRHopperJumpProMP-v4",
+    entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+    kwargs={
+        "name": f"alr_envs:ALRHopperJump-v4",
+        "wrappers": [mujoco.hopper_jump.NewMPWrapper],
+        "ep_wrapper_kwargs": {
+            "weight_scale": 1
+        },
+        "movement_primitives_kwargs": {
+            'movement_primitives_type': 'promp',
+            'action_dim': 3
+        },
+        "phase_generator_kwargs": {
+            'phase_generator_type': 'linear',
+            'delay': 0,
+            'tau': 2,  # initial value
+            'learn_tau': False,
+            'learn_delay': False
+        },
+        "controller_kwargs": {
+            'controller_type': 'motor',
+            "p_gains": np.ones(3),
+            "d_gains": 0.1*np.ones(3),
+        },
+        "basis_generator_kwargs": {
+            'basis_generator_type': 'zero_rbf',
+            'num_basis': 5,
+            'num_basis_zero_start': 1
+        }
+    }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v4")
+
 ## HopperJumpOnBox
 _versions = ["v0", "v1"]
 for _v in _versions:
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index 5c04b03..c02a70f 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -5,7 +5,7 @@ from .table_tennis.tt_gym import TTEnvGym
 from .beerpong.beerpong import ALRBeerBongEnv
 from .ant_jump.ant_jump import ALRAntJumpEnv
 from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
-from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv
+from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased
 from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
 from .hopper_throw.hopper_throw import ALRHopperThrowEnv
 from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 508cfba..8846643 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -189,7 +189,7 @@ if __name__ == "__main__":
         ac = np.zeros(7)
         obs, rew, d, info = env.step(ac)
         env.render("human")
-
+        print(env.dt)
         print(rew)
 
         if d:
diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
index 979e224..6bbbb5b 100644
--- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
+++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py
@@ -120,7 +120,6 @@ class ALRHopperJumpEnv(HopperEnv):
 
 
 class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
-    # The goal here is the desired x-Position of the Torso
 
     def step(self, action):
 
@@ -154,20 +153,15 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
         ctrl_cost = self.control_cost(action)
         costs = ctrl_cost
         done = False
-        goal_dist = 0
+        goal_dist = np.atleast_1d(np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0], dtype=object)))[0]
         rewards = 0
         if self.current_step >= self.max_episode_steps:
             # healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
             healthy_reward = self.healthy_reward * 2 #* self.current_step
-            goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0], dtype=object))[0]
             contact_dist = self.contact_dist if self.contact_dist is not None else 5
             dist_reward = self._forward_reward_weight * (-3*goal_dist + 10*self.max_height - 2*contact_dist)
             rewards = dist_reward + healthy_reward
-        # else:
-        ## penalty for wrong start direction of first two joints; not needed, could be removed
-        # rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0
-
 
         observation = self._get_obs()
         reward = rewards - costs
         info = {
@@ -175,11 +169,10 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
             'x_pos': site_pos_after,
             'max_height': self.max_height,
             'goal': self.goal,
-            'dist_rew': goal_dist,
+            'goal_dist': goal_dist,
             'height_rew': self.max_height,
             'healthy_reward': self.healthy_reward * 2
         }
-
         return observation, reward, done, info
 
     def reset_model(self):
@@ -213,12 +206,48 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
     def reset(self):
         super().reset()
         # self.goal = np.random.uniform(-1.5, 1.5, 1)
-        self.goal = np.random.uniform(0, 1.5, 1)
+        # self.goal = np.random.uniform(0, 1.5, 1)
+        self.goal = self.np_random.uniform(0, 1.5, 1)
         # self.goal = np.array([1.5])
         self.sim.model.body_pos[self.sim.model.body_name2id('goal_site_body')] = np.array([self.goal, 0, 0], dtype=object)
         return self.reset_model()
+
+class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
+    def step(self, action):
+
+        self._floor_geom_id = self.model.geom_name2id('floor')
+        self._foot_geom_id = self.model.geom_name2id('foot_geom')
+
+        self.current_step += 1
+        self.do_simulation(action, self.frame_skip)
+        height_after = self.get_body_com("torso")[2]
+        site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy()
+        self.max_height = max(height_after, self.max_height)
+        ctrl_cost = self.control_cost(action)
+
+        healthy_reward = self.healthy_reward * 2
+        height_reward = 10*height_after
+        goal_dist = np.atleast_1d(np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0], dtype=object)))[0]
+        goal_dist_reward = -3*goal_dist
+        dist_reward = self._forward_reward_weight * (goal_dist_reward + height_reward)
+        reward = -ctrl_cost + healthy_reward + dist_reward
+        done = False
+        observation = self._get_obs()
+        info = {
+            'height': height_after,
+            'x_pos': site_pos_after,
+            'max_height': self.max_height,
+            'goal': self.goal,
+            'goal_dist': goal_dist,
+            'dist_rew': dist_reward,
+            'height_rew': height_reward,
+            'healthy_reward': healthy_reward,
+            'ctrl_reward': -ctrl_cost
+        }
+
+        return observation, reward, done, info
+
 class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
     def __init__(self, max_episode_steps=250):
         super(ALRHopperJumpRndmPosEnv, self).__init__(exclude_current_positions_from_observation=False,
@@ -293,7 +322,8 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
 
 if __name__ == '__main__':
     render_mode = "human"  # "human" or "partial" or "final"
     # env = ALRHopperJumpEnv()
-    env = ALRHopperXYJumpEnv()
+    # env = ALRHopperXYJumpEnv()
+    env = ALRHopperXYJumpEnvStepBased()
     # env = ALRHopperJumpRndmPosEnv()
     obs = env.reset()
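For orientation, a minimal rollout sketch for the step-based environment registered above as ALRHopperJump-v4 (entry point ALRHopperXYJumpEnvStepBased). Only the env id and the info keys are taken from the diff; the gym.make call, the random-action loop, and the printed fields are illustrative assumptions:

# Sketch only: roll out the dense-reward hopper-jump env and inspect the
# per-step reward terms it reports in `info`. Assumes alr_envs and its
# MuJoCo dependencies are installed (assumption, not part of the diff).
import gym

env = gym.make('alr_envs:ALRHopperJump-v4')  # resolves to ALRHopperXYJumpEnvStepBased
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
    # reward terms exposed every step by the step-based env
    print(info['goal_dist'], info['height_rew'], info['ctrl_reward'])
env.close()
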
diff --git a/alr_envs/mp/episodic_wrapper.py b/alr_envs/mp/episodic_wrapper.py
index 21fb035..456c0c9 100644
--- a/alr_envs/mp/episodic_wrapper.py
+++ b/alr_envs/mp/episodic_wrapper.py
@@ -223,6 +223,9 @@ class EpisodicWrapper(gym.Env, ABC):
     def get_observation_from_step(self, observation: np.ndarray) -> np.ndarray:
         return observation[self.active_obs]
 
+    def seed(self, seed=None):
+        self.env.seed(seed)
+
     def plot_trajs(self, des_trajs, des_vels):
         import matplotlib.pyplot as plt
         import matplotlib
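As a rough usage note (an assumption about intent, not something stated in the diff): EpisodicWrapper.seed() now simply forwards to the wrapped env, and reset() above draws the goal from self.np_random instead of the global np.random, so seeding should make the sampled goal reproducible, e.g.:

# Sketch only: seeding the env twice should yield the same sampled goal, since
# reset() uses self.np_random. The gym.make call and attribute forwarding
# through the TimeLimit wrapper are assumptions, not part of the diff.
import gym

env = gym.make('alr_envs:ALRHopperJump-v4')
env.seed(42)
env.reset()
goal_a = env.goal.copy()
env.seed(42)
env.reset()
goal_b = env.goal.copy()
assert (goal_a == goal_b).all()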