From 8fe6a83271e015c41710cd55861f5344695da982 Mon Sep 17 00:00:00 2001 From: Onur Date: Tue, 28 Jun 2022 20:33:19 +0200 Subject: [PATCH] started cleaning up init. DMP envs are still not transferred. Wrappers for various environments still missing --- alr_envs/alr/__init__.py | 1204 ++++++----------- alr_envs/alr/mujoco/beerpong/beerpong.py | 64 +- .../alr/mujoco/hopper_jump/hopper_jump.py | 70 - 3 files changed, 418 insertions(+), 920 deletions(-) diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py index 09f533a..ec539db 100644 --- a/alr_envs/alr/__init__.py +++ b/alr_envs/alr/__init__.py @@ -21,6 +21,35 @@ from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} +DEFAULT_MP_ENV_DICT = { + "name": 'EnvName', + "wrappers": [], + "ep_wrapper_kwargs": { + "weight_scale": 1 + }, + "movement_primitives_kwargs": { + 'movement_primitives_type': 'promp', + 'action_dim': 7 + }, + "phase_generator_kwargs": { + 'phase_generator_type': 'linear', + 'delay': 0, + 'tau': 1.5, # initial value + 'learn_tau': False, + 'learn_delay': False + }, + "controller_kwargs": { + 'controller_type': 'motor', + "p_gains": np.ones(7), + "d_gains": np.ones(7) * 0.1, + }, + "basis_generator_kwargs": { + 'basis_generator_type': 'zero_rbf', + 'num_basis': 5, + 'num_basis_zero_start': 2 + } +} + # Classic Control ## Simple Reacher register( @@ -32,16 +61,6 @@ register( } ) -register( - id='SimpleReacher-v1', - entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', - max_episode_steps=200, - kwargs={ - "n_links": 2, - "random_start": False - } -) - register( id='LongSimpleReacher-v0', entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', @@ -51,16 +70,6 @@ register( } ) -register( - id='LongSimpleReacher-v1', - entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', - max_episode_steps=200, - kwargs={ - "n_links": 5, - "random_start": False - } -) - ## Viapoint Reacher register( @@ -91,38 +100,6 @@ register( } ) -register( - id='HoleReacher-v1', - entry_point='alr_envs.alr.classic_control:HoleReacherEnv', - max_episode_steps=200, - kwargs={ - "n_links": 5, - "random_start": False, - "allow_self_collision": False, - "allow_wall_collision": False, - "hole_width": 0.25, - "hole_depth": 1, - "hole_x": None, - "collision_penalty": 100, - } -) - -register( - id='HoleReacher-v2', - entry_point='alr_envs.alr.classic_control:HoleReacherEnv', - max_episode_steps=200, - kwargs={ - "n_links": 5, - "random_start": False, - "allow_self_collision": False, - "allow_wall_collision": False, - "hole_width": 0.25, - "hole_depth": 1, - "hole_x": 2, - "collision_penalty": 1, - } -) - # Mujoco ## Reacher @@ -203,108 +180,39 @@ register( } ) -_vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1] -for i in _vs: - _env_id = f'ALRReacher{i}-v0' - register( - id=_env_id, - entry_point='alr_envs.alr.mujoco:ALRReacherEnv', - max_episode_steps=200, - kwargs={ - "steps_before_reward": 0, - "n_links": 5, - "balance": False, - 'ctrl_cost_weight': i - } - ) - - _env_id = f'ALRReacherSparse{i}-v0' - register( - id=_env_id, - entry_point='alr_envs.alr.mujoco:ALRReacherEnv', - max_episode_steps=200, - kwargs={ - "steps_before_reward": 200, - "n_links": 5, - "balance": False, - 'ctrl_cost_weight': i - } - ) - -# CtxtFree are v0, Contextual are v1 register( id='ALRAntJump-v0', entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP, - kwargs={ - "max_episode_steps": 
MAX_EPISODE_STEPS_ANTJUMP, - "context": False - } -) - -# CtxtFree are v0, Contextual are v1 -register( - id='ALRAntJump-v1', - entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv', - max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP, kwargs={ "max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP, "context": True } ) -# CtxtFree are v0, Contextual are v1 register( id='ALRHalfCheetahJump-v0', entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP, - "context": False - } -) -# CtxtFree are v0, Contextual are v1 -register( - id='ALRHalfCheetahJump-v1', - entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv', - max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP, kwargs={ "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP, "context": True } ) -# CtxtFree are v0, Contextual are v1 + register( id='ALRHopperJump-v0', entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP, - "context": False, - "healthy_reward": 1.0 - } -) -register( - id='ALRHopperJump-v1', - entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, kwargs={ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP, "context": True } ) +#### Hopper Jump random joints and des position register( - id='ALRHopperJump-v2', - entry_point='alr_envs.alr.mujoco:ALRHopperJumpRndmPosEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP - } -) - -register( - id='ALRHopperJump-v3', + id='ALRHopperJumpRndmJointsDesPos-v0', entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, kwargs={ @@ -314,9 +222,9 @@ register( } ) -##### Hopper Jump step based reward +##### Hopper Jump random joints and des position step based reward register( - id='ALRHopperJump-v4', + id='ALRHopperJumpRndmJointsDesPosStepBased-v0', entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnvStepBased', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, kwargs={ @@ -326,84 +234,40 @@ register( } ) - -# CtxtFree are v0, Contextual are v1 register( id='ALRHopperJumpOnBox-v0', entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX, - "context": False - } -) -# CtxtFree are v0, Contextual are v1 -register( - id='ALRHopperJumpOnBox-v1', - entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX, kwargs={ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX, "context": True } ) -# CtxtFree are v0, Contextual are v1 register( id='ALRHopperThrow-v0', entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW, - "context": False - } -) -# CtxtFree are v0, Contextual are v1 -register( - id='ALRHopperThrow-v1', - entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW, kwargs={ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW, "context": True } ) -# CtxtFree are v0, Contextual are v1 register( id='ALRHopperThrowInBasket-v0', entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, - kwargs={ - "max_episode_steps": 
MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
-        "context": False
-    }
-)
-# CtxtFree are v0, Contextual are v1
-register(
-    id='ALRHopperThrowInBasket-v1',
-    entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv',
-    max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
     kwargs={
         "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
         "context": True
     }
 )
-# CtxtFree are v0, Contextual are v1
+
 register(
     id='ALRWalker2DJump-v0',
     entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
-    kwargs={
-        "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP,
-        "context": False
-    }
-)
-# CtxtFree are v0, Contextual are v1
-register(
-    id='ALRWalker2DJump-v1',
-    entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv',
-    max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
     kwargs={
         "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP,
         "context": True
     }
 )
 
@@ -427,76 +291,46 @@ register(id='TableTennis2DCtxt-v0',
          max_episode_steps=MAX_EPISODE_STEPS,
          kwargs={'ctxt_dim': 2})
 
-register(id='TableTennis2DCtxt-v1',
-         entry_point='alr_envs.alr.mujoco:TTEnvGym',
-         max_episode_steps=MAX_EPISODE_STEPS,
-         kwargs={'ctxt_dim': 2, 'fixed_goal': True})
-
 register(id='TableTennis4DCtxt-v0',
          entry_point='alr_envs.alr.mujoco:TTEnvGym',
          max_episode_steps=MAX_EPISODE_STEPS,
          kwargs={'ctxt_dim': 4})
 
-## BeerPong
-# fixed goal cup position
 register(
-    id='ALRBeerPong-v0',
-    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
-    max_episode_steps=300,
-    kwargs={
-        "rndm_goal": False,
-        "cup_goal_pos": [0.1, -2.0],
-        "frame_skip": 2
-    }
-    )
+    id='ALRBeerPong-v0',
+    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
+    max_episode_steps=300,
+    kwargs={
+        "rndm_goal": True,
+        "cup_goal_pos": [-0.3, -1.2],
+        "frame_skip": 2
+    }
+)
 
-
-# random goal cup position
+# Here we use the same reward as in ALRBeerPong-v0, but after the release we only take
+# one more step, i.e. 
we simulate until the end of the episode within that single step.
 register(
-    id='ALRBeerPong-v1',
-    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
-    max_episode_steps=300,
-    kwargs={
-        "rndm_goal": True,
-        "cup_goal_pos": [-0.3, -1.2],
-        "frame_skip": 2
-    }
-    )
-
-# random goal cup position
-register(
-    id='ALRBeerPong-v2',
-    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBased',
-    max_episode_steps=300,
-    kwargs={
-        "rndm_goal": True,
-        "cup_goal_pos": [-0.3, -1.2],
-        "frame_skip": 2
-    }
-    )
-# Beerpong with episodic reward, but fixed release time step
-register(
-    id='ALRBeerPong-v3',
-    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBasedEpisodicReward',
-    max_episode_steps=300,
-    kwargs={
-        "rndm_goal": True,
-        "cup_goal_pos": [-0.3, -1.2],
-        "frame_skip": 2
-    }
-    )
+    id='ALRBeerPongStepBased-v0',
+    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBased',
+    max_episode_steps=300,
+    kwargs={
+        "rndm_goal": True,
+        "cup_goal_pos": [-0.3, -1.2],
+        "frame_skip": 2
+    }
+)
 
 # Beerpong with episodic reward, but fixed release time step
 register(
-    id='ALRBeerPong-v4',
-    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvFixedReleaseStep',
-    max_episode_steps=300,
-    kwargs={
-        "rndm_goal": True,
-        "cup_goal_pos": [-0.3, -1.2],
-        "frame_skip": 2
-    }
-    )
+    id='ALRBeerPongFixedRelease-v0',
+    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvFixedReleaseStep',
+    max_episode_steps=300,
+    kwargs={
+        "rndm_goal": True,
+        "cup_goal_pos": [-0.3, -1.2],
+        "frame_skip": 2
+    }
+)
 
 
 # Motion Primitive Environments
 
@@ -530,25 +364,17 @@ for _v in _versions:
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
 
     _env_id = f'{_name[0]}ProMP-{_name[1]}'
+    kwargs_dict_simple_reacher_promp = dict(DEFAULT_MP_ENV_DICT)
+    kwargs_dict_simple_reacher_promp['wrappers'].append('TODO') # TODO
+    kwargs_dict_simple_reacher_promp['movement_primitives_kwargs']['action_dim'] = 2 if "long" not in _v.lower() else 5
+    kwargs_dict_simple_reacher_promp['phase_generator_kwargs']['tau'] = 2
+    kwargs_dict_simple_reacher_promp['controller_kwargs']['p_gains'] = 0.6
+    kwargs_dict_simple_reacher_promp['controller_kwargs']['d_gains'] = 0.075
+    kwargs_dict_simple_reacher_promp['name'] = f"alr_envs:{_v}"
     register(
         id=_env_id,
         entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
-        kwargs={
-            "name": f"alr_envs:{_v}",
-            "wrappers": [classic_control.simple_reacher.MPWrapper],
-            "mp_kwargs": {
-                "num_dof": 2 if "long" not in _v.lower() else 5,
-                "num_basis": 5,
-                "duration": 2,
-                "policy_type": "motor",
-                "weights_scale": 1,
-                "zero_start": True,
-                "policy_kwargs": {
-                    "p_gains": .6,
-                    "d_gains": .075
-                }
-            }
-        }
+        kwargs=kwargs_dict_simple_reacher_promp
    )
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
@@ -573,28 +399,24 @@ register(
 )
 ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
 
+kwargs_dict_via_point_reacher_promp = dict(DEFAULT_MP_ENV_DICT)
+kwargs_dict_via_point_reacher_promp['wrappers'].append('TODO') # TODO
+kwargs_dict_via_point_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5
+kwargs_dict_via_point_reacher_promp['phase_generator_kwargs']['tau'] = 2
+kwargs_dict_via_point_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
+kwargs_dict_via_point_reacher_promp['name'] = "alr_envs:ViaPointReacher-v0"
 register(
     id="ViaPointReacherProMP-v0",
     entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
-    kwargs={
-        "name": f"alr_envs:ViaPointReacher-v0",
-        "wrappers": [classic_control.viapoint_reacher.MPWrapper],
-        "mp_kwargs": {
-            "num_dof": 5,
-            "num_basis": 5,
-            
"duration": 2, - "policy_type": "velocity", - "weights_scale": 1, - "zero_start": True - } - } + kwargs=kwargs_dict_via_point_reacher_promp ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0") ## Hole Reacher -_versions = ["v0", "v1", "v2"] +_versions = ["HoleReacher-v0"] for _v in _versions: - _env_id = f'HoleReacherDMP-{_v}' + _name = _v.split("-") + _env_id = f'{_name[0]}DMP-{_name[1]}' register( id=_env_id, entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', @@ -617,22 +439,19 @@ for _v in _versions: ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) - _env_id = f'HoleReacherProMP-{_v}' + _env_id = f'{_name[0]}ProMP-{_name[1]}' + kwargs_dict_hole_reacher_promp = dict(DEFAULT_MP_ENV_DICT) + kwargs_dict_hole_reacher_promp['wrappers'].append('TODO') # TODO + kwargs_dict_hole_reacher_promp['ep_wrapper_kwargs']['weight_scale'] = 2 + kwargs_dict_hole_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5 + kwargs_dict_hole_reacher_promp['phase_generator_kwargs']['tau'] = 2 + kwargs_dict_hole_reacher_promp['controller_kwargs']['controller_type'] = 'velocity' + kwargs_dict_hole_reacher_promp['basis_generator_kwargs']['num_basis'] = 5 + kwargs_dict_hole_reacher_promp['name'] = f"alr_envs:{_v}" register( id=_env_id, entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:HoleReacher-{_v}", - "wrappers": [classic_control.hole_reacher.MPWrapper], - "mp_kwargs": { - "num_dof": 5, - "num_basis": 3, - "duration": 2, - "policy_type": "velocity", - "weights_scale": 5, - "zero_start": True - } - } + kwargs=kwargs_dict_hole_reacher_promp ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) @@ -666,30 +485,268 @@ for _v in _versions: ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) _env_id = f'{_name[0]}ProMP-{_name[1]}' + kwargs_dict_alr_reacher_promp = dict(DEFAULT_MP_ENV_DICT) + kwargs_dict_alr_reacher_promp['wrappers'].append('TODO') # TODO + kwargs_dict_alr_reacher_promp['movement_primitives_kwargs']['action_dim'] = 5 if "long" not in _v.lower() else 7 + kwargs_dict_alr_reacher_promp['phase_generator_kwargs']['tau'] = 4 + kwargs_dict_alr_reacher_promp['controller_kwargs']['p_gains'] = 1 + kwargs_dict_alr_reacher_promp['controller_kwargs']['d_gains'] = 0.1 + kwargs_dict_alr_reacher_promp['name'] = f"alr_envs:{_v}" register( id=_env_id, entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:{_v}", - "wrappers": [mujoco.reacher.MPWrapper], - "mp_kwargs": { - "num_dof": 5 if "long" not in _v.lower() else 7, - "num_basis": 2, - "duration": 4, - "policy_type": "motor", - "weights_scale": 5, - "zero_start": True, - "policy_kwargs": { - "p_gains": 1, - "d_gains": 0.1 - } - } - } + kwargs=kwargs_dict_alr_reacher_promp ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) +######################################################################################################################## +## Beerpong ProMP +_versions = ['ALRBeerPong-v0'] +for _v in _versions: + _name = _v.split("-") + _env_id = f'{_name[0]}ProMP-{_name[1]}' + kwargs_dict_bp_promp = dict(DEFAULT_MP_ENV_DICT) + kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.NewMPWrapper) + kwargs_dict_bp_promp['movement_primitives_kwargs']['action_dim'] = 7 + kwargs_dict_bp_promp['phase_generator_kwargs']['tau'] = 0.8 + kwargs_dict_bp_promp['phase_generator_kwargs']['learn_tau'] = True + kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 
2.55, 3, 2., 2, 1.25])
+    kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+    kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2
+    kwargs_dict_bp_promp['name'] = f"alr_envs:{_v}"
+    register(
+        id=_env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+        kwargs=kwargs_dict_bp_promp
+    )
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+### BP with Fixed release
+_versions = ["ALRBeerPongStepBased-v0", "ALRBeerPongFixedRelease-v0"]
+for _v in _versions:
+    _name = _v.split("-")
+    _env_id = f'{_name[0]}ProMP-{_name[1]}'
+    kwargs_dict_bp_promp_fixed_release = dict(DEFAULT_MP_ENV_DICT)
+    kwargs_dict_bp_promp_fixed_release['wrappers'].append(mujoco.beerpong.NewMPWrapper)
+    kwargs_dict_bp_promp_fixed_release['movement_primitives_kwargs']['action_dim'] = 7
+    kwargs_dict_bp_promp_fixed_release['phase_generator_kwargs']['tau'] = 0.62
+    kwargs_dict_bp_promp_fixed_release['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25])
+    kwargs_dict_bp_promp_fixed_release['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+    kwargs_dict_bp_promp_fixed_release['basis_generator_kwargs']['num_basis'] = 2
+    kwargs_dict_bp_promp_fixed_release['name'] = f"alr_envs:{_v}"
+    register(
+        id=_env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+        kwargs=kwargs_dict_bp_promp_fixed_release
+    )
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+########################################################################################################################
+
+## Table Tennis needs to be fixed according to Zhou's implementation
+
+########################################################################################################################
+
+## AntJump
+_versions = ['ALRAntJump-v0']
+for _v in _versions:
+    _name = _v.split("-")
+    _env_id = f'{_name[0]}ProMP-{_name[1]}'
+    kwargs_dict_ant_jump_promp = dict(DEFAULT_MP_ENV_DICT)
+    kwargs_dict_ant_jump_promp['wrappers'].append(mujoco.ant_jump.NewMPWrapper)
+    kwargs_dict_ant_jump_promp['movement_primitives_kwargs']['action_dim'] = 8
+    kwargs_dict_ant_jump_promp['phase_generator_kwargs']['tau'] = 10
+    kwargs_dict_ant_jump_promp['controller_kwargs']['p_gains'] = np.ones(8)
+    kwargs_dict_ant_jump_promp['controller_kwargs']['d_gains'] = 0.1 * np.ones(8)
+    kwargs_dict_ant_jump_promp['name'] = f"alr_envs:{_v}"
+    register(
+        id=_env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+        kwargs=kwargs_dict_ant_jump_promp
+    )
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+########################################################################################################################
+
+## HalfCheetahJump
+_versions = ['ALRHalfCheetahJump-v0']
+for _v in _versions:
+    _name = _v.split("-")
+    _env_id = f'{_name[0]}ProMP-{_name[1]}'
+    kwargs_dict_halfcheetah_jump_promp = dict(DEFAULT_MP_ENV_DICT)
+    # TODO: dedicated HalfCheetahJump wrapper is still missing (see commit message); the AntJump wrapper is reused for now
+    kwargs_dict_halfcheetah_jump_promp['wrappers'].append(mujoco.ant_jump.NewMPWrapper)
+    kwargs_dict_halfcheetah_jump_promp['movement_primitives_kwargs']['action_dim'] = 6
+    kwargs_dict_halfcheetah_jump_promp['phase_generator_kwargs']['tau'] = 5
+    kwargs_dict_halfcheetah_jump_promp['controller_kwargs']['p_gains'] = np.ones(6)
+    kwargs_dict_halfcheetah_jump_promp['controller_kwargs']['d_gains'] = 0.1 * np.ones(6)
+    kwargs_dict_halfcheetah_jump_promp['name'] = f"alr_envs:{_v}"
+    register(
+        id=_env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+        kwargs=kwargs_dict_halfcheetah_jump_promp
+    )
+    
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+########################################################################################################################
+
+
+## HopperJump (the list below also covers the Hopper-based throw and jump-on-box tasks)
+_versions = ['ALRHopperJump-v0', 'ALRHopperJumpRndmJointsDesPos-v0', 'ALRHopperJumpRndmJointsDesPosStepBased-v0',
+             'ALRHopperJumpOnBox-v0', 'ALRHopperThrow-v0', 'ALRHopperThrowInBasket-v0']
+
+for _v in _versions:
+    _name = _v.split("-")
+    _env_id = f'{_name[0]}ProMP-{_name[1]}'
+    kwargs_dict_hopper_jump_promp = dict(DEFAULT_MP_ENV_DICT)
+    kwargs_dict_hopper_jump_promp['wrappers'].append('TODO') # TODO
+    kwargs_dict_hopper_jump_promp['movement_primitives_kwargs']['action_dim'] = 3
+    kwargs_dict_hopper_jump_promp['phase_generator_kwargs']['tau'] = 2
+    kwargs_dict_hopper_jump_promp['controller_kwargs']['p_gains'] = np.ones(3)
+    kwargs_dict_hopper_jump_promp['controller_kwargs']['d_gains'] = 0.1 * np.ones(3)
+    kwargs_dict_hopper_jump_promp['name'] = f"alr_envs:{_v}"
+    register(
+        id=_env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
+        kwargs=kwargs_dict_hopper_jump_promp
+    )
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+########################################################################################################################
+
+
+## Walker2DJump
+_versions = ['ALRWalker2DJump-v0']
+for _v in _versions:
+    _name = _v.split("-")
+    _env_id = f'{_name[0]}ProMP-{_name[1]}'
+    kwargs_dict_walker2d_jump_promp = dict(DEFAULT_MP_ENV_DICT)
+    kwargs_dict_walker2d_jump_promp['wrappers'].append('TODO') # TODO
+    kwargs_dict_walker2d_jump_promp['movement_primitives_kwargs']['action_dim'] = 6
+    kwargs_dict_walker2d_jump_promp['phase_generator_kwargs']['tau'] = 2.4
+    kwargs_dict_walker2d_jump_promp['controller_kwargs']['p_gains'] = np.ones(6)
+    kwargs_dict_walker2d_jump_promp['controller_kwargs']['d_gains'] = 0.1 * np.ones(6)
+    kwargs_dict_walker2d_jump_promp['name'] = f"alr_envs:{_v}"
+    register(
+        id=_env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+        kwargs=kwargs_dict_walker2d_jump_promp
+    )
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+### Deprecated: we will not provide non-random starts anymore
+"""
+register(
+    id='SimpleReacher-v1',
+    entry_point='alr_envs.alr.classic_control:SimpleReacherEnv',
+    max_episode_steps=200,
+    kwargs={
+        "n_links": 2,
+        "random_start": False
+    }
+)
+
+register(
+    id='LongSimpleReacher-v1',
+    entry_point='alr_envs.alr.classic_control:SimpleReacherEnv',
+    max_episode_steps=200,
+    kwargs={
+        "n_links": 5,
+        "random_start": False
+    }
+)
+register(
+    id='HoleReacher-v1',
+    entry_point='alr_envs.alr.classic_control:HoleReacherEnv',
+    max_episode_steps=200,
+    kwargs={
+        "n_links": 5,
+        "random_start": False,
+        "allow_self_collision": False,
+        "allow_wall_collision": False,
+        "hole_width": 0.25,
+        "hole_depth": 1,
+        "hole_x": None,
+        "collision_penalty": 100,
+    }
+)
+register(
+    id='HoleReacher-v2',
+    entry_point='alr_envs.alr.classic_control:HoleReacherEnv',
+    max_episode_steps=200,
+    kwargs={
+        "n_links": 5,
+        "random_start": False,
+        "allow_self_collision": False,
+        "allow_wall_collision": False,
+        "hole_width": 0.25,
+        "hole_depth": 1,
+        "hole_x": 2,
+        "collision_penalty": 1,
+    }
+)
+
+# CtxtFree are v0, Contextual are v1
+register(
+    id='ALRAntJump-v0',
+    entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv',
+    max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
+    kwargs={
+        "max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP,
+        "context": False
+    }
+)
+# CtxtFree are v0, 
Contextual are v1 +register( + id='ALRHalfCheetahJump-v0', + entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv', + max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP, + "context": False + } +) +register( + id='ALRHopperJump-v0', + entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv', + max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP, + "context": False, + "healthy_reward": 1.0 + } +) + +""" + +### Deprecated used for CorL paper +""" _vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1] +for i in _vs: + _env_id = f'ALRReacher{i}-v0' + register( + id=_env_id, + entry_point='alr_envs.alr.mujoco:ALRReacherEnv', + max_episode_steps=200, + kwargs={ + "steps_before_reward": 0, + "n_links": 5, + "balance": False, + 'ctrl_cost_weight': i + } + ) + + _env_id = f'ALRReacherSparse{i}-v0' + register( + id=_env_id, + entry_point='alr_envs.alr.mujoco:ALRReacherEnv', + max_episode_steps=200, + kwargs={ + "steps_before_reward": 200, + "n_links": 5, + "balance": False, + 'ctrl_cost_weight': i + } + ) + _vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1] for i in _vs: _env_id = f'ALRReacher{i}ProMP-v0' register( @@ -736,543 +793,56 @@ for i in _vs: } } ) - - -# ## Beerpong -# _versions = ["v0", "v1"] -# for _v in _versions: -# _env_id = f'BeerpongProMP-{_v}' -# register( -# id=_env_id, -# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', -# kwargs={ -# "name": f"alr_envs:ALRBeerPong-{_v}", -# "wrappers": [mujoco.beerpong.MPWrapper], -# "mp_kwargs": { -# "num_dof": 7, -# "num_basis": 2, -# # "duration": 1, -# "duration": 0.5, -# # "post_traj_time": 2, -# "post_traj_time": 2.5, -# "policy_type": "motor", -# "weights_scale": 0.14, -# # "weights_scale": 1, -# "zero_start": True, -# "zero_goal": False, -# "policy_kwargs": { -# "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]), -# "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]) -# } -# } -# } -# ) -# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## Beerpong -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'BeerpongProMP-{_v}' + register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', + id='ALRHopperJumpOnBox-v0', + entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv', + max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX, kwargs={ - "name": f"alr_envs:ALRBeerPong-{_v}", - "wrappers": [mujoco.beerpong.NewMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 7 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 0.8, # initial value - 'learn_tau': True, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]), - "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 2, - 'num_basis_zero_start': 2 - } - } + "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX, + "context": False + } ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## Beerpong ProMP fixed release -_env_id = 'BeerpongProMP-v2' -register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', + register( + 
id='ALRHopperThrow-v0', + entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv', + max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW, kwargs={ - "name": "alr_envs:ALRBeerPong-v4", - "wrappers": [mujoco.beerpong.NewMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 7 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 0.62, # initial value - 'learn_tau': False, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]), - "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 2, - 'num_basis_zero_start': 2 - } - } -) -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## Table Tennis -ctxt_dim = [2, 4] -for _v, cd in enumerate(ctxt_dim): - _env_id = f'TableTennisProMP-v{_v}' + "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW, + "context": False + } + ) register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": "alr_envs:TableTennis{}DCtxt-v0".format(cd), - "wrappers": [mujoco.table_tennis.MPWrapper], - "mp_kwargs": { - "num_dof": 7, - "num_basis": 2, - "duration": 1.25, - "post_traj_time": 1.5, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]), - "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]) - } - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## AntJump -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRAntJumpProMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:ALRAntJump-{_v}", - "wrappers": [mujoco.ant_jump.MPWrapper], - "mp_kwargs": { - "num_dof": 8, - "num_basis": 5, - "duration": 10, - "post_traj_time": 0, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": np.ones(8), - "d_gains": 0.1*np.ones(8) - } - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## AntJump -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRAntJumpProMP-{_v}' - register( - id= _env_id, - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', - kwargs={ - "name": f"alr_envs:ALRAntJump-{_v}", - "wrappers": [mujoco.ant_jump.NewMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 8 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 10, # initial value - 'learn_tau': False, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.ones(8), - "d_gains": 0.1*np.ones(8), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 2 - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - - - -## HalfCheetahJump -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRHalfCheetahJumpProMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:ALRHalfCheetahJump-{_v}", - "wrappers": 
[mujoco.half_cheetah_jump.MPWrapper], - "mp_kwargs": { - "num_dof": 6, - "num_basis": 5, - "duration": 5, - "post_traj_time": 0, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": np.ones(6), - "d_gains": 0.1*np.ones(6) - } - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -# ## HopperJump -# _versions = ["v0", "v1"] -# for _v in _versions: -# _env_id = f'ALRHopperJumpProMP-{_v}' -# register( -# id= _env_id, -# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', -# kwargs={ -# "name": f"alr_envs:ALRHopperJump-{_v}", -# "wrappers": [mujoco.hopper_jump.MPWrapper], -# "mp_kwargs": { -# "num_dof": 3, -# "num_basis": 5, -# "duration": 2, -# "post_traj_time": 0, -# "policy_type": "motor", -# "weights_scale": 1.0, -# "zero_start": True, -# "zero_goal": False, -# "policy_kwargs": { -# "p_gains": np.ones(3), -# "d_gains": 0.1*np.ones(3) -# } -# } -# } -# ) -# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -# ## HopperJump -# register( -# id= "ALRHopperJumpProMP-v2", -# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', -# kwargs={ -# "name": f"alr_envs:ALRHopperJump-v2", -# "wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper], -# "mp_kwargs": { -# "num_dof": 3, -# "num_basis": 5, -# "duration": 2, -# "post_traj_time": 0, -# "policy_type": "motor", -# "weights_scale": 1.0, -# "zero_start": True, -# "zero_goal": False, -# "policy_kwargs": { -# "p_gains": np.ones(3), -# "d_gains": 0.1*np.ones(3) -# } -# } -# } -# ) -# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2") - -## HopperJump -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRHopperJumpProMP-{_v}' - register( - id= _env_id, - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', - kwargs={ - "name": f"alr_envs:ALRHopperJump-{_v}", - "wrappers": [mujoco.hopper_jump.NewMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 3 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 2, # initial value - 'learn_tau': False, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.ones(3), - "d_gains": 0.1*np.ones(3), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 1 - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## HopperJump -register( - id= "ALRHopperJumpProMP-v2", - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', + id='ALRHopperThrowInBasket-v0', + entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv', + max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, kwargs={ - "name": f"alr_envs:ALRHopperJump-v2", - "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 3 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 2, # initial value - 'learn_tau': False, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.ones(3), - "d_gains": 0.1*np.ones(3), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 1 - } - } -) 
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2") - - -## HopperJump -register( - id= "ALRHopperJumpProMP-v3", - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', + "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, + "context": False + } + ) + register( + id='ALRWalker2DJump-v0', + entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv', + max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP, kwargs={ - "name": f"alr_envs:ALRHopperJump-v3", - "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 3 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 2, # initial value - 'learn_tau': False, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.ones(3), - "d_gains": 0.1*np.ones(3), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 1 - } - } -) -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v3") - - -## HopperJump -register( - id= "ALRHopperJumpProMP-v4", - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', - kwargs={ - "name": f"alr_envs:ALRHopperJump-v4", - "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 3 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 2, # initial value - 'learn_tau': False, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.ones(3), - "d_gains": 0.1*np.ones(3), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 1 - } - } -) -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v4") - -## HopperJumpOnBox -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRHopperJumpOnBoxProMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:ALRHopperJumpOnBox-{_v}", - "wrappers": [mujoco.hopper_jump.MPWrapper], - "mp_kwargs": { - "num_dof": 3, - "num_basis": 5, - "duration": 2, - "post_traj_time": 0, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": np.ones(3), - "d_gains": 0.1*np.ones(3) - } - } - } + "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP, + "context": False + } ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + register(id='TableTennis2DCtxt-v1', + entry_point='alr_envs.alr.mujoco:TTEnvGym', + max_episode_steps=MAX_EPISODE_STEPS, + kwargs={'ctxt_dim': 2, 'fixed_goal': True}) -#HopperThrow -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRHopperThrowProMP-{_v}' register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + id='ALRBeerPong-v0', + entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv', + max_episode_steps=300, kwargs={ - "name": f"alr_envs:ALRHopperThrow-{_v}", - "wrappers": [mujoco.hopper_throw.MPWrapper], - "mp_kwargs": { - "num_dof": 3, - "num_basis": 5, - "duration": 2, - "post_traj_time": 0, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": 
np.ones(3), - "d_gains": 0.1*np.ones(3) - } - } + "rndm_goal": False, + "cup_goal_pos": [0.1, -2.0], + "frame_skip": 2 } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## HopperThrowInBasket -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRHopperThrowInBasketProMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:ALRHopperThrowInBasket-{_v}", - "wrappers": [mujoco.hopper_throw.MPWrapper], - "mp_kwargs": { - "num_dof": 3, - "num_basis": 5, - "duration": 2, - "post_traj_time": 0, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": np.ones(3), - "d_gains": 0.1*np.ones(3) - } - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## Walker2DJump -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRWalker2DJumpProMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:ALRWalker2DJump-{_v}", - "wrappers": [mujoco.walker_2d_jump.MPWrapper], - "mp_kwargs": { - "num_dof": 6, - "num_basis": 5, - "duration": 2.4, - "post_traj_time": 0, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": np.ones(6), - "d_gains": 0.1*np.ones(6) - } - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) \ No newline at end of file + ) +""" diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py index b7d376e..64d9e78 100644 --- a/alr_envs/alr/mujoco/beerpong/beerpong.py +++ b/alr_envs/alr/mujoco/beerpong/beerpong.py @@ -178,8 +178,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle): [self._steps], ]) - def compute_reward(self): - @property def dt(self): return super(ALRBeerBongEnv, self).dt * self.repeat_action @@ -213,37 +211,37 @@ class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv): return ob, reward, done, infos -class ALRBeerBongEnvStepBased(ALRBeerBongEnv): - def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None): - super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos) - self.release_step = 62 # empirically evaluated for frame_skip=2! 
- - def step(self, a): - if self._steps < self.release_step: - return super(ALRBeerBongEnvStepBased, self).step(a) - else: - reward = 0 - done = False - while not done: - sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBased, self).step(np.zeros(a.shape)) - if not done or sub_infos['sim_crash']: - reward += sub_reward - else: - ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy() - cup_goal_dist_final = np.linalg.norm(ball_pos - self.sim.data.site_xpos[ - self.sim.model._site_name2id["cup_goal_final_table"]].copy()) - cup_goal_dist_top = np.linalg.norm(ball_pos - self.sim.data.site_xpos[ - self.sim.model._site_name2id["cup_goal_table"]].copy()) - if sub_infos['success']: - dist_rew = -cup_goal_dist_final ** 2 - else: - dist_rew = -0.5 * cup_goal_dist_final ** 2 - cup_goal_dist_top ** 2 - reward = reward - sub_infos['action_cost'] + dist_rew - infos = sub_infos - ob = sub_ob - ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the - # internal steps and thus, the observation also needs to be set correctly - return ob, reward, done, infos +# class ALRBeerBongEnvStepBased(ALRBeerBongEnv): +# def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None): +# super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos) +# self.release_step = 62 # empirically evaluated for frame_skip=2! +# +# def step(self, a): +# if self._steps < self.release_step: +# return super(ALRBeerBongEnvStepBased, self).step(a) +# else: +# reward = 0 +# done = False +# while not done: +# sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBased, self).step(np.zeros(a.shape)) +# if not done or sub_infos['sim_crash']: +# reward += sub_reward +# else: +# ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy() +# cup_goal_dist_final = np.linalg.norm(ball_pos - self.sim.data.site_xpos[ +# self.sim.model._site_name2id["cup_goal_final_table"]].copy()) +# cup_goal_dist_top = np.linalg.norm(ball_pos - self.sim.data.site_xpos[ +# self.sim.model._site_name2id["cup_goal_table"]].copy()) +# if sub_infos['success']: +# dist_rew = -cup_goal_dist_final ** 2 +# else: +# dist_rew = -0.5 * cup_goal_dist_final ** 2 - cup_goal_dist_top ** 2 +# reward = reward - sub_infos['action_cost'] + dist_rew +# infos = sub_infos +# ob = sub_ob +# ob[-1] = self.release_step + 1 # Since we simulate until the end of the episode, PPO does not see the +# # internal steps and thus, the observation also needs to be set correctly +# return ob, reward, done, infos if __name__ == "__main__": diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py index 146b039..5cd234c 100644 --- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py +++ b/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py @@ -298,76 +298,6 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv): return observation, reward, done, info -class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv): - def __init__(self, max_episode_steps=250): - super(ALRHopperJumpRndmPosEnv, self).__init__(exclude_current_positions_from_observation=False, - reset_noise_scale=5e-1, - max_episode_steps=max_episode_steps) - - def reset_model(self): - self._floor_geom_id = self.model.geom_name2id('floor') - self._foot_geom_id = self.model.geom_name2id('foot_geom') - noise_low = -np.ones(self.model.nq) * self._reset_noise_scale - noise_low[1] = 0 - noise_low[2] = 0 - noise_low[3] = -0.2 - noise_low[4] = 
-0.2 - noise_low[5] = -0.1 - - noise_high = np.ones(self.model.nq) * self._reset_noise_scale - noise_high[1] = 0 - noise_high[2] = 0 - noise_high[3] = 0 - noise_high[4] = 0 - noise_high[5] = 0.1 - - rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq) - # rnd_vec[2] *= 0.05 # the angle around the y axis shouldn't be too high as the agent then falls down quickly and - # can not recover - # rnd_vec[1] = np.clip(rnd_vec[1], 0, 0.3) - qpos = self.init_qpos + rnd_vec - qvel = self.init_qvel - - self.set_state(qpos, qvel) - - observation = self._get_obs() - return observation - - def step(self, action): - - self.current_step += 1 - self.do_simulation(action, self.frame_skip) - - self.contact_with_floor = self._contact_checker(self._floor_geom_id, self._foot_geom_id) if not \ - self.contact_with_floor else True - - height_after = self.get_body_com("torso")[2] - self.max_height = max(height_after, self.max_height) if self.contact_with_floor else 0 - - ctrl_cost = self.control_cost(action) - costs = ctrl_cost - done = False - - if self.current_step >= self.max_episode_steps: - healthy_reward = 0 - height_reward = self._forward_reward_weight * self.max_height # maybe move reward calculation into if structure and define two different _forward_reward_weight variables for context and episodic seperatley - rewards = height_reward + healthy_reward - - else: - # penalty for wrong start direction of first two joints; not needed, could be removed - rewards = ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0 - - observation = self._get_obs() - reward = rewards - costs - info = { - 'height': height_after, - 'max_height': self.max_height, - 'goal': self.goal - } - - return observation, reward, done, info - - if __name__ == '__main__': render_mode = "human" # "human" or "partial" or "final" # env = ALRHopperJumpEnv()
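Note on the kwargs-dict pattern introduced in alr_envs/alr/__init__.py: every registration block starts from dict(DEFAULT_MP_ENV_DICT), which is only a shallow copy. The nested config dicts and the shared "wrappers" list are the same objects in every copy, so each loop's .append(...) and nested assignments leak into DEFAULT_MP_ENV_DICT and into all later registrations. Below is a minimal, self-contained sketch of the pitfall and a copy.deepcopy-based alternative; the dict is trimmed to three keys and the wrapper names are purely illustrative, not part of this patch.

import copy

DEFAULT_MP_ENV_DICT = {
    "name": 'EnvName',
    "wrappers": [],
    "phase_generator_kwargs": {'tau': 1.5},
}

# Shallow copy: the top-level dict is new, but its values are shared objects.
shallow = dict(DEFAULT_MP_ENV_DICT)
shallow["wrappers"].append("SomeWrapper")       # also appends to DEFAULT_MP_ENV_DICT["wrappers"]
shallow["phase_generator_kwargs"]["tau"] = 0.8  # also overwrites the shared default tau
assert DEFAULT_MP_ENV_DICT["wrappers"] == ["SomeWrapper"]
assert DEFAULT_MP_ENV_DICT["phase_generator_kwargs"]["tau"] == 0.8

# Deep copy: nested containers are duplicated, so per-env tweaks stay local.
deep = copy.deepcopy(DEFAULT_MP_ENV_DICT)
deep["wrappers"].append("AnotherWrapper")
deep["phase_generator_kwargs"]["tau"] = 10
assert DEFAULT_MP_ENV_DICT["wrappers"] == ["SomeWrapper"]           # unchanged by the deep copy
assert DEFAULT_MP_ENV_DICT["phase_generator_kwargs"]["tau"] == 0.8  # unchanged by the deep copy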
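For reference, the renamed ids follow the f'{_name[0]}ProMP-{_name[1]}' scheme, e.g. ALRBeerPong-v0 gets the movement-primitive variant ALRBeerPongProMP-v0, resolved through the regular gym registry once alr_envs is imported. A usage sketch under the assumptions that the remaining 'TODO' wrapper entries are filled in and that the gym version targeted here predates 0.26 (reset() returns only the observation, step() returns a 4-tuple):

import gym
import alr_envs  # executes the register(...) calls in alr_envs/alr/__init__.py

env = gym.make('ALRBeerPongProMP-v0')
obs = env.reset()
# One step corresponds to rolling out the full ProMP trajectory: the action is the
# movement-primitive parameter vector (basis weights per DoF, presumably plus the
# release time tau, since learn_tau=True for this env).
obs, reward, done, info = env.step(env.action_space.sample())
env.close()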