diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py new file mode 100644 index 0000000..b49776a --- /dev/null +++ b/alr_envs/alr/__init__.py @@ -0,0 +1,1290 @@ +import numpy as np +from gym import register + +from . import classic_control, mujoco +from .classic_control.hole_reacher.hole_reacher import HoleReacherEnv +from .classic_control.simple_reacher.simple_reacher import SimpleReacherEnv +from .classic_control.viapoint_reacher.viapoint_reacher import ViaPointReacherEnv +from .mujoco.ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv +from .mujoco.ball_in_a_cup.biac_pd import ALRBallInACupPDEnv +from .mujoco.reacher.alr_reacher import ALRReacherEnv +from .mujoco.reacher.balancing import BalancingEnv + +from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS +from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP +from .mujoco.half_cheetah_jump.half_cheetah_jump import MAX_EPISODE_STEPS_HALFCHEETAHJUMP +from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP +from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX +from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW +from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET +from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP + +ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} + +# Classic Control +## Simple Reacher +register( + id='SimpleReacher-v0', + entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', + max_episode_steps=200, + kwargs={ + "n_links": 2, + } +) + +register( + id='SimpleReacher-v1', + entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', + max_episode_steps=200, + kwargs={ + "n_links": 2, + "random_start": False + } +) + +register( + id='LongSimpleReacher-v0', + entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', + max_episode_steps=200, + kwargs={ + "n_links": 5, + } +) + +register( + id='LongSimpleReacher-v1', + entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', + max_episode_steps=200, + kwargs={ + "n_links": 5, + "random_start": False + } +) + +## Viapoint Reacher + +register( + id='ViaPointReacher-v0', + entry_point='alr_envs.alr.classic_control:ViaPointReacherEnv', + max_episode_steps=200, + kwargs={ + "n_links": 5, + "allow_self_collision": False, + "collision_penalty": 1000 + } +) + +## Hole Reacher +register( + id='HoleReacher-v0', + entry_point='alr_envs.alr.classic_control:HoleReacherEnv', + max_episode_steps=200, + kwargs={ + "n_links": 5, + "random_start": True, + "allow_self_collision": False, + "allow_wall_collision": False, + "hole_width": None, + "hole_depth": 1, + "hole_x": None, + "collision_penalty": 100, + } +) + +register( + id='HoleReacher-v1', + entry_point='alr_envs.alr.classic_control:HoleReacherEnv', + max_episode_steps=200, + kwargs={ + "n_links": 5, + "random_start": False, + "allow_self_collision": False, + "allow_wall_collision": False, + "hole_width": 0.25, + "hole_depth": 1, + "hole_x": None, + "collision_penalty": 100, + } +) + +register( + id='HoleReacher-v2', + entry_point='alr_envs.alr.classic_control:HoleReacherEnv', + max_episode_steps=200, + kwargs={ + "n_links": 5, + "random_start": False, + "allow_self_collision": False, + "allow_wall_collision": False, + "hole_width": 0.25, + "hole_depth": 1, + "hole_x": 2, + "collision_penalty": 1, + } +) + +# Mujoco + +## Reacher +register( + id='ALRReacher-v0', + entry_point='alr_envs.alr.mujoco:ALRReacherEnv', + 
max_episode_steps=200, + kwargs={ + "steps_before_reward": 0, + "n_links": 5, + "balance": False, + } +) + +register( + id='ALRReacherSparse-v0', + entry_point='alr_envs.alr.mujoco:ALRReacherEnv', + max_episode_steps=200, + kwargs={ + "steps_before_reward": 200, + "n_links": 5, + "balance": False, + } +) + +register( + id='ALRReacherSparseOptCtrl-v0', + entry_point='alr_envs.alr.mujoco:ALRReacherOptCtrlEnv', + max_episode_steps=200, + kwargs={ + "steps_before_reward": 200, + "n_links": 5, + "balance": False, + } +) + +register( + id='ALRReacherSparseBalanced-v0', + entry_point='alr_envs.alr.mujoco:ALRReacherEnv', + max_episode_steps=200, + kwargs={ + "steps_before_reward": 200, + "n_links": 5, + "balance": True, + } +) + +register( + id='ALRLongReacher-v0', + entry_point='alr_envs.alr.mujoco:ALRReacherEnv', + max_episode_steps=200, + kwargs={ + "steps_before_reward": 0, + "n_links": 7, + "balance": False, + } +) + +register( + id='ALRLongReacherSparse-v0', + entry_point='alr_envs.alr.mujoco:ALRReacherEnv', + max_episode_steps=200, + kwargs={ + "steps_before_reward": 200, + "n_links": 7, + "balance": False, + } +) + +register( + id='ALRLongReacherSparseBalanced-v0', + entry_point='alr_envs.alr.mujoco:ALRReacherEnv', + max_episode_steps=200, + kwargs={ + "steps_before_reward": 200, + "n_links": 7, + "balance": True, + } +) + +_vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1] +for i in _vs: + _env_id = f'ALRReacher{i}-v0' + register( + id=_env_id, + entry_point='alr_envs.alr.mujoco:ALRReacherEnv', + max_episode_steps=200, + kwargs={ + "steps_before_reward": 0, + "n_links": 5, + "balance": False, + 'ctrl_cost_weight': i + } + ) + + _env_id = f'ALRReacherSparse{i}-v0' + register( + id=_env_id, + entry_point='alr_envs.alr.mujoco:ALRReacherEnv', + max_episode_steps=200, + kwargs={ + "steps_before_reward": 200, + "n_links": 5, + "balance": False, + 'ctrl_cost_weight': i + } + ) + +# CtxtFree are v0, Contextual are v1 +register( + id='ALRAntJump-v0', + entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv', + max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP, + "context": False + } +) + +# CtxtFree are v0, Contextual are v1 +register( + id='ALRAntJump-v1', + entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv', + max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP, + "context": True + } +) + +# CtxtFree are v0, Contextual are v1 +register( + id='ALRHalfCheetahJump-v0', + entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv', + max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP, + "context": False + } +) +# CtxtFree are v0, Contextual are v1 +register( + id='ALRHalfCheetahJump-v1', + entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv', + max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP, + "context": True + } +) +# CtxtFree are v0, Contextual are v1 +register( + id='ALRHopperJump-v0', + entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv', + max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP, + "context": False, + "healthy_reward": 1.0 + } +) +register( + id='ALRHopperJump-v1', + entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv', + max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP, + "context": True + 
} +) + +register( + id='ALRHopperJump-v2', + entry_point='alr_envs.alr.mujoco:ALRHopperJumpRndmPosEnv', + max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP + } +) + +register( + id='ALRHopperJump-v3', + entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnv', + max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP, + "context": True, + "healthy_reward": 1.0 + } +) + +##### Hopper Jump step based reward +register( + id='ALRHopperJump-v4', + entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnvStepBased', + max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP, + "context": True, + "healthy_reward": 1.0 + } +) + + +# CtxtFree are v0, Contextual are v1 +register( + id='ALRHopperJumpOnBox-v0', + entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv', + max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX, + "context": False + } +) +# CtxtFree are v0, Contextual are v1 +register( + id='ALRHopperJumpOnBox-v1', + entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv', + max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX, + "context": True + } +) +# CtxtFree are v0, Contextual are v1 + +register( + id='ALRHopperThrow-v0', + entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv', + max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW, + "context": False + } +) +# CtxtFree are v0, Contextual are v1 +register( + id='ALRHopperThrow-v1', + entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv', + max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW, + "context": True + } +) +# CtxtFree are v0, Contextual are v1 + +register( + id='ALRHopperThrowInBasket-v0', + entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv', + max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, + "context": False + } +) +# CtxtFree are v0, Contextual are v1 +register( + id='ALRHopperThrowInBasket-v1', + entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv', + max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, + "context": True + } +) +# CtxtFree are v0, Contextual are v1 +register( + id='ALRWalker2DJump-v0', + entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv', + max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP, + "context": False + } +) +# CtxtFree are v0, Contextual are v1 +register( + id='ALRWalker2DJump-v1', + entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv', + max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP, + kwargs={ + "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP, + "context": True + } +) + +## Balancing Reacher + +register( + id='Balancing-v0', + entry_point='alr_envs.alr.mujoco:BalancingEnv', + max_episode_steps=200, + kwargs={ + "n_links": 5, + } +) + +## Table Tennis +register(id='TableTennis2DCtxt-v0', + entry_point='alr_envs.alr.mujoco:TTEnvGym', + max_episode_steps=MAX_EPISODE_STEPS, + kwargs={'ctxt_dim': 2}) + +register(id='TableTennis2DCtxt-v1', + entry_point='alr_envs.alr.mujoco:TTEnvGym', + max_episode_steps=MAX_EPISODE_STEPS, + kwargs={'ctxt_dim': 2, 
         'fixed_goal': True})
+
+register(id='TableTennis4DCtxt-v0',
+         entry_point='alr_envs.alr.mujoco:TTEnvGym',
+         max_episode_steps=MAX_EPISODE_STEPS,
+         kwargs={'ctxt_dim': 4})
+
+## BeerPong
+# fixed goal cup position
+register(
+    id='ALRBeerPong-v0',
+    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
+    max_episode_steps=300,
+    kwargs={
+        "rndm_goal": False,
+        "cup_goal_pos": [0.1, -2.0],
+        "frame_skip": 2
+    }
+)
+
+
+# random goal cup position
+register(
+    id='ALRBeerPong-v1',
+    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
+    max_episode_steps=300,
+    kwargs={
+        "rndm_goal": True,
+        "cup_goal_pos": [-0.3, -1.2],
+        "frame_skip": 2
+    }
+)
+
+# random goal cup position
+register(
+    id='ALRBeerPong-v2',
+    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBased',
+    max_episode_steps=300,
+    kwargs={
+        "rndm_goal": True,
+        "cup_goal_pos": [-0.3, -1.2],
+        "frame_skip": 2
+    }
+)
+# Beerpong with episodic reward, but fixed release time step
+register(
+    id='ALRBeerPong-v3',
+    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBasedEpisodicReward',
+    max_episode_steps=300,
+    kwargs={
+        "rndm_goal": True,
+        "cup_goal_pos": [-0.3, -1.2],
+        "frame_skip": 2
+    }
+)
+
+# Beerpong with episodic reward, but fixed release time step
+register(
+    id='ALRBeerPong-v4',
+    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvFixedReleaseStep',
+    max_episode_steps=300,
+    kwargs={
+        "rndm_goal": True,
+        "cup_goal_pos": [-0.3, -1.2],
+        "frame_skip": 2
+    }
+)
+
+# Motion Primitive Environments
+
+## Simple Reacher
+_versions = ["SimpleReacher-v0", "SimpleReacher-v1", "LongSimpleReacher-v0", "LongSimpleReacher-v1"]
+for _v in _versions:
+    _name = _v.split("-")
+    _env_id = f'{_name[0]}DMP-{_name[1]}'
+    register(
+        id=_env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
+        # max_episode_steps=1,
+        kwargs={
+            "name": f"alr_envs:{_v}",
+            "wrappers": [classic_control.simple_reacher.MPWrapper],
+            "mp_kwargs": {
+                "num_dof": 2 if "long" not in _v.lower() else 5,
+                "num_basis": 5,
+                "duration": 2,
+                "alpha_phase": 2,
+                "learn_goal": True,
+                "policy_type": "motor",
+                "weights_scale": 50,
+                "policy_kwargs": {
+                    "p_gains": .6,
+                    "d_gains": .075
+                }
+            }
+        }
+    )
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
+
+    _env_id = f'{_name[0]}ProMP-{_name[1]}'
+    register(
+        id=_env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+        kwargs={
+            "name": f"alr_envs:{_v}",
+            "wrappers": [classic_control.simple_reacher.MPWrapper],
+            "mp_kwargs": {
+                "num_dof": 2 if "long" not in _v.lower() else 5,
+                "num_basis": 5,
+                "duration": 2,
+                "policy_type": "motor",
+                "weights_scale": 1,
+                "zero_start": True,
+                "policy_kwargs": {
+                    "p_gains": .6,
+                    "d_gains": .075
+                }
+            }
+        }
+    )
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
+# Viapoint reacher
+register(
+    id='ViaPointReacherDMP-v0',
+    entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
+    # max_episode_steps=1,
+    kwargs={
+        "name": "alr_envs:ViaPointReacher-v0",
+        "wrappers": [classic_control.viapoint_reacher.MPWrapper],
+        "mp_kwargs": {
+            "num_dof": 5,
+            "num_basis": 5,
+            "duration": 2,
+            "learn_goal": True,
+            "alpha_phase": 2,
+            "policy_type": "velocity",
+            "weights_scale": 50,
+        }
+    }
+)
+ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
+
+register(
+    id="ViaPointReacherProMP-v0",
+    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+    kwargs={
+        "name": f"alr_envs:ViaPointReacher-v0",
+        "wrappers": [classic_control.viapoint_reacher.MPWrapper],
"mp_kwargs": { + "num_dof": 5, + "num_basis": 5, + "duration": 2, + "policy_type": "velocity", + "weights_scale": 1, + "zero_start": True + } + } +) +ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0") + +## Hole Reacher +_versions = ["v0", "v1", "v2"] +for _v in _versions: + _env_id = f'HoleReacherDMP-{_v}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + # max_episode_steps=1, + kwargs={ + "name": f"alr_envs:HoleReacher-{_v}", + "wrappers": [classic_control.hole_reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 5, + "num_basis": 5, + "duration": 2, + "learn_goal": True, + "alpha_phase": 2.5, + "bandwidth_factor": 2, + "policy_type": "velocity", + "weights_scale": 50, + "goal_scale": 0.1 + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) + + _env_id = f'HoleReacherProMP-{_v}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + kwargs={ + "name": f"alr_envs:HoleReacher-{_v}", + "wrappers": [classic_control.hole_reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 5, + "num_basis": 3, + "duration": 2, + "policy_type": "velocity", + "weights_scale": 5, + "zero_start": True + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + +## ALRReacher +_versions = ["ALRReacher-v0", "ALRLongReacher-v0", "ALRReacherSparse-v0", "ALRLongReacherSparse-v0"] +for _v in _versions: + _name = _v.split("-") + _env_id = f'{_name[0]}DMP-{_name[1]}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + # max_episode_steps=1, + kwargs={ + "name": f"alr_envs:{_v}", + "wrappers": [mujoco.reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 5 if "long" not in _v.lower() else 7, + "num_basis": 2, + "duration": 4, + "alpha_phase": 2, + "learn_goal": True, + "policy_type": "motor", + "weights_scale": 5, + "policy_kwargs": { + "p_gains": 1, + "d_gains": 0.1 + } + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) + + _env_id = f'{_name[0]}ProMP-{_name[1]}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', + kwargs={ + "name": f"alr_envs:{_v}", + "wrappers": [mujoco.reacher.NewMPWrapper], + "ep_wrapper_kwargs": { + "weight_scale": 1 + }, + "movement_primitives_kwargs": { + 'movement_primitives_type': 'promp', + 'action_dim': 5 if "long" not in _v.lower() else 7 + }, + "phase_generator_kwargs": { + 'phase_generator_type': 'linear', + 'delay': 0, + 'tau': 4, # initial value + 'learn_tau': False, + 'learn_delay': False + }, + "controller_kwargs": { + 'controller_type': 'motor', + "p_gains": 1, + "d_gains": 0.1 + }, + "basis_generator_kwargs": { + 'basis_generator_type': 'zero_rbf', + 'num_basis': 2, + 'num_basis_zero_start': 1 + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + + +_vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1] +for i in _vs: + _env_id = f'ALRReacher{i}ProMP-v0' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + kwargs={ + "name": f"alr_envs:{_env_id.replace('ProMP', '')}", + "wrappers": [mujoco.reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 5, + "num_basis": 5, + "duration": 4, + "policy_type": "motor", + # "weights_scale": 5, + "n_zero_basis": 1, + "zero_start": True, + "policy_kwargs": { + "p_gains": 1, + "d_gains": 0.1 + } + } + } + ) + + _env_id = f'ALRReacherSparse{i}ProMP-v0' + register( + id=_env_id, + 
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + kwargs={ + "name": f"alr_envs:{_env_id.replace('ProMP', '')}", + "wrappers": [mujoco.reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 5, + "num_basis": 5, + "duration": 4, + "policy_type": "motor", + # "weights_scale": 5, + "n_zero_basis": 1, + "zero_start": True, + "policy_kwargs": { + "p_gains": 1, + "d_gains": 0.1 + } + } + } + ) + + +# ## Beerpong +# _versions = ["v0", "v1"] +# for _v in _versions: +# _env_id = f'BeerpongProMP-{_v}' +# register( +# id=_env_id, +# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', +# kwargs={ +# "name": f"alr_envs:ALRBeerPong-{_v}", +# "wrappers": [mujoco.beerpong.MPWrapper], +# "mp_kwargs": { +# "num_dof": 7, +# "num_basis": 2, +# # "duration": 1, +# "duration": 0.5, +# # "post_traj_time": 2, +# "post_traj_time": 2.5, +# "policy_type": "motor", +# "weights_scale": 0.14, +# # "weights_scale": 1, +# "zero_start": True, +# "zero_goal": False, +# "policy_kwargs": { +# "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]), +# "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]) +# } +# } +# } +# ) +# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + +## Beerpong +_versions = ["v0", "v1"] +for _v in _versions: + _env_id = f'BeerpongProMP-{_v}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', + kwargs={ + "name": f"alr_envs:ALRBeerPong-{_v}", + "wrappers": [mujoco.beerpong.NewMPWrapper], + "ep_wrapper_kwargs": { + "weight_scale": 1 + }, + "movement_primitives_kwargs": { + 'movement_primitives_type': 'promp', + 'action_dim': 7 + }, + "phase_generator_kwargs": { + 'phase_generator_type': 'linear', + 'delay': 0, + 'tau': 0.8, # initial value + 'learn_tau': True, + 'learn_delay': False + }, + "controller_kwargs": { + 'controller_type': 'motor', + "p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]), + "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]), + }, + "basis_generator_kwargs": { + 'basis_generator_type': 'zero_rbf', + 'num_basis': 2, + 'num_basis_zero_start': 2 + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + +## Beerpong ProMP fixed release +_env_id = 'BeerpongProMP-v2' +register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', + kwargs={ + "name": "alr_envs:ALRBeerPong-v4", + "wrappers": [mujoco.beerpong.NewMPWrapper], + "ep_wrapper_kwargs": { + "weight_scale": 1 + }, + "movement_primitives_kwargs": { + 'movement_primitives_type': 'promp', + 'action_dim': 7 + }, + "phase_generator_kwargs": { + 'phase_generator_type': 'linear', + 'delay': 0, + 'tau': 0.62, # initial value + 'learn_tau': False, + 'learn_delay': False + }, + "controller_kwargs": { + 'controller_type': 'motor', + "p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]), + "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]), + }, + "basis_generator_kwargs": { + 'basis_generator_type': 'zero_rbf', + 'num_basis': 2, + 'num_basis_zero_start': 2 + } + } +) +ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + +## Table Tennis +ctxt_dim = [2, 4] +for _v, cd in enumerate(ctxt_dim): + _env_id = f'TableTennisProMP-v{_v}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + kwargs={ + "name": "alr_envs:TableTennis{}DCtxt-v0".format(cd), + "wrappers": [mujoco.table_tennis.MPWrapper], + "mp_kwargs": { + "num_dof": 7, + "num_basis": 2, + "duration": 1.25, + "post_traj_time": 1.5, + 
"policy_type": "motor", + "weights_scale": 1.0, + "zero_start": True, + "zero_goal": False, + "policy_kwargs": { + "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]), + "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]) + } + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + +## AntJump +_versions = ["v0", "v1"] +for _v in _versions: + _env_id = f'ALRAntJumpProMP-{_v}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + kwargs={ + "name": f"alr_envs:ALRAntJump-{_v}", + "wrappers": [mujoco.ant_jump.MPWrapper], + "mp_kwargs": { + "num_dof": 8, + "num_basis": 5, + "duration": 10, + "post_traj_time": 0, + "policy_type": "motor", + "weights_scale": 1.0, + "zero_start": True, + "zero_goal": False, + "policy_kwargs": { + "p_gains": np.ones(8), + "d_gains": 0.1*np.ones(8) + } + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + +## AntJump +_versions = ["v0", "v1"] +for _v in _versions: + _env_id = f'ALRAntJumpProMP-{_v}' + register( + id= _env_id, + entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', + kwargs={ + "name": f"alr_envs:ALRAntJump-{_v}", + "wrappers": [mujoco.ant_jump.NewMPWrapper], + "ep_wrapper_kwargs": { + "weight_scale": 1 + }, + "movement_primitives_kwargs": { + 'movement_primitives_type': 'promp', + 'action_dim': 8 + }, + "phase_generator_kwargs": { + 'phase_generator_type': 'linear', + 'delay': 0, + 'tau': 10, # initial value + 'learn_tau': False, + 'learn_delay': False + }, + "controller_kwargs": { + 'controller_type': 'motor', + "p_gains": np.ones(8), + "d_gains": 0.1*np.ones(8), + }, + "basis_generator_kwargs": { + 'basis_generator_type': 'zero_rbf', + 'num_basis': 5, + 'num_basis_zero_start': 2 + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + + + +## HalfCheetahJump +_versions = ["v0", "v1"] +for _v in _versions: + _env_id = f'ALRHalfCheetahJumpProMP-{_v}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + kwargs={ + "name": f"alr_envs:ALRHalfCheetahJump-{_v}", + "wrappers": [mujoco.half_cheetah_jump.MPWrapper], + "mp_kwargs": { + "num_dof": 6, + "num_basis": 5, + "duration": 5, + "post_traj_time": 0, + "policy_type": "motor", + "weights_scale": 1.0, + "zero_start": True, + "zero_goal": False, + "policy_kwargs": { + "p_gains": np.ones(6), + "d_gains": 0.1*np.ones(6) + } + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + +# ## HopperJump +# _versions = ["v0", "v1"] +# for _v in _versions: +# _env_id = f'ALRHopperJumpProMP-{_v}' +# register( +# id= _env_id, +# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', +# kwargs={ +# "name": f"alr_envs:ALRHopperJump-{_v}", +# "wrappers": [mujoco.hopper_jump.MPWrapper], +# "mp_kwargs": { +# "num_dof": 3, +# "num_basis": 5, +# "duration": 2, +# "post_traj_time": 0, +# "policy_type": "motor", +# "weights_scale": 1.0, +# "zero_start": True, +# "zero_goal": False, +# "policy_kwargs": { +# "p_gains": np.ones(3), +# "d_gains": 0.1*np.ones(3) +# } +# } +# } +# ) +# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + +# ## HopperJump +# register( +# id= "ALRHopperJumpProMP-v2", +# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', +# kwargs={ +# "name": f"alr_envs:ALRHopperJump-v2", +# "wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper], +# "mp_kwargs": { +# "num_dof": 3, +# "num_basis": 5, +# "duration": 2, +# "post_traj_time": 0, +# "policy_type": "motor", +# 
"weights_scale": 1.0, +# "zero_start": True, +# "zero_goal": False, +# "policy_kwargs": { +# "p_gains": np.ones(3), +# "d_gains": 0.1*np.ones(3) +# } +# } +# } +# ) +# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2") + +## HopperJump +_versions = ["v0", "v1"] +for _v in _versions: + _env_id = f'ALRHopperJumpProMP-{_v}' + register( + id= _env_id, + entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', + kwargs={ + "name": f"alr_envs:ALRHopperJump-{_v}", + "wrappers": [mujoco.hopper_jump.NewMPWrapper], + "ep_wrapper_kwargs": { + "weight_scale": 1 + }, + "movement_primitives_kwargs": { + 'movement_primitives_type': 'promp', + 'action_dim': 3 + }, + "phase_generator_kwargs": { + 'phase_generator_type': 'linear', + 'delay': 0, + 'tau': 2, # initial value + 'learn_tau': False, + 'learn_delay': False + }, + "controller_kwargs": { + 'controller_type': 'motor', + "p_gains": np.ones(3), + "d_gains": 0.1*np.ones(3), + }, + "basis_generator_kwargs": { + 'basis_generator_type': 'zero_rbf', + 'num_basis': 5, + 'num_basis_zero_start': 1 + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + +## HopperJump +register( + id= "ALRHopperJumpProMP-v2", + entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', + kwargs={ + "name": f"alr_envs:ALRHopperJump-v2", + "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper], + "ep_wrapper_kwargs": { + "weight_scale": 1 + }, + "movement_primitives_kwargs": { + 'movement_primitives_type': 'promp', + 'action_dim': 3 + }, + "phase_generator_kwargs": { + 'phase_generator_type': 'linear', + 'delay': 0, + 'tau': 2, # initial value + 'learn_tau': False, + 'learn_delay': False + }, + "controller_kwargs": { + 'controller_type': 'motor', + "p_gains": np.ones(3), + "d_gains": 0.1*np.ones(3), + }, + "basis_generator_kwargs": { + 'basis_generator_type': 'zero_rbf', + 'num_basis': 5, + 'num_basis_zero_start': 1 + } + } +) +ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2") + + +## HopperJump +register( + id= "ALRHopperJumpProMP-v3", + entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', + kwargs={ + "name": f"alr_envs:ALRHopperJump-v3", + "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper], + "ep_wrapper_kwargs": { + "weight_scale": 1 + }, + "movement_primitives_kwargs": { + 'movement_primitives_type': 'promp', + 'action_dim': 3 + }, + "phase_generator_kwargs": { + 'phase_generator_type': 'linear', + 'delay': 0, + 'tau': 2, # initial value + 'learn_tau': False, + 'learn_delay': False + }, + "controller_kwargs": { + 'controller_type': 'motor', + "p_gains": np.ones(3), + "d_gains": 0.1*np.ones(3), + }, + "basis_generator_kwargs": { + 'basis_generator_type': 'zero_rbf', + 'num_basis': 5, + 'num_basis_zero_start': 1 + } + } +) +ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v3") + + +## HopperJump +register( + id= "ALRHopperJumpProMP-v4", + entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', + kwargs={ + "name": f"alr_envs:ALRHopperJump-v4", + "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper], + "ep_wrapper_kwargs": { + "weight_scale": 1 + }, + "movement_primitives_kwargs": { + 'movement_primitives_type': 'promp', + 'action_dim': 3 + }, + "phase_generator_kwargs": { + 'phase_generator_type': 'linear', + 'delay': 0, + 'tau': 2, # initial value + 'learn_tau': False, + 'learn_delay': False + }, + "controller_kwargs": { + 'controller_type': 'motor', + "p_gains": np.ones(3), + "d_gains": 0.1*np.ones(3), + }, + 
"basis_generator_kwargs": { + 'basis_generator_type': 'zero_rbf', + 'num_basis': 5, + 'num_basis_zero_start': 1 + } + } +) +ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v4") + +## HopperJumpOnBox +_versions = ["v0", "v1"] +for _v in _versions: + _env_id = f'ALRHopperJumpOnBoxProMP-{_v}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + kwargs={ + "name": f"alr_envs:ALRHopperJumpOnBox-{_v}", + "wrappers": [mujoco.hopper_jump.MPWrapper], + "mp_kwargs": { + "num_dof": 3, + "num_basis": 5, + "duration": 2, + "post_traj_time": 0, + "policy_type": "motor", + "weights_scale": 1.0, + "zero_start": True, + "zero_goal": False, + "policy_kwargs": { + "p_gains": np.ones(3), + "d_gains": 0.1*np.ones(3) + } + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + +#HopperThrow +_versions = ["v0", "v1"] +for _v in _versions: + _env_id = f'ALRHopperThrowProMP-{_v}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + kwargs={ + "name": f"alr_envs:ALRHopperThrow-{_v}", + "wrappers": [mujoco.hopper_throw.MPWrapper], + "mp_kwargs": { + "num_dof": 3, + "num_basis": 5, + "duration": 2, + "post_traj_time": 0, + "policy_type": "motor", + "weights_scale": 1.0, + "zero_start": True, + "zero_goal": False, + "policy_kwargs": { + "p_gains": np.ones(3), + "d_gains": 0.1*np.ones(3) + } + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + +## HopperThrowInBasket +_versions = ["v0", "v1"] +for _v in _versions: + _env_id = f'ALRHopperThrowInBasketProMP-{_v}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + kwargs={ + "name": f"alr_envs:ALRHopperThrowInBasket-{_v}", + "wrappers": [mujoco.hopper_throw.MPWrapper], + "mp_kwargs": { + "num_dof": 3, + "num_basis": 5, + "duration": 2, + "post_traj_time": 0, + "policy_type": "motor", + "weights_scale": 1.0, + "zero_start": True, + "zero_goal": False, + "policy_kwargs": { + "p_gains": np.ones(3), + "d_gains": 0.1*np.ones(3) + } + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + +## Walker2DJump +_versions = ["v0", "v1"] +for _v in _versions: + _env_id = f'ALRWalker2DJumpProMP-{_v}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + kwargs={ + "name": f"alr_envs:ALRWalker2DJump-{_v}", + "wrappers": [mujoco.walker_2d_jump.MPWrapper], + "mp_kwargs": { + "num_dof": 6, + "num_basis": 5, + "duration": 2.4, + "post_traj_time": 0, + "policy_type": "motor", + "weights_scale": 1.0, + "zero_start": True, + "zero_goal": False, + "policy_kwargs": { + "p_gains": np.ones(6), + "d_gains": 0.1*np.ones(6) + } + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) diff --git a/alr_envs/alr/mujoco/reacher/alr_reacher.py b/alr_envs/alr/mujoco/reacher/alr_reacher.py new file mode 100644 index 0000000..0699c44 --- /dev/null +++ b/alr_envs/alr/mujoco/reacher/alr_reacher.py @@ -0,0 +1,152 @@ +import os + +import numpy as np +from gym import utils +from gym.envs.mujoco import MujocoEnv + +import alr_envs.utils.utils as alr_utils + + +class ALRReacherEnv(MujocoEnv, utils.EzPickle): + def __init__(self, steps_before_reward: int = 200, n_links: int = 5, ctrl_cost_weight: int = 1, + balance: bool = False): + utils.EzPickle.__init__(**locals()) + + self._steps = 0 + self.steps_before_reward = steps_before_reward + self.n_links = n_links + + self.balance = balance + self.balance_weight = 1.0 + 
+        self.ctrl_cost_weight = ctrl_cost_weight
+
+        self.reward_weight = 1
+        if steps_before_reward == 200:
+            self.reward_weight = 200
+        elif steps_before_reward == 50:
+            self.reward_weight = 50
+
+        if n_links == 5:
+            file_name = 'reacher_5links.xml'
+        elif n_links == 7:
+            file_name = 'reacher_7links.xml'
+        else:
+            raise ValueError(f"Invalid number of links {n_links}, only 5 or 7 allowed.")
+
+        MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", file_name), 2)
+
+    def step(self, a):
+        self._steps += 1
+
+        reward_dist = 0.0
+        angular_vel = 0.0
+        reward_balance = 0.0
+        is_delayed = self.steps_before_reward > 0
+        reward_ctrl = - np.square(a).sum() * self.ctrl_cost_weight
+        if self._steps >= self.steps_before_reward:
+            vec = self.get_body_com("fingertip") - self.get_body_com("target")
+            reward_dist -= self.reward_weight * np.linalg.norm(vec)
+            if is_delayed:
+                # avoid giving this penalty for normal step based case
+                # angular_vel -= 10 * np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
+                angular_vel -= 10 * np.square(self.sim.data.qvel.flat[:self.n_links]).sum()
+        # if is_delayed:
+        #     # Higher control penalty for sparse reward per timestep
+        #     reward_ctrl *= 10
+
+        if self.balance:
+            reward_balance -= self.balance_weight * np.abs(
+                alr_utils.angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad"))
+
+        reward = reward_dist + reward_ctrl + angular_vel + reward_balance
+        self.do_simulation(a, self.frame_skip)
+        ob = self._get_obs()
+        done = False
+        return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl,
+                                      velocity=angular_vel, reward_balance=reward_balance,
+                                      end_effector=self.get_body_com("fingertip").copy(),
+                                      goal=self.goal if hasattr(self, "goal") else None)
+
+    def viewer_setup(self):
+        self.viewer.cam.trackbodyid = 0
+
+    # def reset_model(self):
+    #     qpos = self.init_qpos
+    #     if not hasattr(self, "goal"):
+    #         self.goal = np.array([-0.25, 0.25])
+    #         # self.goal = self.init_qpos.copy()[:2] + 0.05
+    #     qpos[-2:] = self.goal
+    #     qvel = self.init_qvel
+    #     qvel[-2:] = 0
+    #     self.set_state(qpos, qvel)
+    #     self._steps = 0
+    #
+    #     return self._get_obs()
+
+    def reset_model(self):
+        qpos = self.init_qpos.copy()
+        while True:
+            # full space
+            # self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
+            # I Quadrant
+            # self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
+            # II Quadrant
+            # self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
+            # II + III Quadrant
+            # self.goal = np.random.uniform(low=-self.n_links / 10, high=[0, self.n_links / 10], size=2)
+            # I + II Quadrant
+            self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=self.n_links / 10, size=2)
+            if np.linalg.norm(self.goal) < self.n_links / 10:
+                break
+        qpos[-2:] = self.goal
+        qvel = self.init_qvel.copy()
+        qvel[-2:] = 0
+        self.set_state(qpos, qvel)
+        self._steps = 0
+
+        return self._get_obs()
+
+    # def reset_model(self):
+    #     qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
+    #     while True:
+    #         self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
+    #         if np.linalg.norm(self.goal) < self.n_links / 10:
+    #             break
+    #     qpos[-2:] = self.goal
+    #     qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
+    #     qvel[-2:] = 0
+    #     self.set_state(qpos, qvel)
+    #     self._steps = 0
+    #
+    #     return self._get_obs()
+
+    def _get_obs(self):
+        theta = self.sim.data.qpos.flat[:self.n_links]
target = self.get_body_com("target") + return np.concatenate([ + np.cos(theta), + np.sin(theta), + target[:2], # x-y of goal position + self.sim.data.qvel.flat[:self.n_links], # angular velocity + self.get_body_com("fingertip") - target, # goal distance + [self._steps], + ]) + + +if __name__ == '__main__': + nl = 5 + render_mode = "human" # "human" or "partial" or "final" + env = ALRReacherEnv(n_links=nl) + obs = env.reset() + + for i in range(2000): + # objective.load_result("/tmp/cma") + # test with random actions + ac = env.action_space.sample() + obs, rew, d, info = env.step(ac) + if i % 10 == 0: + env.render(mode=render_mode) + if d: + env.reset() + + env.close() diff --git a/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py b/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py new file mode 100644 index 0000000..bf59380 --- /dev/null +++ b/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py @@ -0,0 +1,24 @@ +from alr_envs.mp.episodic_wrapper import EpisodicWrapper +from typing import Union, Tuple +import numpy as np + + +class NewMPWrapper(EpisodicWrapper): + + @property + def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: + return self.env.sim.data.qpos.flat[:self.env.n_links] + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return self.env.sim.data.qvel.flat[:self.env.n_links] + + def set_active_obs(self): + return np.concatenate([ + [False] * self.env.n_links, # cos + [False] * self.env.n_links, # sin + [True] * 2, # goal position + [False] * self.env.n_links, # angular velocity + [False] * 3, # goal distance + # self.get_body_com("target"), # only return target to make problem harder + [False], # step + ]) diff --git a/fancy_gym/envs/classic_control/simple_reacher/__init__.py b/fancy_gym/envs/classic_control/simple_reacher/__init__.py index 989b5a9..5d15867 100644 --- a/fancy_gym/envs/classic_control/simple_reacher/__init__.py +++ b/fancy_gym/envs/classic_control/simple_reacher/__init__.py @@ -1 +1,2 @@ -from .mp_wrapper import MPWrapper \ No newline at end of file +from .mp_wrapper import MPWrapper +from .new_mp_wrapper import NewMPWrapper
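
Usage sketch for the registrations above. This is a minimal illustration, not part of the patch; it assumes the installed `alr_envs` package executes this module's `register()` calls on import and that the episodic ProMP wrappers roll out one full trajectory per `step()` call:

import gym

import alr_envs  # assumed to trigger the register() calls above on import

# Step-based environment with delayed ("sparse") reward
env = gym.make("ALRReacherSparse-v0")
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()

# ProMP variant: a single step() executes the whole parameterized trajectory
mp_env = gym.make("ALRReacherSparseProMP-v0")
mp_env.reset()
obs, episode_reward, done, info = mp_env.step(mp_env.action_space.sample())
mp_env.close()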