From 2706af0b77a688d55075fabc2f821a85e4066a65 Mon Sep 17 00:00:00 2001 From: Onur Date: Wed, 6 Jul 2022 11:29:04 +0200 Subject: [PATCH] refactored meshes, cleaned up init -- now with DMP -- --- alr_envs/alr/__init__.py | 230 +++++++----------- alr_envs/alr/mujoco/__init__.py | 5 +- .../assets/beerpong_wo_cup_big_table.xml | 2 +- .../assets}/meshes/wam/base_link_convex.stl | Bin .../assets}/meshes/wam/base_link_fine.stl | Bin .../wam/bhand_finger_dist_link_convex.stl | Bin .../wam/bhand_finger_dist_link_fine.stl | Bin .../wam/bhand_finger_med_link_convex.stl | Bin .../meshes/wam/bhand_finger_med_link_fine.stl | Bin ...nger_prox_link_convex_decomposition_p1.stl | Bin ...nger_prox_link_convex_decomposition_p2.stl | Bin ...nger_prox_link_convex_decomposition_p3.stl | Bin .../wam/bhand_finger_prox_link_fine.stl | Bin .../assets}/meshes/wam/bhand_palm_fine.stl | Bin ...hand_palm_link_convex_decomposition_p1.stl | Bin ...hand_palm_link_convex_decomposition_p2.stl | Bin ...hand_palm_link_convex_decomposition_p3.stl | Bin ...hand_palm_link_convex_decomposition_p4.stl | Bin .../{ => beerpong/assets}/meshes/wam/cup.stl | Bin .../assets}/meshes/wam/cup_split1.stl | Bin .../assets}/meshes/wam/cup_split10.stl | Bin .../assets}/meshes/wam/cup_split11.stl | Bin .../assets}/meshes/wam/cup_split12.stl | Bin .../assets}/meshes/wam/cup_split13.stl | Bin .../assets}/meshes/wam/cup_split14.stl | Bin .../assets}/meshes/wam/cup_split15.stl | Bin .../assets}/meshes/wam/cup_split16.stl | Bin .../assets}/meshes/wam/cup_split17.stl | Bin .../assets}/meshes/wam/cup_split18.stl | Bin .../assets}/meshes/wam/cup_split2.stl | Bin .../assets}/meshes/wam/cup_split3.stl | Bin .../assets}/meshes/wam/cup_split4.stl | Bin .../assets}/meshes/wam/cup_split5.stl | Bin .../assets}/meshes/wam/cup_split6.stl | Bin .../assets}/meshes/wam/cup_split7.stl | Bin .../assets}/meshes/wam/cup_split8.stl | Bin .../assets}/meshes/wam/cup_split9.stl | Bin .../assets}/meshes/wam/elbow_link_convex.stl | Bin .../assets}/meshes/wam/elbow_link_fine.stl | Bin .../forearm_link_convex_decomposition_p1.stl | Bin .../forearm_link_convex_decomposition_p2.stl | Bin .../assets}/meshes/wam/forearm_link_fine.stl | Bin .../shoulder_link_convex_decomposition_p1.stl | Bin .../shoulder_link_convex_decomposition_p2.stl | Bin .../shoulder_link_convex_decomposition_p3.stl | Bin .../assets}/meshes/wam/shoulder_link_fine.stl | Bin .../meshes/wam/shoulder_pitch_link_convex.stl | Bin .../meshes/wam/shoulder_pitch_link_fine.stl | Bin ...upper_arm_link_convex_decomposition_p1.stl | Bin ...upper_arm_link_convex_decomposition_p2.stl | Bin .../meshes/wam/upper_arm_link_fine.stl | Bin .../meshes/wam/wrist_palm_link_convex.stl | Bin .../meshes/wam/wrist_palm_link_fine.stl | Bin ...ist_pitch_link_convex_decomposition_p1.stl | Bin ...ist_pitch_link_convex_decomposition_p2.stl | Bin ...ist_pitch_link_convex_decomposition_p3.stl | Bin .../meshes/wam/wrist_pitch_link_fine.stl | Bin ...wrist_yaw_link_convex_decomposition_p1.stl | Bin ...wrist_yaw_link_convex_decomposition_p2.stl | Bin .../meshes/wam/wrist_yaw_link_fine.stl | Bin alr_envs/alr/mujoco/beerpong/beerpong.py | 29 +-- alr_envs/alr/mujoco/reacher/reacher.py | 2 +- alr_envs/black_box/black_box_wrapper.py | 2 +- alr_envs/utils/make_env_helpers.py | 1 - 64 files changed, 109 insertions(+), 162 deletions(-) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/base_link_convex.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/base_link_fine.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/bhand_finger_dist_link_convex.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/bhand_finger_dist_link_fine.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/bhand_finger_med_link_convex.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/bhand_finger_med_link_fine.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/bhand_finger_prox_link_fine.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/bhand_palm_fine.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/cup.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/cup_split1.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/cup_split10.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/cup_split11.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/cup_split12.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/cup_split13.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/cup_split14.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/cup_split15.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/cup_split16.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/cup_split17.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/cup_split18.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/cup_split2.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/cup_split3.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/cup_split4.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/cup_split5.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/cup_split6.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/cup_split7.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/cup_split8.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/cup_split9.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/elbow_link_convex.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/elbow_link_fine.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/forearm_link_convex_decomposition_p1.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/forearm_link_convex_decomposition_p2.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/forearm_link_fine.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/shoulder_link_convex_decomposition_p1.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/shoulder_link_convex_decomposition_p2.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/shoulder_link_convex_decomposition_p3.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/shoulder_link_fine.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/shoulder_pitch_link_convex.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/shoulder_pitch_link_fine.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/upper_arm_link_convex_decomposition_p1.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/upper_arm_link_convex_decomposition_p2.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/upper_arm_link_fine.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/wrist_palm_link_convex.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/wrist_palm_link_fine.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/wrist_pitch_link_fine.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl (100%) rename alr_envs/alr/mujoco/{ => beerpong/assets}/meshes/wam/wrist_yaw_link_fine.stl (100%) diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py index cdff3a1..1b7d378 100644 --- a/alr_envs/alr/__init__.py +++ b/alr_envs/alr/__init__.py @@ -3,14 +3,11 @@ from copy import deepcopy import numpy as np from gym import register -from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS from . import classic_control, mujoco from .classic_control.hole_reacher.hole_reacher import HoleReacherEnv from .classic_control.simple_reacher.simple_reacher import SimpleReacherEnv from .classic_control.viapoint_reacher.viapoint_reacher import ViaPointReacherEnv from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP -from .mujoco.ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv -from .mujoco.ball_in_a_cup.biac_pd import ALRBallInACupPDEnv from .mujoco.half_cheetah_jump.half_cheetah_jump import MAX_EPISODE_STEPS_HALFCHEETAHJUMP from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX @@ -21,17 +18,14 @@ from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} -DEFAULT_BB_DICT = { +DEFAULT_BB_DICT_ProMP = { "name": 'EnvName', "wrappers": [], "trajectory_generator_kwargs": { 'trajectory_generator_type': 'promp' }, "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'learn_tau': False, - 'learn_delay': False + 'phase_generator_type': 'linear' }, "controller_kwargs": { 'controller_type': 'motor', @@ -45,6 +39,26 @@ DEFAULT_BB_DICT = { } } +DEFAULT_BB_DICT_DMP = { + "name": 'EnvName', + "wrappers": [], + "trajectory_generator_kwargs": { + 'trajectory_generator_type': 'dmp' + }, + "phase_generator_kwargs": { + 'phase_generator_type': 'exp' + }, + "controller_kwargs": { + 'controller_type': 'motor', + "p_gains": 1.0, + "d_gains": 0.1, + }, + "basis_generator_kwargs": { + 'basis_generator_type': 'rbf', + 'num_basis': 5 + } +} + # Classic Control ## Simple Reacher register( @@ -199,130 +213,83 @@ register( } ) -## Table Tennis -register(id='TableTennis2DCtxt-v0', - entry_point='alr_envs.alr.mujoco:TTEnvGym', - max_episode_steps=MAX_EPISODE_STEPS, - kwargs={'ctxt_dim': 2}) - -register(id='TableTennis4DCtxt-v0', - entry_point='alr_envs.alr.mujocco:TTEnvGym', - max_episode_steps=MAX_EPISODE_STEPS, - kwargs={'ctxt_dim': 4}) - register( id='BeerPong-v0', - entry_point='alr_envs.alr.mujoco:BeerBongEnv', + entry_point='alr_envs.alr.mujoco:BeerPongEnv', max_episode_steps=300, - kwargs={ - "frame_skip": 2 - } ) -register( - id='BeerPong-v1', - entry_point='alr_envs.alr.mujoco:BeerBongEnv', - max_episode_steps=300, - kwargs={ - "frame_skip": 2 - } -) - -# Here we use the same reward as in ALRBeerPong-v0, but now consider after the release, +# Here we use the same reward as in BeerPong-v0, but now consider after the release, # only one time step, i.e. we simulate until the end of th episode register( id='BeerPongStepBased-v0', - entry_point='alr_envs.alr.mujoco:BeerBongEnvStepBased', + entry_point='alr_envs.alr.mujoco:BeerPongEnvStepBasedEpisodicReward', max_episode_steps=300, - kwargs={ - "cup_goal_pos": [-0.3, -1.2], - "frame_skip": 2 - } ) # Beerpong with episodic reward, but fixed release time step register( id='BeerPongFixedRelease-v0', - entry_point='alr_envs.alr.mujoco:BeerBongEnvFixedReleaseStep', + entry_point='alr_envs.alr.mujoco:BeerPongEnvFixedReleaseStep', max_episode_steps=300, - kwargs={ - "cup_goal_pos": [-0.3, -1.2], - "frame_skip": 2 - } ) # Motion Primitive Environments ## Simple Reacher -_versions = ["SimpleReacher-v0", "SimpleReacher-v1", "LongSimpleReacher-v0", "LongSimpleReacher-v1"] +_versions = ["SimpleReacher-v0", "LongSimpleReacher-v0"] for _v in _versions: _name = _v.split("-") _env_id = f'{_name[0]}DMP-{_name[1]}' + kwargs_dict_simple_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP) + kwargs_dict_simple_reacher_dmp['wrappers'].append(classic_control.simple_reacher.MPWrapper) + kwargs_dict_simple_reacher_dmp['controller_kwargs']['p_gains'] = 0.6 + kwargs_dict_simple_reacher_dmp['controller_kwargs']['d_gains'] = 0.075 + kwargs_dict_simple_reacher_dmp['trajectory_generator_kwargs']['weight_scale'] = 50 + kwargs_dict_simple_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2 + kwargs_dict_simple_reacher_dmp['name'] = f"{_v}" register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', - # max_episode_steps=1, - kwargs={ - "name": f"{_v}", - "wrappers": [classic_control.simple_reacher.MPWrapper], - "traj_gen_kwargs": { - "num_dof": 2 if "long" not in _v.lower() else 5, - "num_basis": 5, - "duration": 2, - "alpha_phase": 2, - "learn_goal": True, - "policy_type": "motor", - "weights_scale": 50, - "policy_kwargs": { - "p_gains": .6, - "d_gains": .075 - } - } - } + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', + kwargs=kwargs_dict_simple_reacher_dmp ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) _env_id = f'{_name[0]}ProMP-{_name[1]}' - kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_BB_DICT) + kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_simple_reacher_promp['wrappers'].append(classic_control.simple_reacher.MPWrapper) kwargs_dict_simple_reacher_promp['controller_kwargs']['p_gains'] = 0.6 kwargs_dict_simple_reacher_promp['controller_kwargs']['d_gains'] = 0.075 - kwargs_dict_simple_reacher_promp['name'] = _env_id + kwargs_dict_simple_reacher_promp['name'] = _v register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_simple_reacher_promp ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) # Viapoint reacher +kwargs_dict_via_point_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP) +kwargs_dict_via_point_reacher_dmp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper) +kwargs_dict_via_point_reacher_dmp['controller_kwargs']['controller_type'] = 'velocity' +kwargs_dict_via_point_reacher_dmp['trajectory_generator_kwargs']['weight_scale'] = 50 +kwargs_dict_via_point_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2 +kwargs_dict_via_point_reacher_dmp['name'] = "ViaPointReacher-v0" register( id='ViaPointReacherDMP-v0', - entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', # max_episode_steps=1, - kwargs={ - "name": "ViaPointReacher-v0", - "wrappers": [classic_control.viapoint_reacher.MPWrapper], - "traj_gen_kwargs": { - "num_dof": 5, - "num_basis": 5, - "duration": 2, - "learn_goal": True, - "alpha_phase": 2, - "policy_type": "velocity", - "weights_scale": 50, - } - } + kwargs=kwargs_dict_via_point_reacher_dmp ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0") -kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_BB_DICT) +kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_via_point_reacher_promp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper) kwargs_dict_via_point_reacher_promp['controller_kwargs']['controller_type'] = 'velocity' -kwargs_dict_via_point_reacher_promp['name'] = "ViaPointReacherProMP-v0" +kwargs_dict_via_point_reacher_promp['name'] = "ViaPointReacher-v0" register( id="ViaPointReacherProMP-v0", - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_via_point_reacher_promp ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0") @@ -332,37 +299,30 @@ _versions = ["HoleReacher-v0"] for _v in _versions: _name = _v.split("-") _env_id = f'{_name[0]}DMP-{_name[1]}' + kwargs_dict_hole_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP) + kwargs_dict_hole_reacher_dmp['wrappers'].append(classic_control.hole_reacher.MPWrapper) + kwargs_dict_hole_reacher_dmp['controller_kwargs']['controller_type'] = 'velocity' + # TODO: Before it was weight scale 50 and goal scale 0.1. We now only have weight scale and thus set it to 500. Check + kwargs_dict_hole_reacher_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 + kwargs_dict_hole_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2.5 + kwargs_dict_hole_reacher_dmp['name'] = _v register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', # max_episode_steps=1, - kwargs={ - "name": f"HoleReacher-{_v}", - "wrappers": [classic_control.hole_reacher.MPWrapper], - "traj_gen_kwargs": { - "num_dof": 5, - "num_basis": 5, - "duration": 2, - "learn_goal": True, - "alpha_phase": 2.5, - "bandwidth_factor": 2, - "policy_type": "velocity", - "weights_scale": 50, - "goal_scale": 0.1 - } - } + kwargs=kwargs_dict_hole_reacher_dmp ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) _env_id = f'{_name[0]}ProMP-{_name[1]}' - kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_BB_DICT) + kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_hole_reacher_promp['wrappers'].append(classic_control.hole_reacher.MPWrapper) kwargs_dict_hole_reacher_promp['trajectory_generator_kwargs']['weight_scale'] = 2 kwargs_dict_hole_reacher_promp['controller_kwargs']['controller_type'] = 'velocity' kwargs_dict_hole_reacher_promp['name'] = f"{_v}" register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_hole_reacher_promp ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) @@ -372,36 +332,26 @@ _versions = ["Reacher5d-v0", "Reacher7d-v0", "Reacher5dSparse-v0", "Reacher7dSpa for _v in _versions: _name = _v.split("-") _env_id = f'{_name[0]}DMP-{_name[1]}' + kwargs_dict_reacherNd_dmp = deepcopy(DEFAULT_BB_DICT_DMP) + kwargs_dict_reacherNd_dmp['wrappers'].append(mujoco.reacher.MPWrapper) + kwargs_dict_reacherNd_dmp['trajectory_generator_kwargs']['weight_scale'] = 5 + kwargs_dict_reacherNd_dmp['phase_generator_kwargs']['alpha_phase'] = 2 + kwargs_dict_reacherNd_dmp['basis_generator_kwargs']['num_basis'] = 2 + kwargs_dict_reacherNd_dmp['name'] = _v register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', # max_episode_steps=1, - kwargs={ - "name": f"{_v}", - "wrappers": [mujoco.reacher.MPWrapper], - "traj_gen_kwargs": { - "num_dof": 5 if "long" not in _v.lower() else 7, - "num_basis": 2, - "duration": 4, - "alpha_phase": 2, - "learn_goal": True, - "policy_type": "motor", - "weights_scale": 5, - "policy_kwargs": { - "p_gains": 1, - "d_gains": 0.1 - } - } - } + kwargs=kwargs_dict_reacherNd_dmp ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) _env_id = f'{_name[0]}ProMP-{_name[1]}' - kwargs_dict_alr_reacher_promp = deepcopy(DEFAULT_BB_DICT) + kwargs_dict_alr_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_alr_reacher_promp['wrappers'].append(mujoco.reacher.MPWrapper) kwargs_dict_alr_reacher_promp['controller_kwargs']['p_gains'] = 1 kwargs_dict_alr_reacher_promp['controller_kwargs']['d_gains'] = 0.1 - kwargs_dict_alr_reacher_promp['name'] = f"{_v}" + kwargs_dict_alr_reacher_promp['name'] = _v register( id=_env_id, entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', @@ -415,14 +365,14 @@ _versions = ['BeerPong-v0'] for _v in _versions: _name = _v.split("-") _env_id = f'{_name[0]}ProMP-{_name[1]}' - kwargs_dict_bp_promp = deepcopy(DEFAULT_BB_DICT) + kwargs_dict_bp_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.MPWrapper) kwargs_dict_bp_promp['phase_generator_kwargs']['learn_tau'] = True kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]) kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]) kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2 kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2 - kwargs_dict_bp_promp['name'] = f"{_v}" + kwargs_dict_bp_promp['name'] = _v register( id=_env_id, entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', @@ -435,17 +385,17 @@ _versions = ["BeerPongStepBased-v0", "BeerPongFixedRelease-v0"] for _v in _versions: _name = _v.split("-") _env_id = f'{_name[0]}ProMP-{_name[1]}' - kwargs_dict_bp_promp = deepcopy(DEFAULT_BB_DICT) + kwargs_dict_bp_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.MPWrapper) kwargs_dict_bp_promp['phase_generator_kwargs']['tau'] = 0.62 kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]) kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]) kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2 kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2 - kwargs_dict_bp_promp['name'] = f"{_v}" + kwargs_dict_bp_promp['name'] = _v register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_bp_promp ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) @@ -460,12 +410,12 @@ _versions = ['ALRAntJump-v0'] for _v in _versions: _name = _v.split("-") _env_id = f'{_name[0]}ProMP-{_name[1]}' - kwargs_dict_ant_jump_promp = deepcopy(DEFAULT_BB_DICT) + kwargs_dict_ant_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_ant_jump_promp['wrappers'].append(mujoco.ant_jump.MPWrapper) - kwargs_dict_ant_jump_promp['name'] = f"{_v}" + kwargs_dict_ant_jump_promp['name'] = _v register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_ant_jump_promp ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) @@ -477,12 +427,12 @@ _versions = ['ALRHalfCheetahJump-v0'] for _v in _versions: _name = _v.split("-") _env_id = f'{_name[0]}ProMP-{_name[1]}' - kwargs_dict_halfcheetah_jump_promp = deepcopy(DEFAULT_BB_DICT) + kwargs_dict_halfcheetah_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_halfcheetah_jump_promp['wrappers'].append(mujoco.half_cheetah_jump.MPWrapper) - kwargs_dict_halfcheetah_jump_promp['name'] = f"{_v}" + kwargs_dict_halfcheetah_jump_promp['name'] = _v register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_halfcheetah_jump_promp ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) @@ -491,18 +441,18 @@ for _v in _versions: ## HopperJump -_versions = ['ALRHopperJump-v0', 'ALRHopperJumpRndmJointsDesPos-v0', 'ALRHopperJumpRndmJointsDesPosStepBased-v0', - 'ALRHopperJumpOnBox-v0', 'ALRHopperThrow-v0', 'ALRHopperThrowInBasket-v0'] +_versions = ['HopperJump-v0', 'HopperJumpSparse-v0', 'ALRHopperJumpOnBox-v0', 'ALRHopperThrow-v0', + 'ALRHopperThrowInBasket-v0'] # TODO: Check if all environments work with the same MPWrapper for _v in _versions: _name = _v.split("-") _env_id = f'{_name[0]}ProMP-{_name[1]}' - kwargs_dict_hopper_jump_promp = deepcopy(DEFAULT_BB_DICT) + kwargs_dict_hopper_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_hopper_jump_promp['wrappers'].append(mujoco.hopper_jump.MPWrapper) - kwargs_dict_hopper_jump_promp['name'] = f"{_v}" + kwargs_dict_hopper_jump_promp['name'] = _v register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_hopper_jump_promp ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) @@ -515,12 +465,12 @@ _versions = ['ALRWalker2DJump-v0'] for _v in _versions: _name = _v.split("-") _env_id = f'{_name[0]}ProMP-{_name[1]}' - kwargs_dict_walker2d_jump_promp = deepcopy(DEFAULT_BB_DICT) + kwargs_dict_walker2d_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_walker2d_jump_promp['wrappers'].append(mujoco.walker_2d_jump.MPWrapper) - kwargs_dict_walker2d_jump_promp['name'] = f"{_v}" + kwargs_dict_walker2d_jump_promp['name'] = _v register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_walker2d_jump_promp ) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py index 906a9a5..6e40228 100644 --- a/alr_envs/alr/mujoco/__init__.py +++ b/alr_envs/alr/mujoco/__init__.py @@ -1,11 +1,8 @@ +from .beerpong.beerpong import BeerPongEnv, BeerPongEnvFixedReleaseStep, BeerPongEnvStepBasedEpisodicReward from .ant_jump.ant_jump import AntJumpEnv -from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv -from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv -from alr_envs.alr.mujoco.beerpong.beerpong import BeerPongEnv from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv from .hopper_throw.hopper_throw import ALRHopperThrowEnv from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv from .reacher.reacher import ReacherEnv -from .table_tennis.tt_gym import TTEnvGym from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml index 99df1d3..756e3d2 100644 --- a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml +++ b/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml @@ -1,5 +1,5 @@ - +