refactored meshes, cleaned up init -- now with DMP --

This commit is contained in:
Onur 2022-07-06 11:29:04 +02:00
parent 78d48c4300
commit 2706af0b77
64 changed files with 109 additions and 162 deletions

View File

@ -3,14 +3,11 @@ from copy import deepcopy
import numpy as np import numpy as np
from gym import register from gym import register
from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
from . import classic_control, mujoco from . import classic_control, mujoco
from .classic_control.hole_reacher.hole_reacher import HoleReacherEnv from .classic_control.hole_reacher.hole_reacher import HoleReacherEnv
from .classic_control.simple_reacher.simple_reacher import SimpleReacherEnv from .classic_control.simple_reacher.simple_reacher import SimpleReacherEnv
from .classic_control.viapoint_reacher.viapoint_reacher import ViaPointReacherEnv from .classic_control.viapoint_reacher.viapoint_reacher import ViaPointReacherEnv
from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP
from .mujoco.ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
from .mujoco.ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
from .mujoco.half_cheetah_jump.half_cheetah_jump import MAX_EPISODE_STEPS_HALFCHEETAHJUMP from .mujoco.half_cheetah_jump.half_cheetah_jump import MAX_EPISODE_STEPS_HALFCHEETAHJUMP
from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP
from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX
@ -21,17 +18,14 @@ from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
DEFAULT_BB_DICT = { DEFAULT_BB_DICT_ProMP = {
"name": 'EnvName', "name": 'EnvName',
"wrappers": [], "wrappers": [],
"trajectory_generator_kwargs": { "trajectory_generator_kwargs": {
'trajectory_generator_type': 'promp' 'trajectory_generator_type': 'promp'
}, },
"phase_generator_kwargs": { "phase_generator_kwargs": {
'phase_generator_type': 'linear', 'phase_generator_type': 'linear'
'delay': 0,
'learn_tau': False,
'learn_delay': False
}, },
"controller_kwargs": { "controller_kwargs": {
'controller_type': 'motor', 'controller_type': 'motor',
@ -45,6 +39,26 @@ DEFAULT_BB_DICT = {
} }
} }
DEFAULT_BB_DICT_DMP = {
"name": 'EnvName',
"wrappers": [],
"trajectory_generator_kwargs": {
'trajectory_generator_type': 'dmp'
},
"phase_generator_kwargs": {
'phase_generator_type': 'exp'
},
"controller_kwargs": {
'controller_type': 'motor',
"p_gains": 1.0,
"d_gains": 0.1,
},
"basis_generator_kwargs": {
'basis_generator_type': 'rbf',
'num_basis': 5
}
}
# Classic Control # Classic Control
## Simple Reacher ## Simple Reacher
register( register(
@ -199,130 +213,83 @@ register(
} }
) )
## Table Tennis
register(id='TableTennis2DCtxt-v0',
entry_point='alr_envs.alr.mujoco:TTEnvGym',
max_episode_steps=MAX_EPISODE_STEPS,
kwargs={'ctxt_dim': 2})
register(id='TableTennis4DCtxt-v0',
entry_point='alr_envs.alr.mujocco:TTEnvGym',
max_episode_steps=MAX_EPISODE_STEPS,
kwargs={'ctxt_dim': 4})
register( register(
id='BeerPong-v0', id='BeerPong-v0',
entry_point='alr_envs.alr.mujoco:BeerBongEnv', entry_point='alr_envs.alr.mujoco:BeerPongEnv',
max_episode_steps=300, max_episode_steps=300,
kwargs={
"frame_skip": 2
}
) )
register( # Here we use the same reward as in BeerPong-v0, but now consider after the release,
id='BeerPong-v1',
entry_point='alr_envs.alr.mujoco:BeerBongEnv',
max_episode_steps=300,
kwargs={
"frame_skip": 2
}
)
# Here we use the same reward as in ALRBeerPong-v0, but now consider after the release,
# only one time step, i.e. we simulate until the end of the episode # only one time step, i.e. we simulate until the end of the episode
register( register(
id='BeerPongStepBased-v0', id='BeerPongStepBased-v0',
entry_point='alr_envs.alr.mujoco:BeerBongEnvStepBased', entry_point='alr_envs.alr.mujoco:BeerPongEnvStepBasedEpisodicReward',
max_episode_steps=300, max_episode_steps=300,
kwargs={
"cup_goal_pos": [-0.3, -1.2],
"frame_skip": 2
}
) )
# Beerpong with episodic reward, but fixed release time step # Beerpong with episodic reward, but fixed release time step
register( register(
id='BeerPongFixedRelease-v0', id='BeerPongFixedRelease-v0',
entry_point='alr_envs.alr.mujoco:BeerBongEnvFixedReleaseStep', entry_point='alr_envs.alr.mujoco:BeerPongEnvFixedReleaseStep',
max_episode_steps=300, max_episode_steps=300,
kwargs={
"cup_goal_pos": [-0.3, -1.2],
"frame_skip": 2
}
) )
# Motion Primitive Environments # Motion Primitive Environments
## Simple Reacher ## Simple Reacher
_versions = ["SimpleReacher-v0", "SimpleReacher-v1", "LongSimpleReacher-v0", "LongSimpleReacher-v1"] _versions = ["SimpleReacher-v0", "LongSimpleReacher-v0"]
for _v in _versions: for _v in _versions:
_name = _v.split("-") _name = _v.split("-")
_env_id = f'{_name[0]}DMP-{_name[1]}' _env_id = f'{_name[0]}DMP-{_name[1]}'
kwargs_dict_simple_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_simple_reacher_dmp['wrappers'].append(classic_control.simple_reacher.MPWrapper)
kwargs_dict_simple_reacher_dmp['controller_kwargs']['p_gains'] = 0.6
kwargs_dict_simple_reacher_dmp['controller_kwargs']['d_gains'] = 0.075
kwargs_dict_simple_reacher_dmp['trajectory_generator_kwargs']['weight_scale'] = 50
kwargs_dict_simple_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_simple_reacher_dmp['name'] = f"{_v}"
register( register(
id=_env_id, id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1, kwargs=kwargs_dict_simple_reacher_dmp
kwargs={
"name": f"{_v}",
"wrappers": [classic_control.simple_reacher.MPWrapper],
"traj_gen_kwargs": {
"num_dof": 2 if "long" not in _v.lower() else 5,
"num_basis": 5,
"duration": 2,
"alpha_phase": 2,
"learn_goal": True,
"policy_type": "motor",
"weights_scale": 50,
"policy_kwargs": {
"p_gains": .6,
"d_gains": .075
}
}
}
) )
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'{_name[0]}ProMP-{_name[1]}' _env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_BB_DICT) kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_simple_reacher_promp['wrappers'].append(classic_control.simple_reacher.MPWrapper) kwargs_dict_simple_reacher_promp['wrappers'].append(classic_control.simple_reacher.MPWrapper)
kwargs_dict_simple_reacher_promp['controller_kwargs']['p_gains'] = 0.6 kwargs_dict_simple_reacher_promp['controller_kwargs']['p_gains'] = 0.6
kwargs_dict_simple_reacher_promp['controller_kwargs']['d_gains'] = 0.075 kwargs_dict_simple_reacher_promp['controller_kwargs']['d_gains'] = 0.075
kwargs_dict_simple_reacher_promp['name'] = _env_id kwargs_dict_simple_reacher_promp['name'] = _v
register( register(
id=_env_id, id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_simple_reacher_promp kwargs=kwargs_dict_simple_reacher_promp
) )
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
# Viapoint reacher # Viapoint reacher
kwargs_dict_via_point_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_via_point_reacher_dmp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper)
kwargs_dict_via_point_reacher_dmp['controller_kwargs']['controller_type'] = 'velocity'
kwargs_dict_via_point_reacher_dmp['trajectory_generator_kwargs']['weight_scale'] = 50
kwargs_dict_via_point_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_via_point_reacher_dmp['name'] = "ViaPointReacher-v0"
register( register(
id='ViaPointReacherDMP-v0', id='ViaPointReacherDMP-v0',
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1, # max_episode_steps=1,
kwargs={ kwargs=kwargs_dict_via_point_reacher_dmp
"name": "ViaPointReacher-v0",
"wrappers": [classic_control.viapoint_reacher.MPWrapper],
"traj_gen_kwargs": {
"num_dof": 5,
"num_basis": 5,
"duration": 2,
"learn_goal": True,
"alpha_phase": 2,
"policy_type": "velocity",
"weights_scale": 50,
}
}
) )
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0") ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_BB_DICT) kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_via_point_reacher_promp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper) kwargs_dict_via_point_reacher_promp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper)
kwargs_dict_via_point_reacher_promp['controller_kwargs']['controller_type'] = 'velocity' kwargs_dict_via_point_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
kwargs_dict_via_point_reacher_promp['name'] = "ViaPointReacherProMP-v0" kwargs_dict_via_point_reacher_promp['name'] = "ViaPointReacher-v0"
register( register(
id="ViaPointReacherProMP-v0", id="ViaPointReacherProMP-v0",
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_via_point_reacher_promp kwargs=kwargs_dict_via_point_reacher_promp
) )
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0") ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0")
@ -332,37 +299,30 @@ _versions = ["HoleReacher-v0"]
for _v in _versions: for _v in _versions:
_name = _v.split("-") _name = _v.split("-")
_env_id = f'{_name[0]}DMP-{_name[1]}' _env_id = f'{_name[0]}DMP-{_name[1]}'
kwargs_dict_hole_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_hole_reacher_dmp['wrappers'].append(classic_control.hole_reacher.MPWrapper)
kwargs_dict_hole_reacher_dmp['controller_kwargs']['controller_type'] = 'velocity'
# TODO: Previously this used weight scale 50 and goal scale 0.1. Only the weight scale remains, so it is set to 500 here. Check that this is correct.
kwargs_dict_hole_reacher_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
kwargs_dict_hole_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2.5
kwargs_dict_hole_reacher_dmp['name'] = _v
register( register(
id=_env_id, id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1, # max_episode_steps=1,
kwargs={ kwargs=kwargs_dict_hole_reacher_dmp
"name": f"HoleReacher-{_v}",
"wrappers": [classic_control.hole_reacher.MPWrapper],
"traj_gen_kwargs": {
"num_dof": 5,
"num_basis": 5,
"duration": 2,
"learn_goal": True,
"alpha_phase": 2.5,
"bandwidth_factor": 2,
"policy_type": "velocity",
"weights_scale": 50,
"goal_scale": 0.1
}
}
) )
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'{_name[0]}ProMP-{_name[1]}' _env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_BB_DICT) kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_hole_reacher_promp['wrappers'].append(classic_control.hole_reacher.MPWrapper) kwargs_dict_hole_reacher_promp['wrappers'].append(classic_control.hole_reacher.MPWrapper)
kwargs_dict_hole_reacher_promp['trajectory_generator_kwargs']['weight_scale'] = 2 kwargs_dict_hole_reacher_promp['trajectory_generator_kwargs']['weight_scale'] = 2
kwargs_dict_hole_reacher_promp['controller_kwargs']['controller_type'] = 'velocity' kwargs_dict_hole_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
kwargs_dict_hole_reacher_promp['name'] = f"{_v}" kwargs_dict_hole_reacher_promp['name'] = f"{_v}"
register( register(
id=_env_id, id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_hole_reacher_promp kwargs=kwargs_dict_hole_reacher_promp
) )
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@ -372,36 +332,26 @@ _versions = ["Reacher5d-v0", "Reacher7d-v0", "Reacher5dSparse-v0", "Reacher7dSpa
for _v in _versions: for _v in _versions:
_name = _v.split("-") _name = _v.split("-")
_env_id = f'{_name[0]}DMP-{_name[1]}' _env_id = f'{_name[0]}DMP-{_name[1]}'
kwargs_dict_reacherNd_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_reacherNd_dmp['wrappers'].append(mujoco.reacher.MPWrapper)
kwargs_dict_reacherNd_dmp['trajectory_generator_kwargs']['weight_scale'] = 5
kwargs_dict_reacherNd_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_reacherNd_dmp['basis_generator_kwargs']['num_basis'] = 2
kwargs_dict_reacherNd_dmp['name'] = _v
register( register(
id=_env_id, id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1, # max_episode_steps=1,
kwargs={ kwargs=kwargs_dict_reacherNd_dmp
"name": f"{_v}",
"wrappers": [mujoco.reacher.MPWrapper],
"traj_gen_kwargs": {
"num_dof": 5 if "long" not in _v.lower() else 7,
"num_basis": 2,
"duration": 4,
"alpha_phase": 2,
"learn_goal": True,
"policy_type": "motor",
"weights_scale": 5,
"policy_kwargs": {
"p_gains": 1,
"d_gains": 0.1
}
}
}
) )
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'{_name[0]}ProMP-{_name[1]}' _env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_alr_reacher_promp = deepcopy(DEFAULT_BB_DICT) kwargs_dict_alr_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_alr_reacher_promp['wrappers'].append(mujoco.reacher.MPWrapper) kwargs_dict_alr_reacher_promp['wrappers'].append(mujoco.reacher.MPWrapper)
kwargs_dict_alr_reacher_promp['controller_kwargs']['p_gains'] = 1 kwargs_dict_alr_reacher_promp['controller_kwargs']['p_gains'] = 1
kwargs_dict_alr_reacher_promp['controller_kwargs']['d_gains'] = 0.1 kwargs_dict_alr_reacher_promp['controller_kwargs']['d_gains'] = 0.1
kwargs_dict_alr_reacher_promp['name'] = f"{_v}" kwargs_dict_alr_reacher_promp['name'] = _v
register( register(
id=_env_id, id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
@ -415,14 +365,14 @@ _versions = ['BeerPong-v0']
for _v in _versions: for _v in _versions:
_name = _v.split("-") _name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}' _env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_bp_promp = deepcopy(DEFAULT_BB_DICT) kwargs_dict_bp_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.MPWrapper) kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.MPWrapper)
kwargs_dict_bp_promp['phase_generator_kwargs']['learn_tau'] = True kwargs_dict_bp_promp['phase_generator_kwargs']['learn_tau'] = True
kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]) kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25])
kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]) kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2 kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2
kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2 kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2
kwargs_dict_bp_promp['name'] = f"{_v}" kwargs_dict_bp_promp['name'] = _v
register( register(
id=_env_id, id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
@ -435,17 +385,17 @@ _versions = ["BeerPongStepBased-v0", "BeerPongFixedRelease-v0"]
for _v in _versions: for _v in _versions:
_name = _v.split("-") _name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}' _env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_bp_promp = deepcopy(DEFAULT_BB_DICT) kwargs_dict_bp_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.MPWrapper) kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.MPWrapper)
kwargs_dict_bp_promp['phase_generator_kwargs']['tau'] = 0.62 kwargs_dict_bp_promp['phase_generator_kwargs']['tau'] = 0.62
kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]) kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25])
kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]) kwargs_dict_bp_promp['controller_kwargs']['d_gains'] = np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2 kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis'] = 2
kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2 kwargs_dict_bp_promp['basis_generator_kwargs']['num_basis_zero_start'] = 2
kwargs_dict_bp_promp['name'] = f"{_v}" kwargs_dict_bp_promp['name'] = _v
register( register(
id=_env_id, id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_bp_promp kwargs=kwargs_dict_bp_promp
) )
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@ -460,12 +410,12 @@ _versions = ['ALRAntJump-v0']
for _v in _versions: for _v in _versions:
_name = _v.split("-") _name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}' _env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_ant_jump_promp = deepcopy(DEFAULT_BB_DICT) kwargs_dict_ant_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_ant_jump_promp['wrappers'].append(mujoco.ant_jump.MPWrapper) kwargs_dict_ant_jump_promp['wrappers'].append(mujoco.ant_jump.MPWrapper)
kwargs_dict_ant_jump_promp['name'] = f"{_v}" kwargs_dict_ant_jump_promp['name'] = _v
register( register(
id=_env_id, id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_ant_jump_promp kwargs=kwargs_dict_ant_jump_promp
) )
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@ -477,12 +427,12 @@ _versions = ['ALRHalfCheetahJump-v0']
for _v in _versions: for _v in _versions:
_name = _v.split("-") _name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}' _env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_halfcheetah_jump_promp = deepcopy(DEFAULT_BB_DICT) kwargs_dict_halfcheetah_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_halfcheetah_jump_promp['wrappers'].append(mujoco.half_cheetah_jump.MPWrapper) kwargs_dict_halfcheetah_jump_promp['wrappers'].append(mujoco.half_cheetah_jump.MPWrapper)
kwargs_dict_halfcheetah_jump_promp['name'] = f"{_v}" kwargs_dict_halfcheetah_jump_promp['name'] = _v
register( register(
id=_env_id, id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_halfcheetah_jump_promp kwargs=kwargs_dict_halfcheetah_jump_promp
) )
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@ -491,18 +441,18 @@ for _v in _versions:
## HopperJump ## HopperJump
_versions = ['ALRHopperJump-v0', 'ALRHopperJumpRndmJointsDesPos-v0', 'ALRHopperJumpRndmJointsDesPosStepBased-v0', _versions = ['HopperJump-v0', 'HopperJumpSparse-v0', 'ALRHopperJumpOnBox-v0', 'ALRHopperThrow-v0',
'ALRHopperJumpOnBox-v0', 'ALRHopperThrow-v0', 'ALRHopperThrowInBasket-v0'] 'ALRHopperThrowInBasket-v0']
# TODO: Check if all environments work with the same MPWrapper # TODO: Check if all environments work with the same MPWrapper
for _v in _versions: for _v in _versions:
_name = _v.split("-") _name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}' _env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_hopper_jump_promp = deepcopy(DEFAULT_BB_DICT) kwargs_dict_hopper_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_hopper_jump_promp['wrappers'].append(mujoco.hopper_jump.MPWrapper) kwargs_dict_hopper_jump_promp['wrappers'].append(mujoco.hopper_jump.MPWrapper)
kwargs_dict_hopper_jump_promp['name'] = f"{_v}" kwargs_dict_hopper_jump_promp['name'] = _v
register( register(
id=_env_id, id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_hopper_jump_promp kwargs=kwargs_dict_hopper_jump_promp
) )
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@ -515,12 +465,12 @@ _versions = ['ALRWalker2DJump-v0']
for _v in _versions: for _v in _versions:
_name = _v.split("-") _name = _v.split("-")
_env_id = f'{_name[0]}ProMP-{_name[1]}' _env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_walker2d_jump_promp = deepcopy(DEFAULT_BB_DICT) kwargs_dict_walker2d_jump_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_walker2d_jump_promp['wrappers'].append(mujoco.walker_2d_jump.MPWrapper) kwargs_dict_walker2d_jump_promp['wrappers'].append(mujoco.walker_2d_jump.MPWrapper)
kwargs_dict_walker2d_jump_promp['name'] = f"{_v}" kwargs_dict_walker2d_jump_promp['name'] = _v
register( register(
id=_env_id, id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_walker2d_jump_promp kwargs=kwargs_dict_walker2d_jump_promp
) )
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)

View File

@ -1,11 +1,8 @@
from .beerpong.beerpong import BeerPongEnv, BeerPongEnvFixedReleaseStep, BeerPongEnvStepBasedEpisodicReward
from .ant_jump.ant_jump import AntJumpEnv from .ant_jump.ant_jump import AntJumpEnv
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
from alr_envs.alr.mujoco.beerpong.beerpong import BeerPongEnv
from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
from .hopper_throw.hopper_throw import ALRHopperThrowEnv from .hopper_throw.hopper_throw import ALRHopperThrowEnv
from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
from .reacher.reacher import ReacherEnv from .reacher.reacher import ReacherEnv
from .table_tennis.tt_gym import TTEnvGym
from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv

View File

@ -1,5 +1,5 @@
<mujoco model="wam(v1.31)"> <mujoco model="wam(v1.31)">
<compiler angle="radian" meshdir="../../meshes/wam/" /> <compiler angle="radian" meshdir="./meshes/wam/" />
<option timestep="0.005" integrator="Euler" /> <option timestep="0.005" integrator="Euler" />
<size njmax="500" nconmax="100" /> <size njmax="500" nconmax="100" />
<default class="main"> <default class="main">

View File

@ -48,6 +48,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
self.ep_length = 600 // frame_skip self.ep_length = 600 // frame_skip
self.repeat_action = frame_skip self.repeat_action = frame_skip
# TODO: If accessing IDs is easier in the (new) official mujoco bindings, remove this
self.model = None self.model = None
self.site_id = lambda x: self.model.site_name2id(x) self.site_id = lambda x: self.model.site_name2id(x)
self.body_id = lambda x: self.model.body_name2id(x) self.body_id = lambda x: self.model.body_name2id(x)
@ -64,7 +65,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
self.ball_in_cup = False self.ball_in_cup = False
self.dist_ground_cup = -1 # distance floor to cup if first floor contact self.dist_ground_cup = -1 # distance floor to cup if first floor contact
MujocoEnv.__init__(self, self.xml_path, frame_skip=1) MujocoEnv.__init__(self, self.xml_path, frame_skip=1, mujoco_bindings="mujoco_py")
utils.EzPickle.__init__(self) utils.EzPickle.__init__(self)
@property @property
@ -99,25 +100,25 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
# TODO: Ask Max why we need to set the state twice. # TODO: Ask Max why we need to set the state twice.
self.set_state(start_pos, init_vel) self.set_state(start_pos, init_vel)
start_pos[7::] = self.sim.data.site_xpos[self.site_id("init_ball_pos"), :].copy() start_pos[7::] = self.data.site_xpos[self.site_id("init_ball_pos"), :].copy()
self.set_state(start_pos, init_vel) self.set_state(start_pos, init_vel)
xy = self.np_random.uniform(self._cup_pos_min, self._cup_pos_max) xy = self.np_random.uniform(self._cup_pos_min, self._cup_pos_max)
xyz = np.zeros(3) xyz = np.zeros(3)
xyz[:2] = xy xyz[:2] = xy
xyz[-1] = 0.840 xyz[-1] = 0.840
self.sim.model.body_pos[self.body_id("cup_table")] = xyz self.model.body_pos[self.body_id("cup_table")] = xyz
return self._get_obs() return self._get_obs()
def step(self, a): def step(self, a):
crash = False crash = False
for _ in range(self.repeat_action): for _ in range(self.repeat_action):
applied_action = a + self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0] applied_action = a + self.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0]
try: try:
self.do_simulation(applied_action, self.frame_skip) self.do_simulation(applied_action, self.frame_skip)
# self.reward_function.check_contacts(self.sim) # I assume this is not important? # self.reward_function.check_contacts(self.sim) # I assume this is not important?
if self._steps < self.release_step: if self._steps < self.release_step:
self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.site_id("init_ball_pos"), :].copy() self.data.qpos[7::] = self.data.site_xpos[self.site_id("init_ball_pos"), :].copy()
self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.site_id("init_ball_pos"), :].copy() self.data.qvel[7::] = self.data.site_xvelp[self.site_id("init_ball_pos"), :].copy()
crash = False crash = False
except mujoco_py.builder.MujocoException: except mujoco_py.builder.MujocoException:
crash = True crash = True
@ -137,15 +138,15 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
infos = dict( infos = dict(
reward=reward, reward=reward,
action=a, action=a,
q_pos=self.sim.data.qpos[0:7].ravel().copy(), q_pos=self.data.qpos[0:7].ravel().copy(),
q_vel=self.sim.data.qvel[0:7].ravel().copy(), sim_crash=crash, q_vel=self.data.qvel[0:7].ravel().copy(), sim_crash=crash,
) )
infos.update(reward_infos) infos.update(reward_infos)
return ob, reward, done, infos return ob, reward, done, infos
def _get_obs(self): def _get_obs(self):
theta = self.sim.data.qpos.flat[:7] theta = self.data.qpos.flat[:7]
theta_dot = self.sim.data.qvel.flat[:7] theta_dot = self.data.qvel.flat[:7]
ball_pos = self.data.get_body_xpos("ball").copy() ball_pos = self.data.get_body_xpos("ball").copy()
cup_goal_diff_final = ball_pos - self.data.get_site_xpos("cup_goal_final_table").copy() cup_goal_diff_final = ball_pos - self.data.get_site_xpos("cup_goal_final_table").copy()
cup_goal_diff_top = ball_pos - self.data.get_site_xpos("cup_goal_table").copy() cup_goal_diff_top = ball_pos - self.data.get_site_xpos("cup_goal_table").copy()
@ -155,7 +156,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
theta_dot, theta_dot,
cup_goal_diff_final, cup_goal_diff_final,
cup_goal_diff_top, cup_goal_diff_top,
self.sim.model.body_pos[self.body_id("cup_table")][:2].copy(), self.model.body_pos[self.body_id("cup_table")][:2].copy(),
# [self._steps], # Use TimeAwareObservation Wrapper instead .... # [self._steps], # Use TimeAwareObservation Wrapper instead ....
]) ])
@ -241,8 +242,8 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
If id_set_2 is set to None, it will check for a collision with itself (id_set_1). If id_set_2 is set to None, it will check for a collision with itself (id_set_1).
""" """
collision_id_set = id_set_2 - id_set_1 if id_set_2 is not None else id_set_1 collision_id_set = id_set_2 - id_set_1 if id_set_2 is not None else id_set_1
for coni in range(self.sim.data.ncon): for coni in range(self.data.ncon):
con = self.sim.data.contact[coni] con = self.data.contact[coni]
if ((con.geom1 in id_set_1 and con.geom2 in collision_id_set) or if ((con.geom1 in id_set_1 and con.geom2 in collision_id_set) or
(con.geom2 in id_set_1 and con.geom1 in collision_id_set)): (con.geom2 in id_set_1 and con.geom1 in collision_id_set)):
return True return True

View File

@ -58,7 +58,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
return -self.reward_weight * np.linalg.norm(vec) return -self.reward_weight * np.linalg.norm(vec)
def velocity_reward(self): def velocity_reward(self):
return -10 * np.square(self.sim.data.qvel.flat[:self.n_links]).sum() if self.sparse else 0.0 return -10 * np.square(self.data.qvel.flat[:self.n_links]).sum() if self.sparse else 0.0
def viewer_setup(self): def viewer_setup(self):
self.viewer.cam.trackbodyid = 0 self.viewer.cam.trackbodyid = 0

View File

@ -75,7 +75,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
# TODO: Bruce said DMP, ProMP, ProDMP can have 0 bc_time for sequencing # TODO: Bruce said DMP, ProMP, ProDMP can have 0 bc_time for sequencing
# TODO Check with Bruce for replanning # TODO Check with Bruce for replanning
self.traj_gen.set_boundary_conditions( self.traj_gen.set_boundary_conditions(
bc_time=np.zeros((1,)) if not self.do_replanning else np.array([self.current_traj_steps * self.dt]), bc_time=np.array(0) if not self.do_replanning else np.array([self.current_traj_steps * self.dt]),
bc_pos=self.current_pos, bc_vel=self.current_vel) bc_pos=self.current_pos, bc_vel=self.current_vel)
# TODO: is this correct for replanning? Do we need to adjust anything here? # TODO: is this correct for replanning? Do we need to adjust anything here?
self.traj_gen.set_duration(None if self.learn_sub_trajectories else np.array([self.duration]), self.traj_gen.set_duration(None if self.learn_sub_trajectories else np.array([self.duration]),

View File

@ -43,7 +43,6 @@ def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwa
def make(env_id, seed, **kwargs): def make(env_id, seed, **kwargs):
# TODO: This doesn't work with gym ==0.21.0
# This access is required to allow for nested dict updates # This access is required to allow for nested dict updates
spec = registry.get(env_id) spec = registry.get(env_id)
all_kwargs = deepcopy(spec.kwargs) all_kwargs = deepcopy(spec.kwargs)