current state

Fabian 2022-06-30 17:33:05 +02:00
parent 60bdeef687
commit fea2ae7d11
52 changed files with 325 additions and 557 deletions

View File

@@ -1,33 +1,31 @@
-import numpy as np
-from gym import register
 from copy import deepcopy
+import numpy as np
+from gym import register
+from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
 from . import classic_control, mujoco
 from .classic_control.hole_reacher.hole_reacher import HoleReacherEnv
 from .classic_control.simple_reacher.simple_reacher import SimpleReacherEnv
 from .classic_control.viapoint_reacher.viapoint_reacher import ViaPointReacherEnv
+from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP
 from .mujoco.ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
 from .mujoco.ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
-from .mujoco.reacher.alr_reacher import ALRReacherEnv
-from .mujoco.reacher.balancing import BalancingEnv
-from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS
-from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP
 from .mujoco.half_cheetah_jump.half_cheetah_jump import MAX_EPISODE_STEPS_HALFCHEETAHJUMP
 from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP
 from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX
 from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
 from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
+from .mujoco.reacher.reacher import ReacherEnv
 from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
 ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
-DEFAULT_MP_ENV_DICT = {
+DEFAULT_BB_DICT = {
     "name": 'EnvName',
     "wrappers": [],
     "traj_gen_kwargs": {
-        "weight_scale": 1,
-        'movement_primitives_type': 'promp'
+        'trajectory_generator_type': 'promp'
     },
     "phase_generator_kwargs": {
         'phase_generator_type': 'linear',
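The renamed DEFAULT_BB_DICT acts as a shared template: every registration further down deep-copies it and overrides only the entries it needs. A minimal sketch of that pattern, restricted to keys that actually appear in this commit (any further defaults would be assumptions):

from copy import deepcopy

DEFAULT_BB_DICT = {
    "name": 'EnvName',
    "wrappers": [],
    "traj_gen_kwargs": {'trajectory_generator_type': 'promp'},
    "phase_generator_kwargs": {'phase_generator_type': 'linear'},
    "controller_kwargs": {},
    "basis_generator_kwargs": {},
}

# A per-environment registration then only overrides individual entries, e.g.:
kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_BB_DICT)
kwargs_dict_via_point_reacher_promp['name'] = "ViaPointReacherProMP-v0"
kwargs_dict_via_point_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'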
@@ -100,80 +98,47 @@ register(
 # Mujoco
 ## Reacher
-register(
-    id='ALRReacher-v0',
-    entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 0,
-        "n_links": 5,
-        "balance": False,
-    }
-)
-register(
-    id='ALRReacherSparse-v0',
-    entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 200,
-        "n_links": 5,
-        "balance": False,
-    }
-)
-register(
-    id='ALRReacherSparseOptCtrl-v0',
-    entry_point='alr_envs.alr.mujoco:ALRReacherOptCtrlEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 200,
-        "n_links": 5,
-        "balance": False,
-    }
-)
-register(
-    id='ALRReacherSparseBalanced-v0',
-    entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 200,
-        "n_links": 5,
-        "balance": True,
-    }
-)
-register(
-    id='ALRLongReacher-v0',
-    entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 0,
-        "n_links": 7,
-        "balance": False,
-    }
-)
-register(
-    id='ALRLongReacherSparse-v0',
-    entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 200,
-        "n_links": 7,
-        "balance": False,
-    }
-)
-register(
-    id='ALRLongReacherSparseBalanced-v0',
-    entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
-    max_episode_steps=200,
-    kwargs={
-        "steps_before_reward": 200,
-        "n_links": 7,
-        "balance": True,
-    }
-)
+for _dims in [5, 7]:
+    register(
+        id=f'Reacher{_dims}d-v0',
+        entry_point='alr_envs.alr.mujoco:ReacherEnv',
+        max_episode_steps=200,
+        kwargs={
+            "n_links": _dims,
+        }
+    )
+    register(
+        id=f'Reacher{_dims}dSparse-v0',
+        entry_point='alr_envs.alr.mujoco:ReacherEnv',
+        max_episode_steps=200,
+        kwargs={
+            "sparse": True,
+            "n_links": _dims,
+        }
+    )
+## Hopper Jump random joints and desired position
+register(
+    id='HopperJumpSparse-v0',
+    entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnv',
+    max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
+    kwargs={
+        # "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
+        "sparse": True,
+        # "healthy_reward": 1.0
+    }
+)
+## Hopper Jump random joints and desired position step based reward
+register(
+    id='HopperJump-v0',
+    entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnvStepBased',
+    max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
+    kwargs={
+        # "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
+        "sparse": False,
+        # "healthy_reward": 1.0
+    }
+)
@@ -198,41 +163,7 @@ register(
 )
 register(
-    id='ALRHopperJump-v0',
-    entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv',
-    max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
-    kwargs={
-        "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
-        "context": True
-    }
-)
-#### Hopper Jump random joints and des position
-register(
-    id='ALRHopperJumpRndmJointsDesPos-v0',
-    entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnv',
-    max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
-    kwargs={
-        "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
-        "context": True,
-        "healthy_reward": 1.0
-    }
-)
-##### Hopper Jump random joints and des position step based reward
-register(
-    id='ALRHopperJumpRndmJointsDesPosStepBased-v0',
-    entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnvStepBased',
-    max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
-    kwargs={
-        "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
-        "context": True,
-        "healthy_reward": 1.0
-    }
-)
-register(
-    id='ALRHopperJumpOnBox-v0',
+    id='HopperJumpOnBox-v0',
     entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
     kwargs={
@@ -271,17 +202,6 @@ register(
     }
 )
-## Balancing Reacher
-register(
-    id='Balancing-v0',
-    entry_point='alr_envs.alr.mujoco:BalancingEnv',
-    max_episode_steps=200,
-    kwargs={
-        "n_links": 5,
-    }
-)
 ## Table Tennis
 register(id='TableTennis2DCtxt-v0',
          entry_point='alr_envs.alr.mujoco:TTEnvGym',
@@ -361,7 +281,7 @@ for _v in _versions:
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
     _env_id = f'{_name[0]}ProMP-{_name[1]}'
-    kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+    kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_BB_DICT)
     kwargs_dict_simple_reacher_promp['wrappers'].append(classic_control.simple_reacher.MPWrapper)
     kwargs_dict_simple_reacher_promp['controller_kwargs']['p_gains'] = 0.6
     kwargs_dict_simple_reacher_promp['controller_kwargs']['d_gains'] = 0.075
@@ -394,7 +314,7 @@ register(
 )
 ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
-kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_BB_DICT)
 kwargs_dict_via_point_reacher_promp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper)
 kwargs_dict_via_point_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
 kwargs_dict_via_point_reacher_promp['name'] = "ViaPointReacherProMP-v0"
@@ -433,7 +353,7 @@ for _v in _versions:
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
     _env_id = f'{_name[0]}ProMP-{_name[1]}'
-    kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+    kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_BB_DICT)
     kwargs_dict_hole_reacher_promp['wrappers'].append(classic_control.hole_reacher.MPWrapper)
     kwargs_dict_hole_reacher_promp['traj_gen_kwargs']['weight_scale'] = 2
     kwargs_dict_hole_reacher_promp['controller_kwargs']['controller_type'] = 'velocity'
@@ -475,7 +395,7 @@ for _v in _versions:
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
     _env_id = f'{_name[0]}ProMP-{_name[1]}'
-    kwargs_dict_alr_reacher_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+    kwargs_dict_alr_reacher_promp = deepcopy(DEFAULT_BB_DICT)
     kwargs_dict_alr_reacher_promp['wrappers'].append(mujoco.reacher.MPWrapper)
     kwargs_dict_alr_reacher_promp['controller_kwargs']['p_gains'] = 1
     kwargs_dict_alr_reacher_promp['controller_kwargs']['d_gains'] = 0.1
@@ -493,7 +413,7 @@ _versions = ['ALRBeerPong-v0']
 for _v in _versions:
     _name = _v.split("-")
     _env_id = f'{_name[0]}ProMP-{_name[1]}'
-    kwargs_dict_bp_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+    kwargs_dict_bp_promp = deepcopy(DEFAULT_BB_DICT)
     kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.MPWrapper)
     kwargs_dict_bp_promp['phase_generator_kwargs']['learn_tau'] = True
     kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25])
@@ -513,7 +433,7 @@ _versions = ["ALRBeerPongStepBased-v0", "ALRBeerPongFixedRelease-v0"]
 for _v in _versions:
     _name = _v.split("-")
     _env_id = f'{_name[0]}ProMP-{_name[1]}'
-    kwargs_dict_bp_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+    kwargs_dict_bp_promp = deepcopy(DEFAULT_BB_DICT)
     kwargs_dict_bp_promp['wrappers'].append(mujoco.beerpong.MPWrapper)
     kwargs_dict_bp_promp['phase_generator_kwargs']['tau'] = 0.62
     kwargs_dict_bp_promp['controller_kwargs']['p_gains'] = np.array([1.5, 5, 2.55, 3, 2., 2, 1.25])
@@ -538,7 +458,7 @@ _versions = ['ALRAntJump-v0']
 for _v in _versions:
     _name = _v.split("-")
     _env_id = f'{_name[0]}ProMP-{_name[1]}'
-    kwargs_dict_ant_jump_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+    kwargs_dict_ant_jump_promp = deepcopy(DEFAULT_BB_DICT)
     kwargs_dict_ant_jump_promp['wrappers'].append(mujoco.ant_jump.MPWrapper)
     kwargs_dict_ant_jump_promp['name'] = f"alr_envs:{_v}"
     register(
@@ -555,7 +475,7 @@ _versions = ['ALRHalfCheetahJump-v0']
 for _v in _versions:
     _name = _v.split("-")
     _env_id = f'{_name[0]}ProMP-{_name[1]}'
-    kwargs_dict_halfcheetah_jump_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+    kwargs_dict_halfcheetah_jump_promp = deepcopy(DEFAULT_BB_DICT)
     kwargs_dict_halfcheetah_jump_promp['wrappers'].append(mujoco.half_cheetah_jump.MPWrapper)
     kwargs_dict_halfcheetah_jump_promp['name'] = f"alr_envs:{_v}"
     register(
@@ -575,7 +495,7 @@ _versions = ['ALRHopperJump-v0', 'ALRHopperJumpRndmJointsDesPos-v0', 'ALRHopperJ
 for _v in _versions:
     _name = _v.split("-")
     _env_id = f'{_name[0]}ProMP-{_name[1]}'
-    kwargs_dict_hopper_jump_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+    kwargs_dict_hopper_jump_promp = deepcopy(DEFAULT_BB_DICT)
     kwargs_dict_hopper_jump_promp['wrappers'].append(mujoco.hopper_jump.MPWrapper)
     kwargs_dict_hopper_jump_promp['name'] = f"alr_envs:{_v}"
     register(
@@ -593,7 +513,7 @@ _versions = ['ALRWalker2DJump-v0']
 for _v in _versions:
     _name = _v.split("-")
     _env_id = f'{_name[0]}ProMP-{_name[1]}'
-    kwargs_dict_walker2d_jump_promp = deepcopy(DEFAULT_MP_ENV_DICT)
+    kwargs_dict_walker2d_jump_promp = deepcopy(DEFAULT_BB_DICT)
     kwargs_dict_walker2d_jump_promp['wrappers'].append(mujoco.walker_2d_jump.MPWrapper)
     kwargs_dict_walker2d_jump_promp['name'] = f"alr_envs:{_v}"
     register(
@@ -695,7 +615,7 @@ for i in _vs:
     _env_id = f'ALRReacher{i}-v0'
     register(
         id=_env_id,
-        entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
+        entry_point='alr_envs.alr.mujoco:ReacherEnv',
         max_episode_steps=200,
         kwargs={
             "steps_before_reward": 0,
@@ -708,7 +628,7 @@ for i in _vs:
     _env_id = f'ALRReacherSparse{i}-v0'
     register(
         id=_env_id,
-        entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
+        entry_point='alr_envs.alr.mujoco:ReacherEnv',
         max_episode_steps=200,
         kwargs={
             "steps_before_reward": 200,

View File

@ -0,0 +1,27 @@
from typing import Tuple, Union
import numpy as np
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):
def get_context_mask(self):
return np.hstack([
[self.env.random_start] * self.env.n_links, # cos
[self.env.random_start] * self.env.n_links, # sin
[self.env.random_start] * self.env.n_links, # velocity
[self.env.initial_width is None], # hole width
# [self.env.hole_depth is None], # hole depth
[True] * 2, # x-y coordinates of target distance
[False] # env steps
])
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.current_pos
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.current_vel
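get_context_mask returns one boolean per observation entry; the black-box machinery presumably uses it to select the context a policy conditions on. An illustrative sketch of that filtering for a 5-link reacher with a fixed start and a randomized hole width (the values are made up and the wrapper-side filtering code is an assumption, not part of this diff):

import numpy as np

n_links = 5
obs = np.arange(3 * n_links + 1 + 2 + 1, dtype=float)  # cos, sin, vel, width, target x-y, step
mask = np.hstack([
    [False] * n_links,  # cos      (random_start is False in this example)
    [False] * n_links,  # sin
    [False] * n_links,  # velocity
    [True],             # hole width (initial_width is None -> randomized)
    [True] * 2,         # x-y coordinates of target distance
    [False],            # env step counter
])
context = obs[mask]    # the entries a context-conditioned policy would see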

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -6,7 +6,7 @@ from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
 from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
 from .hopper_throw.hopper_throw import ALRHopperThrowEnv
 from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
-from .reacher.alr_reacher import ALRReacherEnv
+from .reacher.reacher import ReacherEnv
 from .reacher.balancing import BalancingEnv
 from .table_tennis.tt_gym import TTEnvGym
 from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -1,8 +1,8 @@
-from alr_envs.mp.black_box_wrapper import BlackBoxWrapper
+from alr_envs.black_box.black_box_wrapper import BlackBoxWrapper
 from typing import Union, Tuple
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class BallInACupMPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Union, Tuple
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -1 +1 @@
-from .new_mp_wrapper import MPWrapper
+from .mp_wrapper import MPWrapper

View File

@@ -1,3 +1,5 @@
+from typing import Optional
 from gym.envs.mujoco.hopper_v3 import HopperEnv
 import numpy as np
 import os
@@ -8,10 +10,10 @@ MAX_EPISODE_STEPS_HOPPERJUMP = 250
 class ALRHopperJumpEnv(HopperEnv):
     """
     Initialization changes to normal Hopper:
-    - healthy_reward: 1.0 -> 0.1 -> 0
-    - healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
+    - terminate_when_unhealthy: True -> False
     - healthy_z_range: (0.7, float('inf')) -> (0.5, float('inf'))
-    - exclude current positions from observatiosn is set to False
+    - healthy_angle_range: (-0.2, 0.2) -> (-float('inf'), float('inf'))
+    - exclude_current_positions_from_observation: True -> False
     """
     def __init__(
@@ -19,76 +21,93 @@ class ALRHopperJumpEnv(HopperEnv):
             xml_file='hopper_jump.xml',
             forward_reward_weight=1.0,
             ctrl_cost_weight=1e-3,
-            healthy_reward=0.0,
+            healthy_reward=1.0,
             penalty=0.0,
-            context=True,
             terminate_when_unhealthy=False,
             healthy_state_range=(-100.0, 100.0),
             healthy_z_range=(0.5, float('inf')),
             healthy_angle_range=(-float('inf'), float('inf')),
             reset_noise_scale=5e-3,
             exclude_current_positions_from_observation=False,
-            max_episode_steps=250
     ):
-        self.current_step = 0
+        self._steps = 0
         self.max_height = 0
-        self.max_episode_steps = max_episode_steps
-        self.penalty = penalty
+        # self.penalty = penalty
         self.goal = 0
-        self.context = context
-        self.exclude_current_positions_from_observation = exclude_current_positions_from_observation
         self._floor_geom_id = None
         self._foot_geom_id = None
         self.contact_with_floor = False
         self.init_floor_contact = False
         self.has_left_floor = False
         self.contact_dist = None
         xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file)
         super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
                          healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale,
                          exclude_current_positions_from_observation)
     def step(self, action):
-        self.current_step += 1
+        self._steps += 1
+        self._floor_geom_id = self.model.geom_name2id('floor')
+        self._foot_geom_id = self.model.geom_name2id('foot_geom')
         self.do_simulation(action, self.frame_skip)
         height_after = self.get_body_com("torso")[2]
-        # site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy()
-        site_pos_after = self.get_body_com('foot_site')
+        site_pos_after = self.data.get_site_xpos('foot_site')
         self.max_height = max(height_after, self.max_height)
+        has_floor_contact = self._is_floor_foot_contact() if not self.contact_with_floor else False
+        if not self.init_floor_contact:
+            self.init_floor_contact = has_floor_contact
+        if self.init_floor_contact and not self.has_left_floor:
+            self.has_left_floor = not has_floor_contact
+        if not self.contact_with_floor and self.has_left_floor:
+            self.contact_with_floor = has_floor_contact
         ctrl_cost = self.control_cost(action)
         costs = ctrl_cost
         done = False
+        goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0]))
+        if self.contact_dist is None and self.contact_with_floor:
+            self.contact_dist = goal_dist
         rewards = 0
-        if self.current_step >= self.max_episode_steps:
-            hight_goal_distance = -10 * np.linalg.norm(self.max_height - self.goal) if self.context else self.max_height
-            healthy_reward = 0 if self.context else self.healthy_reward * 2  # self.current_step
-            height_reward = self._forward_reward_weight * hight_goal_distance  # maybe move reward calculation into if structure and define two different _forward_reward_weight variables for context and episodic seperatley
-            rewards = height_reward + healthy_reward
+        if self._steps >= MAX_EPISODE_STEPS_HOPPERJUMP:
+            # healthy_reward = 0 if self.context else self.healthy_reward * self._steps
+            healthy_reward = self.healthy_reward * 2  # * self._steps
+            contact_dist = self.contact_dist if self.contact_dist is not None else 5
+            dist_reward = self._forward_reward_weight * (-3 * goal_dist + 10 * self.max_height - 2 * contact_dist)
+            rewards = dist_reward + healthy_reward
         observation = self._get_obs()
         reward = rewards - costs
-        info = {
-            'height': height_after,
-            'x_pos': site_pos_after,
-            'max_height': self.max_height,
-            'height_rew': self.max_height,
-            'healthy_reward': self.healthy_reward * 2,
-            'healthy': self.is_healthy
-        }
+        info = dict(
+            height=height_after,
+            x_pos=site_pos_after,
+            max_height=self.max_height,
+            goal=self.goal,
+            goal_dist=goal_dist,
+            height_rew=self.max_height,
+            healthy_reward=self.healthy_reward * 2,
+            healthy=self.is_healthy,
+            contact_dist=self.contact_dist if self.contact_dist is not None else 0
+        )
         return observation, reward, done, info
     def _get_obs(self):
         return np.append(super()._get_obs(), self.goal)
-    def reset(self):
+    def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None, ):
         self.goal = self.np_random.uniform(1.4, 2.16, 1)[0]  # 1.3 2.3
         self.max_height = 0
-        self.current_step = 0
+        self._steps = 0
         return super().reset()
     # overwrite reset_model to make it deterministic
@@ -106,11 +125,13 @@ class ALRHopperJumpEnv(HopperEnv):
         self.contact_dist = None
         return observation
-    def _contact_checker(self, id_1, id_2):
-        for coni in range(0, self.sim.data.ncon):
-            con = self.sim.data.contact[coni]
-            collision = con.geom1 == id_1 and con.geom2 == id_2
-            collision_trans = con.geom1 == id_2 and con.geom2 == id_1
+    def _is_floor_foot_contact(self):
+        floor_geom_id = self.model.geom_name2id('floor')
+        foot_geom_id = self.model.geom_name2id('foot_geom')
+        for i in range(self.data.ncon):
+            contact = self.data.contact[i]
+            collision = contact.geom1 == floor_geom_id and contact.geom2 == foot_geom_id
+            collision_trans = contact.geom1 == foot_geom_id and contact.geom2 == floor_geom_id
             if collision or collision_trans:
                 return True
         return False
@@ -122,7 +143,7 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
         self._floor_geom_id = self.model.geom_name2id('floor')
         self._foot_geom_id = self.model.geom_name2id('foot_geom')
-        self.current_step += 1
+        self._steps += 1
         self.do_simulation(action, self.frame_skip)
         height_after = self.get_body_com("torso")[2]
         site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy()
@@ -133,7 +154,7 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
         # self.has_left_floor = not floor_contact if self.init_floor_contact and not self.has_left_floor else self.has_left_floor
         # self.contact_with_floor = floor_contact if not self.contact_with_floor and self.has_left_floor else self.contact_with_floor
-        floor_contact = self._contact_checker(self._floor_geom_id,
+        floor_contact = self._is_floor_foot_contact(self._floor_geom_id,
                                               self._foot_geom_id) if not self.contact_with_floor else False
         if not self.init_floor_contact:
             self.init_floor_contact = floor_contact
@@ -151,9 +172,9 @@ class ALRHopperXYJumpEnv(ALRHopperJumpEnv):
         done = False
         goal_dist = np.linalg.norm(site_pos_after - np.array([self.goal, 0, 0]))
         rewards = 0
-        if self.current_step >= self.max_episode_steps:
-            # healthy_reward = 0 if self.context else self.healthy_reward * self.current_step
-            healthy_reward = self.healthy_reward * 2  # * self.current_step
+        if self._steps >= self.max_episode_steps:
+            # healthy_reward = 0 if self.context else self.healthy_reward * self._steps
+            healthy_reward = self.healthy_reward * 2  # * self._steps
             contact_dist = self.contact_dist if self.contact_dist is not None else 5
             dist_reward = self._forward_reward_weight * (-3 * goal_dist + 10 * self.max_height - 2 * contact_dist)
             rewards = dist_reward + healthy_reward
@@ -254,7 +275,7 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
         self._floor_geom_id = self.model.geom_name2id('floor')
         self._foot_geom_id = self.model.geom_name2id('foot_geom')
-        self.current_step += 1
+        self._steps += 1
         self.do_simulation(action, self.frame_skip)
         height_after = self.get_body_com("torso")[2]
         site_pos_after = self.sim.data.site_xpos[self.model.site_name2id('foot_site')].copy()
@@ -273,7 +294,7 @@ class ALRHopperXYJumpEnvStepBased(ALRHopperXYJumpEnv):
         ###########################################################
         # This is only for logging the distance to goal when first having the contact
         ##########################################################
-        floor_contact = self._contact_checker(self._floor_geom_id,
+        floor_contact = self._is_floor_foot_contact(self._floor_geom_id,
                                               self._foot_geom_id) if not self.contact_with_floor else False
         if not self.init_floor_contact:
             self.init_floor_contact = floor_contact
@@ -297,31 +318,3 @@
             'contact_dist': self.contact_dist if self.contact_dist is not None else 0
         }
         return observation, reward, done, info
if __name__ == '__main__':
render_mode = "human" # "human" or "partial" or "final"
# env = ALRHopperJumpEnv()
# env = ALRHopperXYJumpEnv()
np.random.seed(0)
env = ALRHopperXYJumpEnvStepBased()
env.seed(0)
# env = ALRHopperJumpRndmPosEnv()
obs = env.reset()
for k in range(1000):
obs = env.reset()
print('observation :', obs[:])
for i in range(200):
# objective.load_result("/tmp/cma")
# test with random actions
ac = env.action_space.sample()
obs, rew, d, info = env.step(ac)
# if i % 10 == 0:
# env.render(mode=render_mode)
env.render(mode=render_mode)
if d:
print('After ', i, ' steps, done: ', d)
env.reset()
env.close()
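ALRHopperJumpEnv and ALRHopperXYJumpEnv now grant their main reward only once, when self._steps reaches the episode limit. Restated as a standalone helper for clarity (a sketch of the arithmetic visible in the diff above, not an API of the package):

def terminal_hopper_jump_reward(goal_dist, max_height, contact_dist, healthy_reward,
                                forward_reward_weight=1.0):
    # contact_dist falls back to 5 if the foot never re-contacted the floor, as above.
    contact_dist = 5 if contact_dist is None else contact_dist
    dist_reward = forward_reward_weight * (-3 * goal_dist + 10 * max_height - 2 * contact_dist)
    return dist_reward + 2 * healthy_reward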

View File

@@ -1,57 +1,25 @@
-from typing import Tuple, Union
+from typing import Union, Tuple
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):
-    @property
-    def context_mask(self) -> np.ndarray:
+    # Random x goal + random init pos
+    def context_mask(self):
         return np.hstack([
-            [False] * (5 + int(not self.exclude_current_positions_from_observation)),  # position
+            [False] * (2 + int(not self.exclude_current_positions_from_observation)),  # position
+            [True] * 3,  # set to true if randomize initial pos
             [False] * 6,  # velocity
             [True]
         ])
     @property
-    def current_pos(self) -> Union[float, int, np.ndarray]:
-        return self.env.sim.data.qpos[3:6].copy()
+    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.sim.data.qpos[3:6].copy()
     @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.sim.data.qvel[3:6].copy()
-    @property
-    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
-        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-    @property
-    def dt(self) -> Union[float, int]:
-        return self.env.dt
-class HighCtxtMPWrapper(MPWrapper):
-    @property
-    def active_obs(self):
-        return np.hstack([
-            [True] * (5 + int(not self.exclude_current_positions_from_observation)),  # position
-            [False] * 6,  # velocity
-            [False]
-        ])
-    @property
-    def current_pos(self) -> Union[float, int, np.ndarray]:
-        return self.env.sim.data.qpos[3:6].copy()
-    @property
-    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.sim.data.qvel[3:6].copy()
-    @property
-    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
-        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-    @property
-    def dt(self) -> Union[float, int]:
-        return self.env.dt
+        return self.sim.data.qvel[3:6].copy()
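current_pos and current_vel expose the three actuated hopper joints (qpos/qvel indices 3:6) so a tracking controller can follow the generated trajectory. A generic PD sketch of how such a controller could use them (the actual PDController interface in alr_envs is not shown in this diff and may differ):

import numpy as np

def pd_tracking_action(des_pos, des_vel, cur_pos, cur_vel, p_gains=1.0, d_gains=0.1):
    # Plain PD law; the gain values 1.0 / 0.1 mirror the reacher ProMP settings earlier in this commit.
    return p_gains * (des_pos - cur_pos) + d_gains * (des_vel - cur_vel)

action = pd_tracking_action(des_pos=np.array([0.1, 0.0, -0.1]), des_vel=np.zeros(3),
                            cur_pos=np.zeros(3), cur_vel=np.zeros(3))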

View File

@ -1,45 +0,0 @@
from alr_envs.mp.black_box_wrapper import BlackBoxWrapper
from typing import Union, Tuple
import numpy as np
class MPWrapper(BlackBoxWrapper):
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qpos[3:6].copy()
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qvel[3:6].copy()
# # random goal
# def set_active_obs(self):
# return np.hstack([
# [False] * (5 + int(not self.env.exclude_current_positions_from_observation)), # position
# [False] * 6, # velocity
# [True]
# ])
# Random x goal + random init pos
def get_context_mask(self):
return np.hstack([
[False] * (2 + int(not self.env.exclude_current_positions_from_observation)), # position
[True] * 3, # set to true if randomize initial pos
[False] * 6, # velocity
[True]
])
class NewHighCtxtMPWrapper(MPWrapper):
def get_context_mask(self):
return np.hstack([
[False] * (2 + int(not self.env.exclude_current_positions_from_observation)), # position
[True] * 3, # set to true if randomize initial pos
[False] * 6, # velocity
[True], # goal
[False] * 3 # goal diff
])
def set_context(self, context):
return self.get_observation_from_step(self.env.env.set_context(context))

View File

@@ -67,7 +67,7 @@ class ALRHopperThrowEnv(HopperEnv):
         info = {
             'ball_pos': ball_pos_after,
             'ball_pos_y': ball_pos_after_y,
-            'current_step': self.current_step,
+            '_steps': self.current_step,
             'goal': self.goal,
         }

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@ -1,152 +0,0 @@
import os
import numpy as np
from gym import utils
from gym.envs.mujoco import MujocoEnv
import alr_envs.utils.utils as alr_utils
class ALRReacherEnv(MujocoEnv, utils.EzPickle):
def __init__(self, steps_before_reward: int = 200, n_links: int = 5, ctrl_cost_weight: int = 1,
balance: bool = False):
utils.EzPickle.__init__(**locals())
self._steps = 0
self.steps_before_reward = steps_before_reward
self.n_links = n_links
self.balance = balance
self.balance_weight = 1.0
self.ctrl_cost_weight = ctrl_cost_weight
self.reward_weight = 1
if steps_before_reward == 200:
self.reward_weight = 200
elif steps_before_reward == 50:
self.reward_weight = 50
if n_links == 5:
file_name = 'reacher_5links.xml'
elif n_links == 7:
file_name = 'reacher_7links.xml'
else:
raise ValueError(f"Invalid number of links {n_links}, only 5 or 7 allowed.")
MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", file_name), 2)
def step(self, a):
self._steps += 1
reward_dist = 0.0
angular_vel = 0.0
reward_balance = 0.0
is_delayed = self.steps_before_reward > 0
reward_ctrl = - np.square(a).sum() * self.ctrl_cost_weight
if self._steps >= self.steps_before_reward:
vec = self.get_body_com("fingertip") - self.get_body_com("target")
reward_dist -= self.reward_weight * np.linalg.norm(vec)
if is_delayed:
# avoid giving this penalty for normal step based case
# angular_vel -= 10 * np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
angular_vel -= 10 * np.square(self.sim.data.qvel.flat[:self.n_links]).sum()
# if is_delayed:
# # Higher control penalty for sparse reward per timestep
# reward_ctrl *= 10
if self.balance:
reward_balance -= self.balance_weight * np.abs(
alr_utils.angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad"))
reward = reward_dist + reward_ctrl + angular_vel + reward_balance
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
done = False
return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl,
velocity=angular_vel, reward_balance=reward_balance,
end_effector=self.get_body_com("fingertip").copy(),
goal=self.goal if hasattr(self, "goal") else None)
def viewer_setup(self):
self.viewer.cam.trackbodyid = 0
# def reset_model(self):
# qpos = self.init_qpos
# if not hasattr(self, "goal"):
# self.goal = np.array([-0.25, 0.25])
# # self.goal = self.init_qpos.copy()[:2] + 0.05
# qpos[-2:] = self.goal
# qvel = self.init_qvel
# qvel[-2:] = 0
# self.set_state(qpos, qvel)
# self._steps = 0
#
# return self._get_obs()
def reset_model(self):
qpos = self.init_qpos.copy()
while True:
# full space
# self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
# I Quadrant
# self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
# II Quadrant
# self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
# II + III Quadrant
# self.goal = np.random.uniform(low=-self.n_links / 10, high=[0, self.n_links / 10], size=2)
# I + II Quadrant
self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=self.n_links, size=2)
if np.linalg.norm(self.goal) < self.n_links / 10:
break
qpos[-2:] = self.goal
qvel = self.init_qvel.copy()
qvel[-2:] = 0
self.set_state(qpos, qvel)
self._steps = 0
return self._get_obs()
# def reset_model(self):
# qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
# while True:
# self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
# if np.linalg.norm(self.goal) < self.n_links / 10:
# break
# qpos[-2:] = self.goal
# qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
# qvel[-2:] = 0
# self.set_state(qpos, qvel)
# self._steps = 0
#
# return self._get_obs()
def _get_obs(self):
theta = self.sim.data.qpos.flat[:self.n_links]
target = self.get_body_com("target")
return np.concatenate([
np.cos(theta),
np.sin(theta),
target[:2], # x-y of goal position
self.sim.data.qvel.flat[:self.n_links], # angular velocity
self.get_body_com("fingertip") - target, # goal distance
[self._steps],
])
if __name__ == '__main__':
nl = 5
render_mode = "human" # "human" or "partial" or "final"
env = ALRReacherEnv(n_links=nl)
obs = env.reset()
for i in range(2000):
# objective.load_result("/tmp/cma")
# test with random actions
ac = env.action_space.sample()
obs, rew, d, info = env.step(ac)
if i % 10 == 0:
env.render(mode=render_mode)
if d:
env.reset()
env.close()

View File

@ -1,53 +0,0 @@
import os
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
import alr_envs.utils.utils as alr_utils
class BalancingEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self, n_links=5):
utils.EzPickle.__init__(**locals())
self.n_links = n_links
if n_links == 5:
file_name = 'reacher_5links.xml'
elif n_links == 7:
file_name = 'reacher_7links.xml'
else:
raise ValueError(f"Invalid number of links {n_links}, only 5 or 7 allowed.")
mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", file_name), 2)
def step(self, a):
angle = alr_utils.angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad")
reward = - np.abs(angle)
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
done = False
return ob, reward, done, dict(angle=angle, end_effector=self.get_body_com("fingertip").copy())
def viewer_setup(self):
self.viewer.cam.trackbodyid = 1
def reset_model(self):
# This also generates a goal, we however do not need/use it
qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
qpos[-2:] = 0
qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
qvel[-2:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
theta = self.sim.data.qpos.flat[:self.n_links]
return np.concatenate([
np.cos(theta),
np.sin(theta),
self.sim.data.qvel.flat[:self.n_links], # this is angular velocity
])

View File

@@ -2,7 +2,7 @@ from typing import Union, Tuple
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@ -0,0 +1,105 @@
import os
import numpy as np
from gym import utils
from gym.envs.mujoco import MujocoEnv
class ReacherEnv(MujocoEnv, utils.EzPickle):
"""
More general version of the gym mujoco Reacher environment
"""
def __init__(self, sparse: bool = False, n_links: int = 5, ctrl_cost_weight: int = 1):
utils.EzPickle.__init__(**locals())
self._steps = 0
self.n_links = n_links
self.ctrl_cost_weight = ctrl_cost_weight
self.sparse = sparse
self.reward_weight = 1 if not sparse else 200
file_name = f'reacher_{n_links}links.xml'
MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", file_name), 2)
def step(self, action):
self._steps += 1
is_reward = not self.sparse or (self.sparse and self._steps == 200)
reward_dist = 0.0
angular_vel = 0.0
if is_reward:
reward_dist = self.distance_reward()
angular_vel = self.velocity_reward()
reward_ctrl = -self.ctrl_cost_weight * np.square(action).sum()
reward = reward_dist + reward_ctrl + angular_vel
self.do_simulation(action, self.frame_skip)
ob = self._get_obs()
done = False
infos = dict(
reward_dist=reward_dist,
reward_ctrl=reward_ctrl,
velocity=angular_vel,
end_effector=self.get_body_com("fingertip").copy(),
goal=self.goal if hasattr(self, "goal") else None
)
return ob, reward, done, infos
def distance_reward(self):
vec = self.get_body_com("fingertip") - self.get_body_com("target")
return -self.reward_weight * np.linalg.norm(vec)
def velocity_reward(self):
return -10 * np.square(self.sim.data.qvel.flat[:self.n_links]).sum() if self.sparse else 0.0
def viewer_setup(self):
self.viewer.cam.trackbodyid = 0
def reset_model(self):
qpos = (
# self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) +
self.init_qpos.copy()
)
while True:
# full space
self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
# I Quadrant
# self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
# II Quadrant
# self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
# II + III Quadrant
# self.goal = np.random.uniform(low=-self.n_links / 10, high=[0, self.n_links / 10], size=2)
# I + II Quadrant
# self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=self.n_links, size=2)
if np.linalg.norm(self.goal) < self.n_links / 10:
break
qpos[-2:] = self.goal
qvel = (
# self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nv) +
self.init_qvel.copy()
)
qvel[-2:] = 0
self.set_state(qpos, qvel)
self._steps = 0
return self._get_obs()
def _get_obs(self):
theta = self.sim.data.qpos.flat[:self.n_links]
target = self.get_body_com("target")
return np.concatenate([
np.cos(theta),
np.sin(theta),
target[:2], # x-y of goal position
self.sim.data.qvel.flat[:self.n_links], # angular velocity
self.get_body_com("fingertip") - target, # goal distance
])
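A quick usage sketch of the class above; it assumes the MuJoCo assets referenced in __init__ are available, and the numbers are illustrative only:

env = ReacherEnv(sparse=True, n_links=5)
obs = env.reset()
episode_return = 0.0
for _ in range(200):
    obs, reward, done, info = env.step(env.action_space.sample())
    episode_return += reward
# With sparse=True the distance and velocity terms are only added at step 200,
# scaled by reward_weight=200; the control cost is charged every step.
env.close()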

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -6,8 +6,8 @@ import numpy as np
 from gym import spaces
 from mp_pytorch.mp.mp_interfaces import MPInterface
-from alr_envs.mp.controllers.base_controller import BaseController
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.controller.base_controller import BaseController
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 from alr_envs.utils.utils import get_numpy
@@ -15,10 +15,14 @@ class BlackBoxWrapper(gym.ObservationWrapper):
     def __init__(self,
                  env: RawInterfaceWrapper,
-                 trajectory_generator: MPInterface, tracking_controller: BaseController,
-                 duration: float, verbose: int = 1, learn_sub_trajectories: bool = False,
+                 trajectory_generator: MPInterface,
+                 tracking_controller: BaseController,
+                 duration: float,
+                 verbose: int = 1,
+                 learn_sub_trajectories: bool = False,
                  replanning_schedule: Union[None, callable] = None,
-                 reward_aggregation: callable = np.sum):
+                 reward_aggregation: callable = np.sum
+                 ):
         """
         gym.Wrapper for leveraging a black box approach with a trajectory generator.
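The reward_aggregation argument above collapses the per-step rewards collected while tracking one generated trajectory into the single return the black-box learner sees. A small illustration of what that means (values invented):

import numpy as np

step_rewards = np.array([0.1, -0.2, 0.4, 0.0])  # rewards along one rolled-out trajectory
episode_return = np.sum(step_rewards)           # default: reward_aggregation=np.sum
worst_case_return = np.min(step_rewards)        # any other callable could be passed instead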

View File

@@ -1,7 +1,7 @@
-from alr_envs.mp.controllers.meta_world_controller import MetaWorldController
-from alr_envs.mp.controllers.pd_controller import PDController
-from alr_envs.mp.controllers.vel_controller import VelController
-from alr_envs.mp.controllers.pos_controller import PosController
+from alr_envs.black_box.controller.meta_world_controller import MetaWorldController
+from alr_envs.black_box.controller.pd_controller import PDController
+from alr_envs.black_box.controller.vel_controller import VelController
+from alr_envs.black_box.controller.pos_controller import PosController
 ALL_TYPES = ["motor", "velocity", "position", "metaworld"]

View File

@@ -1,6 +1,6 @@
 import numpy as np
-from alr_envs.mp.controllers.base_controller import BaseController
+from alr_envs.black_box.controller.base_controller import BaseController
 class MetaWorldController(BaseController):

View File

@@ -1,6 +1,6 @@
 from typing import Union, Tuple
-from alr_envs.mp.controllers.base_controller import BaseController
+from alr_envs.black_box.controller.base_controller import BaseController
 class PDController(BaseController):

View File

@@ -1,4 +1,4 @@
-from alr_envs.mp.controllers.base_controller import BaseController
+from alr_envs.black_box.controller.base_controller import BaseController
 class PosController(BaseController):

View File

@@ -1,4 +1,4 @@
-from alr_envs.mp.controllers.base_controller import BaseController
+from alr_envs.black_box.controller.base_controller import BaseController
 class VelController(BaseController):

View File

View File

@@ -9,7 +9,7 @@ ALL_TYPES = ["promp", "dmp", "idmp"]
 def get_trajectory_generator(
         trajectory_generator_type: str, action_dim: int, basis_generator: BasisGenerator, **kwargs
 ):
     trajectory_generator_type = trajectory_generator_type.lower()
     if trajectory_generator_type == "promp":
         return ProMP(basis_generator, action_dim, **kwargs)
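Only the promp branch is visible in this hunk; given ALL_TYPES above, the remaining branches presumably dispatch analogously. A hedged sketch of the overall shape (the DMP/IDMP constructors are assumptions, not taken from this diff):

def get_trajectory_generator_sketch(trajectory_generator_type, action_dim, basis_generator, **kwargs):
    trajectory_generator_type = trajectory_generator_type.lower()
    if trajectory_generator_type == "promp":
        return ProMP(basis_generator, action_dim, **kwargs)
    # "dmp" and "idmp" would branch to their mp_pytorch counterparts here (assumption).
    raise ValueError(f"Specified trajectory generator type {trajectory_generator_type} not supported, "
                     f"please choose one of {ALL_TYPES}.")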

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Union
 import numpy as np
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -7,12 +7,12 @@ import numpy as np
 from gym.envs.registration import EnvSpec, registry
 from gym.wrappers import TimeAwareObservation
-from alr_envs.mp.basis_generator_factory import get_basis_generator
-from alr_envs.mp.black_box_wrapper import BlackBoxWrapper
-from alr_envs.mp.controllers.controller_factory import get_controller
-from alr_envs.mp.mp_factory import get_trajectory_generator
-from alr_envs.mp.phase_generator_factory import get_phase_generator
-from alr_envs.mp.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.factory.basis_generator_factory import get_basis_generator
+from alr_envs.black_box.black_box_wrapper import BlackBoxWrapper
+from alr_envs.black_box.controller.controller_factory import get_controller
+from alr_envs.black_box.factory.trajectory_generator_factory import get_trajectory_generator
+from alr_envs.black_box.factory.phase_generator_factory import get_phase_generator
+from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 from alr_envs.utils.utils import nested_update
@@ -46,6 +46,7 @@ def make(env_id, seed, **kwargs):
     spec = registry.get(env_id)
     # This access is required to allow for nested dict updates
     all_kwargs = deepcopy(spec._kwargs)
+    # TODO append wrapper here
     nested_update(all_kwargs, **kwargs)
     return _make(env_id, seed, **all_kwargs)
@@ -224,8 +225,8 @@ def make_bb_env_helper(**kwargs):
     seed = kwargs.pop("seed", None)
     wrappers = kwargs.pop("wrappers")
-    traj_gen_kwargs = kwargs.pop("traj_gen_kwargs", {})
     black_box_kwargs = kwargs.pop('black_box_kwargs', {})
+    traj_gen_kwargs = kwargs.pop("traj_gen_kwargs", {})
     contr_kwargs = kwargs.pop("controller_kwargs", {})
     phase_kwargs = kwargs.pop("phase_generator_kwargs", {})
     basis_kwargs = kwargs.pop("basis_generator_kwargs", {})
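For orientation, a hedged sketch of calling make_bb_env_helper with the kwarg groups it pops above; the key names mirror this commit, but the concrete values are illustrative and the helper's full behaviour is not shown here:

env = make_bb_env_helper(
    name="alr_envs:HopperJump-v0",  # env id registered earlier in this commit
    seed=1,
    wrappers=[mujoco.hopper_jump.MPWrapper],
    black_box_kwargs={},
    traj_gen_kwargs={'trajectory_generator_type': 'promp'},
    controller_kwargs={'controller_type': 'motor', 'p_gains': 1, 'd_gains': 0.1},
    phase_generator_kwargs={'phase_generator_type': 'linear'},
    basis_generator_kwargs={},
)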