From 8903c2dc9f02c0ba0229798c0f28222751f7bd94 Mon Sep 17 00:00:00 2001 From: Fabian Date: Wed, 25 Jan 2023 09:20:36 +0100 Subject: [PATCH] updated for new mp_pytorch --- alr_envs/alr/__init__.py | 1290 ----------------- alr_envs/alr/mujoco/reacher/alr_reacher.py | 152 -- alr_envs/alr/mujoco/reacher/new_mp_wrapper.py | 24 - fancy_gym/black_box/black_box_wrapper.py | 2 +- .../simple_reacher/__init__.py | 3 +- fancy_gym/meta/__init__.py | 3 +- 6 files changed, 4 insertions(+), 1470 deletions(-) delete mode 100644 alr_envs/alr/__init__.py delete mode 100644 alr_envs/alr/mujoco/reacher/alr_reacher.py delete mode 100644 alr_envs/alr/mujoco/reacher/new_mp_wrapper.py diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py deleted file mode 100644 index b49776a..0000000 --- a/alr_envs/alr/__init__.py +++ /dev/null @@ -1,1290 +0,0 @@ -import numpy as np -from gym import register - -from . import classic_control, mujoco -from .classic_control.hole_reacher.hole_reacher import HoleReacherEnv -from .classic_control.simple_reacher.simple_reacher import SimpleReacherEnv -from .classic_control.viapoint_reacher.viapoint_reacher import ViaPointReacherEnv -from .mujoco.ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv -from .mujoco.ball_in_a_cup.biac_pd import ALRBallInACupPDEnv -from .mujoco.reacher.alr_reacher import ALRReacherEnv -from .mujoco.reacher.balancing import BalancingEnv - -from alr_envs.alr.mujoco.table_tennis.tt_gym import MAX_EPISODE_STEPS -from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP -from .mujoco.half_cheetah_jump.half_cheetah_jump import MAX_EPISODE_STEPS_HALFCHEETAHJUMP -from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP -from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX -from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW -from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET -from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP - -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} - -# Classic Control -## Simple Reacher -register( - id='SimpleReacher-v0', - entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', - max_episode_steps=200, - kwargs={ - "n_links": 2, - } -) - -register( - id='SimpleReacher-v1', - entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', - max_episode_steps=200, - kwargs={ - "n_links": 2, - "random_start": False - } -) - -register( - id='LongSimpleReacher-v0', - entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', - max_episode_steps=200, - kwargs={ - "n_links": 5, - } -) - -register( - id='LongSimpleReacher-v1', - entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', - max_episode_steps=200, - kwargs={ - "n_links": 5, - "random_start": False - } -) - -## Viapoint Reacher - -register( - id='ViaPointReacher-v0', - entry_point='alr_envs.alr.classic_control:ViaPointReacherEnv', - max_episode_steps=200, - kwargs={ - "n_links": 5, - "allow_self_collision": False, - "collision_penalty": 1000 - } -) - -## Hole Reacher -register( - id='HoleReacher-v0', - entry_point='alr_envs.alr.classic_control:HoleReacherEnv', - max_episode_steps=200, - kwargs={ - "n_links": 5, - "random_start": True, - "allow_self_collision": False, - "allow_wall_collision": False, - "hole_width": None, - "hole_depth": 1, - "hole_x": None, - "collision_penalty": 100, - } -) - -register( - id='HoleReacher-v1', - entry_point='alr_envs.alr.classic_control:HoleReacherEnv', - 
max_episode_steps=200, - kwargs={ - "n_links": 5, - "random_start": False, - "allow_self_collision": False, - "allow_wall_collision": False, - "hole_width": 0.25, - "hole_depth": 1, - "hole_x": None, - "collision_penalty": 100, - } -) - -register( - id='HoleReacher-v2', - entry_point='alr_envs.alr.classic_control:HoleReacherEnv', - max_episode_steps=200, - kwargs={ - "n_links": 5, - "random_start": False, - "allow_self_collision": False, - "allow_wall_collision": False, - "hole_width": 0.25, - "hole_depth": 1, - "hole_x": 2, - "collision_penalty": 1, - } -) - -# Mujoco - -## Reacher -register( - id='ALRReacher-v0', - entry_point='alr_envs.alr.mujoco:ALRReacherEnv', - max_episode_steps=200, - kwargs={ - "steps_before_reward": 0, - "n_links": 5, - "balance": False, - } -) - -register( - id='ALRReacherSparse-v0', - entry_point='alr_envs.alr.mujoco:ALRReacherEnv', - max_episode_steps=200, - kwargs={ - "steps_before_reward": 200, - "n_links": 5, - "balance": False, - } -) - -register( - id='ALRReacherSparseOptCtrl-v0', - entry_point='alr_envs.alr.mujoco:ALRReacherOptCtrlEnv', - max_episode_steps=200, - kwargs={ - "steps_before_reward": 200, - "n_links": 5, - "balance": False, - } -) - -register( - id='ALRReacherSparseBalanced-v0', - entry_point='alr_envs.alr.mujoco:ALRReacherEnv', - max_episode_steps=200, - kwargs={ - "steps_before_reward": 200, - "n_links": 5, - "balance": True, - } -) - -register( - id='ALRLongReacher-v0', - entry_point='alr_envs.alr.mujoco:ALRReacherEnv', - max_episode_steps=200, - kwargs={ - "steps_before_reward": 0, - "n_links": 7, - "balance": False, - } -) - -register( - id='ALRLongReacherSparse-v0', - entry_point='alr_envs.alr.mujoco:ALRReacherEnv', - max_episode_steps=200, - kwargs={ - "steps_before_reward": 200, - "n_links": 7, - "balance": False, - } -) - -register( - id='ALRLongReacherSparseBalanced-v0', - entry_point='alr_envs.alr.mujoco:ALRReacherEnv', - max_episode_steps=200, - kwargs={ - "steps_before_reward": 200, - "n_links": 7, - "balance": True, - } -) - -_vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1] -for i in _vs: - _env_id = f'ALRReacher{i}-v0' - register( - id=_env_id, - entry_point='alr_envs.alr.mujoco:ALRReacherEnv', - max_episode_steps=200, - kwargs={ - "steps_before_reward": 0, - "n_links": 5, - "balance": False, - 'ctrl_cost_weight': i - } - ) - - _env_id = f'ALRReacherSparse{i}-v0' - register( - id=_env_id, - entry_point='alr_envs.alr.mujoco:ALRReacherEnv', - max_episode_steps=200, - kwargs={ - "steps_before_reward": 200, - "n_links": 5, - "balance": False, - 'ctrl_cost_weight': i - } - ) - -# CtxtFree are v0, Contextual are v1 -register( - id='ALRAntJump-v0', - entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv', - max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP, - "context": False - } -) - -# CtxtFree are v0, Contextual are v1 -register( - id='ALRAntJump-v1', - entry_point='alr_envs.alr.mujoco:ALRAntJumpEnv', - max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP, - "context": True - } -) - -# CtxtFree are v0, Contextual are v1 -register( - id='ALRHalfCheetahJump-v0', - entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv', - max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP, - "context": False - } -) -# CtxtFree are v0, Contextual are v1 -register( - id='ALRHalfCheetahJump-v1', - 
entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv', - max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP, - "context": True - } -) -# CtxtFree are v0, Contextual are v1 -register( - id='ALRHopperJump-v0', - entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP, - "context": False, - "healthy_reward": 1.0 - } -) -register( - id='ALRHopperJump-v1', - entry_point='alr_envs.alr.mujoco:ALRHopperJumpEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP, - "context": True - } -) - -register( - id='ALRHopperJump-v2', - entry_point='alr_envs.alr.mujoco:ALRHopperJumpRndmPosEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP - } -) - -register( - id='ALRHopperJump-v3', - entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP, - "context": True, - "healthy_reward": 1.0 - } -) - -##### Hopper Jump step based reward -register( - id='ALRHopperJump-v4', - entry_point='alr_envs.alr.mujoco:ALRHopperXYJumpEnvStepBased', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP, - "context": True, - "healthy_reward": 1.0 - } -) - - -# CtxtFree are v0, Contextual are v1 -register( - id='ALRHopperJumpOnBox-v0', - entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX, - "context": False - } -) -# CtxtFree are v0, Contextual are v1 -register( - id='ALRHopperJumpOnBox-v1', - entry_point='alr_envs.alr.mujoco:ALRHopperJumpOnBoxEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX, - "context": True - } -) -# CtxtFree are v0, Contextual are v1 - -register( - id='ALRHopperThrow-v0', - entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW, - "context": False - } -) -# CtxtFree are v0, Contextual are v1 -register( - id='ALRHopperThrow-v1', - entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW, - "context": True - } -) -# CtxtFree are v0, Contextual are v1 - -register( - id='ALRHopperThrowInBasket-v0', - entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, - "context": False - } -) -# CtxtFree are v0, Contextual are v1 -register( - id='ALRHopperThrowInBasket-v1', - entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv', - max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, - "context": True - } -) -# CtxtFree are v0, Contextual are v1 -register( - id='ALRWalker2DJump-v0', - entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv', - max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP, - "context": False - } -) -# CtxtFree are v0, Contextual are v1 
-register( - id='ALRWalker2DJump-v1', - entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv', - max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP, - kwargs={ - "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP, - "context": True - } -) - -## Balancing Reacher - -register( - id='Balancing-v0', - entry_point='alr_envs.alr.mujoco:BalancingEnv', - max_episode_steps=200, - kwargs={ - "n_links": 5, - } -) - -## Table Tennis -register(id='TableTennis2DCtxt-v0', - entry_point='alr_envs.alr.mujoco:TTEnvGym', - max_episode_steps=MAX_EPISODE_STEPS, - kwargs={'ctxt_dim': 2}) - -register(id='TableTennis2DCtxt-v1', - entry_point='alr_envs.alr.mujoco:TTEnvGym', - max_episode_steps=MAX_EPISODE_STEPS, - kwargs={'ctxt_dim': 2, 'fixed_goal': True}) - -register(id='TableTennis4DCtxt-v0', - entry_point='alr_envs.alr.mujocco:TTEnvGym', - max_episode_steps=MAX_EPISODE_STEPS, - kwargs={'ctxt_dim': 4}) - -## BeerPong -# fixed goal cup position -register( - id='ALRBeerPong-v0', - entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv', - max_episode_steps=300, - kwargs={ - "rndm_goal": False, - "cup_goal_pos": [0.1, -2.0], - "frame_skip": 2 - } - ) - - -# random goal cup position -register( - id='ALRBeerPong-v1', - entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv', - max_episode_steps=300, - kwargs={ - "rndm_goal": True, - "cup_goal_pos": [-0.3, -1.2], - "frame_skip": 2 - } - ) - -# random goal cup position -register( - id='ALRBeerPong-v2', - entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBased', - max_episode_steps=300, - kwargs={ - "rndm_goal": True, - "cup_goal_pos": [-0.3, -1.2], - "frame_skip": 2 - } - ) -# Beerpong with episodic reward, but fixed release time step -register( - id='ALRBeerPong-v3', - entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBasedEpisodicReward', - max_episode_steps=300, - kwargs={ - "rndm_goal": True, - "cup_goal_pos": [-0.3, -1.2], - "frame_skip": 2 - } - ) - -# Beerpong with episodic reward, but fixed release time step -register( - id='ALRBeerPong-v4', - entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvFixedReleaseStep', - max_episode_steps=300, - kwargs={ - "rndm_goal": True, - "cup_goal_pos": [-0.3, -1.2], - "frame_skip": 2 - } - ) - -# Motion Primitive Environments - -## Simple Reacher -_versions = ["SimpleReacher-v0", "SimpleReacher-v1", "LongSimpleReacher-v0", "LongSimpleReacher-v1"] -for _v in _versions: - _name = _v.split("-") - _env_id = f'{_name[0]}DMP-{_name[1]}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', - # max_episode_steps=1, - kwargs={ - "name": f"alr_envs:{_v}", - "wrappers": [classic_control.simple_reacher.MPWrapper], - "mp_kwargs": { - "num_dof": 2 if "long" not in _v.lower() else 5, - "num_basis": 5, - "duration": 2, - "alpha_phase": 2, - "learn_goal": True, - "policy_type": "motor", - "weights_scale": 50, - "policy_kwargs": { - "p_gains": .6, - "d_gains": .075 - } - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) - - _env_id = f'{_name[0]}ProMP-{_name[1]}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:{_v}", - "wrappers": [classic_control.simple_reacher.MPWrapper], - "mp_kwargs": { - "num_dof": 2 if "long" not in _v.lower() else 5, - "num_basis": 5, - "duration": 2, - "policy_type": "motor", - "weights_scale": 1, - "zero_start": True, - "policy_kwargs": { - "p_gains": .6, - "d_gains": .075 - } - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -# Viapoint reacher -register( - 
id='ViaPointReacherDMP-v0', - entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', - # max_episode_steps=1, - kwargs={ - "name": "alr_envs:ViaPointReacher-v0", - "wrappers": [classic_control.viapoint_reacher.MPWrapper], - "mp_kwargs": { - "num_dof": 5, - "num_basis": 5, - "duration": 2, - "learn_goal": True, - "alpha_phase": 2, - "policy_type": "velocity", - "weights_scale": 50, - } - } -) -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0") - -register( - id="ViaPointReacherProMP-v0", - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:ViaPointReacher-v0", - "wrappers": [classic_control.viapoint_reacher.MPWrapper], - "mp_kwargs": { - "num_dof": 5, - "num_basis": 5, - "duration": 2, - "policy_type": "velocity", - "weights_scale": 1, - "zero_start": True - } - } -) -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0") - -## Hole Reacher -_versions = ["v0", "v1", "v2"] -for _v in _versions: - _env_id = f'HoleReacherDMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', - # max_episode_steps=1, - kwargs={ - "name": f"alr_envs:HoleReacher-{_v}", - "wrappers": [classic_control.hole_reacher.MPWrapper], - "mp_kwargs": { - "num_dof": 5, - "num_basis": 5, - "duration": 2, - "learn_goal": True, - "alpha_phase": 2.5, - "bandwidth_factor": 2, - "policy_type": "velocity", - "weights_scale": 50, - "goal_scale": 0.1 - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) - - _env_id = f'HoleReacherProMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:HoleReacher-{_v}", - "wrappers": [classic_control.hole_reacher.MPWrapper], - "mp_kwargs": { - "num_dof": 5, - "num_basis": 3, - "duration": 2, - "policy_type": "velocity", - "weights_scale": 5, - "zero_start": True - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## ALRReacher -_versions = ["ALRReacher-v0", "ALRLongReacher-v0", "ALRReacherSparse-v0", "ALRLongReacherSparse-v0"] -for _v in _versions: - _name = _v.split("-") - _env_id = f'{_name[0]}DMP-{_name[1]}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', - # max_episode_steps=1, - kwargs={ - "name": f"alr_envs:{_v}", - "wrappers": [mujoco.reacher.MPWrapper], - "mp_kwargs": { - "num_dof": 5 if "long" not in _v.lower() else 7, - "num_basis": 2, - "duration": 4, - "alpha_phase": 2, - "learn_goal": True, - "policy_type": "motor", - "weights_scale": 5, - "policy_kwargs": { - "p_gains": 1, - "d_gains": 0.1 - } - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) - - _env_id = f'{_name[0]}ProMP-{_name[1]}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', - kwargs={ - "name": f"alr_envs:{_v}", - "wrappers": [mujoco.reacher.NewMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 5 if "long" not in _v.lower() else 7 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 4, # initial value - 'learn_tau': False, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": 1, - "d_gains": 0.1 - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 2, - 'num_basis_zero_start': 1 - } - } - ) - 
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - - -_vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1] -for i in _vs: - _env_id = f'ALRReacher{i}ProMP-v0' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:{_env_id.replace('ProMP', '')}", - "wrappers": [mujoco.reacher.MPWrapper], - "mp_kwargs": { - "num_dof": 5, - "num_basis": 5, - "duration": 4, - "policy_type": "motor", - # "weights_scale": 5, - "n_zero_basis": 1, - "zero_start": True, - "policy_kwargs": { - "p_gains": 1, - "d_gains": 0.1 - } - } - } - ) - - _env_id = f'ALRReacherSparse{i}ProMP-v0' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:{_env_id.replace('ProMP', '')}", - "wrappers": [mujoco.reacher.MPWrapper], - "mp_kwargs": { - "num_dof": 5, - "num_basis": 5, - "duration": 4, - "policy_type": "motor", - # "weights_scale": 5, - "n_zero_basis": 1, - "zero_start": True, - "policy_kwargs": { - "p_gains": 1, - "d_gains": 0.1 - } - } - } - ) - - -# ## Beerpong -# _versions = ["v0", "v1"] -# for _v in _versions: -# _env_id = f'BeerpongProMP-{_v}' -# register( -# id=_env_id, -# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', -# kwargs={ -# "name": f"alr_envs:ALRBeerPong-{_v}", -# "wrappers": [mujoco.beerpong.MPWrapper], -# "mp_kwargs": { -# "num_dof": 7, -# "num_basis": 2, -# # "duration": 1, -# "duration": 0.5, -# # "post_traj_time": 2, -# "post_traj_time": 2.5, -# "policy_type": "motor", -# "weights_scale": 0.14, -# # "weights_scale": 1, -# "zero_start": True, -# "zero_goal": False, -# "policy_kwargs": { -# "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]), -# "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]) -# } -# } -# } -# ) -# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## Beerpong -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'BeerpongProMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', - kwargs={ - "name": f"alr_envs:ALRBeerPong-{_v}", - "wrappers": [mujoco.beerpong.NewMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 7 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 0.8, # initial value - 'learn_tau': True, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]), - "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 2, - 'num_basis_zero_start': 2 - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## Beerpong ProMP fixed release -_env_id = 'BeerpongProMP-v2' -register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', - kwargs={ - "name": "alr_envs:ALRBeerPong-v4", - "wrappers": [mujoco.beerpong.NewMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 7 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 0.62, # initial value - 'learn_tau': False, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": 
np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]), - "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 2, - 'num_basis_zero_start': 2 - } - } -) -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## Table Tennis -ctxt_dim = [2, 4] -for _v, cd in enumerate(ctxt_dim): - _env_id = f'TableTennisProMP-v{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": "alr_envs:TableTennis{}DCtxt-v0".format(cd), - "wrappers": [mujoco.table_tennis.MPWrapper], - "mp_kwargs": { - "num_dof": 7, - "num_basis": 2, - "duration": 1.25, - "post_traj_time": 1.5, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": 0.5*np.array([1.0, 4.0, 2.0, 4.0, 1.0, 4.0, 1.0]), - "d_gains": 0.5*np.array([0.1, 0.4, 0.2, 0.4, 0.1, 0.4, 0.1]) - } - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## AntJump -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRAntJumpProMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:ALRAntJump-{_v}", - "wrappers": [mujoco.ant_jump.MPWrapper], - "mp_kwargs": { - "num_dof": 8, - "num_basis": 5, - "duration": 10, - "post_traj_time": 0, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": np.ones(8), - "d_gains": 0.1*np.ones(8) - } - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## AntJump -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRAntJumpProMP-{_v}' - register( - id= _env_id, - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', - kwargs={ - "name": f"alr_envs:ALRAntJump-{_v}", - "wrappers": [mujoco.ant_jump.NewMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 8 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 10, # initial value - 'learn_tau': False, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.ones(8), - "d_gains": 0.1*np.ones(8), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 2 - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - - - -## HalfCheetahJump -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRHalfCheetahJumpProMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:ALRHalfCheetahJump-{_v}", - "wrappers": [mujoco.half_cheetah_jump.MPWrapper], - "mp_kwargs": { - "num_dof": 6, - "num_basis": 5, - "duration": 5, - "post_traj_time": 0, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": np.ones(6), - "d_gains": 0.1*np.ones(6) - } - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -# ## HopperJump -# _versions = ["v0", "v1"] -# for _v in _versions: -# _env_id = f'ALRHopperJumpProMP-{_v}' -# register( -# id= _env_id, -# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', -# kwargs={ -# "name": f"alr_envs:ALRHopperJump-{_v}", -# "wrappers": 
[mujoco.hopper_jump.MPWrapper], -# "mp_kwargs": { -# "num_dof": 3, -# "num_basis": 5, -# "duration": 2, -# "post_traj_time": 0, -# "policy_type": "motor", -# "weights_scale": 1.0, -# "zero_start": True, -# "zero_goal": False, -# "policy_kwargs": { -# "p_gains": np.ones(3), -# "d_gains": 0.1*np.ones(3) -# } -# } -# } -# ) -# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -# ## HopperJump -# register( -# id= "ALRHopperJumpProMP-v2", -# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', -# kwargs={ -# "name": f"alr_envs:ALRHopperJump-v2", -# "wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper], -# "mp_kwargs": { -# "num_dof": 3, -# "num_basis": 5, -# "duration": 2, -# "post_traj_time": 0, -# "policy_type": "motor", -# "weights_scale": 1.0, -# "zero_start": True, -# "zero_goal": False, -# "policy_kwargs": { -# "p_gains": np.ones(3), -# "d_gains": 0.1*np.ones(3) -# } -# } -# } -# ) -# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2") - -## HopperJump -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRHopperJumpProMP-{_v}' - register( - id= _env_id, - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', - kwargs={ - "name": f"alr_envs:ALRHopperJump-{_v}", - "wrappers": [mujoco.hopper_jump.NewMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 3 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 2, # initial value - 'learn_tau': False, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.ones(3), - "d_gains": 0.1*np.ones(3), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 1 - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## HopperJump -register( - id= "ALRHopperJumpProMP-v2", - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', - kwargs={ - "name": f"alr_envs:ALRHopperJump-v2", - "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 3 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 2, # initial value - 'learn_tau': False, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.ones(3), - "d_gains": 0.1*np.ones(3), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 1 - } - } -) -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2") - - -## HopperJump -register( - id= "ALRHopperJumpProMP-v3", - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', - kwargs={ - "name": f"alr_envs:ALRHopperJump-v3", - "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 3 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 2, # initial value - 'learn_tau': False, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.ones(3), - "d_gains": 0.1*np.ones(3), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 1 - } - } -) 
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v3") - - -## HopperJump -register( - id= "ALRHopperJumpProMP-v4", - entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper', - kwargs={ - "name": f"alr_envs:ALRHopperJump-v4", - "wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper], - "ep_wrapper_kwargs": { - "weight_scale": 1 - }, - "movement_primitives_kwargs": { - 'movement_primitives_type': 'promp', - 'action_dim': 3 - }, - "phase_generator_kwargs": { - 'phase_generator_type': 'linear', - 'delay': 0, - 'tau': 2, # initial value - 'learn_tau': False, - 'learn_delay': False - }, - "controller_kwargs": { - 'controller_type': 'motor', - "p_gains": np.ones(3), - "d_gains": 0.1*np.ones(3), - }, - "basis_generator_kwargs": { - 'basis_generator_type': 'zero_rbf', - 'num_basis': 5, - 'num_basis_zero_start': 1 - } - } -) -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v4") - -## HopperJumpOnBox -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRHopperJumpOnBoxProMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:ALRHopperJumpOnBox-{_v}", - "wrappers": [mujoco.hopper_jump.MPWrapper], - "mp_kwargs": { - "num_dof": 3, - "num_basis": 5, - "duration": 2, - "post_traj_time": 0, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": np.ones(3), - "d_gains": 0.1*np.ones(3) - } - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -#HopperThrow -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRHopperThrowProMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:ALRHopperThrow-{_v}", - "wrappers": [mujoco.hopper_throw.MPWrapper], - "mp_kwargs": { - "num_dof": 3, - "num_basis": 5, - "duration": 2, - "post_traj_time": 0, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": np.ones(3), - "d_gains": 0.1*np.ones(3) - } - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## HopperThrowInBasket -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRHopperThrowInBasketProMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:ALRHopperThrowInBasket-{_v}", - "wrappers": [mujoco.hopper_throw.MPWrapper], - "mp_kwargs": { - "num_dof": 3, - "num_basis": 5, - "duration": 2, - "post_traj_time": 0, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": np.ones(3), - "d_gains": 0.1*np.ones(3) - } - } - } - ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) - -## Walker2DJump -_versions = ["v0", "v1"] -for _v in _versions: - _env_id = f'ALRWalker2DJumpProMP-{_v}' - register( - id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"alr_envs:ALRWalker2DJump-{_v}", - "wrappers": [mujoco.walker_2d_jump.MPWrapper], - "mp_kwargs": { - "num_dof": 6, - "num_basis": 5, - "duration": 2.4, - "post_traj_time": 0, - "policy_type": "motor", - "weights_scale": 1.0, - "zero_start": True, - "zero_goal": False, - "policy_kwargs": { - "p_gains": np.ones(6), - "d_gains": 0.1*np.ones(6) - } - } - } - ) - 
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) diff --git a/alr_envs/alr/mujoco/reacher/alr_reacher.py b/alr_envs/alr/mujoco/reacher/alr_reacher.py deleted file mode 100644 index 0699c44..0000000 --- a/alr_envs/alr/mujoco/reacher/alr_reacher.py +++ /dev/null @@ -1,152 +0,0 @@ -import os - -import numpy as np -from gym import utils -from gym.envs.mujoco import MujocoEnv - -import alr_envs.utils.utils as alr_utils - - -class ALRReacherEnv(MujocoEnv, utils.EzPickle): - def __init__(self, steps_before_reward: int = 200, n_links: int = 5, ctrl_cost_weight: int = 1, - balance: bool = False): - utils.EzPickle.__init__(**locals()) - - self._steps = 0 - self.steps_before_reward = steps_before_reward - self.n_links = n_links - - self.balance = balance - self.balance_weight = 1.0 - self.ctrl_cost_weight = ctrl_cost_weight - - self.reward_weight = 1 - if steps_before_reward == 200: - self.reward_weight = 200 - elif steps_before_reward == 50: - self.reward_weight = 50 - - if n_links == 5: - file_name = 'reacher_5links.xml' - elif n_links == 7: - file_name = 'reacher_7links.xml' - else: - raise ValueError(f"Invalid number of links {n_links}, only 5 or 7 allowed.") - - MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", file_name), 2) - - def step(self, a): - self._steps += 1 - - reward_dist = 0.0 - angular_vel = 0.0 - reward_balance = 0.0 - is_delayed = self.steps_before_reward > 0 - reward_ctrl = - np.square(a).sum() * self.ctrl_cost_weight - if self._steps >= self.steps_before_reward: - vec = self.get_body_com("fingertip") - self.get_body_com("target") - reward_dist -= self.reward_weight * np.linalg.norm(vec) - if is_delayed: - # avoid giving this penalty for normal step based case - # angular_vel -= 10 * np.linalg.norm(self.sim.data.qvel.flat[:self.n_links]) - angular_vel -= 10 * np.square(self.sim.data.qvel.flat[:self.n_links]).sum() - # if is_delayed: - # # Higher control penalty for sparse reward per timestep - # reward_ctrl *= 10 - - if self.balance: - reward_balance -= self.balance_weight * np.abs( - alr_utils.angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad")) - - reward = reward_dist + reward_ctrl + angular_vel + reward_balance - self.do_simulation(a, self.frame_skip) - ob = self._get_obs() - done = False - return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl, - velocity=angular_vel, reward_balance=reward_balance, - end_effector=self.get_body_com("fingertip").copy(), - goal=self.goal if hasattr(self, "goal") else None) - - def viewer_setup(self): - self.viewer.cam.trackbodyid = 0 - - # def reset_model(self): - # qpos = self.init_qpos - # if not hasattr(self, "goal"): - # self.goal = np.array([-0.25, 0.25]) - # # self.goal = self.init_qpos.copy()[:2] + 0.05 - # qpos[-2:] = self.goal - # qvel = self.init_qvel - # qvel[-2:] = 0 - # self.set_state(qpos, qvel) - # self._steps = 0 - # - # return self._get_obs() - - def reset_model(self): - qpos = self.init_qpos.copy() - while True: - # full space - # self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2) - # I Quadrant - # self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2) - # II Quadrant - # self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2) - # II + III Quadrant - # self.goal = np.random.uniform(low=-self.n_links / 10, high=[0, self.n_links / 10], size=2) - # I + II Quadrant - self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=self.n_links, 
size=2) - if np.linalg.norm(self.goal) < self.n_links / 10: - break - qpos[-2:] = self.goal - qvel = self.init_qvel.copy() - qvel[-2:] = 0 - self.set_state(qpos, qvel) - self._steps = 0 - - return self._get_obs() - - # def reset_model(self): - # qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos - # while True: - # self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2) - # if np.linalg.norm(self.goal) < self.n_links / 10: - # break - # qpos[-2:] = self.goal - # qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv) - # qvel[-2:] = 0 - # self.set_state(qpos, qvel) - # self._steps = 0 - # - # return self._get_obs() - - def _get_obs(self): - theta = self.sim.data.qpos.flat[:self.n_links] - target = self.get_body_com("target") - return np.concatenate([ - np.cos(theta), - np.sin(theta), - target[:2], # x-y of goal position - self.sim.data.qvel.flat[:self.n_links], # angular velocity - self.get_body_com("fingertip") - target, # goal distance - [self._steps], - ]) - - -if __name__ == '__main__': - nl = 5 - render_mode = "human" # "human" or "partial" or "final" - env = ALRReacherEnv(n_links=nl) - obs = env.reset() - - for i in range(2000): - # objective.load_result("/tmp/cma") - # test with random actions - ac = env.action_space.sample() - obs, rew, d, info = env.step(ac) - if i % 10 == 0: - env.render(mode=render_mode) - if d: - env.reset() - - env.close() diff --git a/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py b/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py deleted file mode 100644 index bf59380..0000000 --- a/alr_envs/alr/mujoco/reacher/new_mp_wrapper.py +++ /dev/null @@ -1,24 +0,0 @@ -from alr_envs.mp.episodic_wrapper import EpisodicWrapper -from typing import Union, Tuple -import numpy as np - - -class NewMPWrapper(EpisodicWrapper): - - @property - def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: - return self.env.sim.data.qpos.flat[:self.env.n_links] - @property - def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: - return self.env.sim.data.qvel.flat[:self.env.n_links] - - def set_active_obs(self): - return np.concatenate([ - [False] * self.env.n_links, # cos - [False] * self.env.n_links, # sin - [True] * 2, # goal position - [False] * self.env.n_links, # angular velocity - [False] * 3, # goal distance - # self.get_body_com("target"), # only return target to make problem harder - [False], # step - ]) diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py index 5650c71..a567d16 100644 --- a/fancy_gym/black_box/black_box_wrapper.py +++ b/fancy_gym/black_box/black_box_wrapper.py @@ -86,7 +86,7 @@ class BlackBoxWrapper(gym.ObservationWrapper): bc_time = np.array(0 if not self.do_replanning else self.current_traj_steps * self.dt) # TODO we could think about initializing with the previous desired value in order to have a smooth transition # at least from the planning point of view. 
- self.traj_gen.set_boundary_conditions(bc_time, self.current_pos, self.current_vel) + self.traj_gen.set_initial_conditions(bc_time, self.current_pos, self.current_vel) self.traj_gen.set_duration(duration, self.dt) # traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True) position = get_numpy(self.traj_gen.get_traj_pos()) diff --git a/fancy_gym/envs/classic_control/simple_reacher/__init__.py b/fancy_gym/envs/classic_control/simple_reacher/__init__.py index 5d15867..989b5a9 100644 --- a/fancy_gym/envs/classic_control/simple_reacher/__init__.py +++ b/fancy_gym/envs/classic_control/simple_reacher/__init__.py @@ -1,2 +1 @@ -from .mp_wrapper import MPWrapper -from .new_mp_wrapper import NewMPWrapper +from .mp_wrapper import MPWrapper \ No newline at end of file diff --git a/fancy_gym/meta/__init__.py b/fancy_gym/meta/__init__.py index 98935dc..9304c72 100644 --- a/fancy_gym/meta/__init__.py +++ b/fancy_gym/meta/__init__.py @@ -36,7 +36,8 @@ DEFAULT_BB_DICT_ProDMP = { 'trajectory_generator_type': 'prodmp', 'auto_scale_basis': True, 'weights_scale': 10, - 'goal_scale': 0. + # 'goal_scale': 0., + 'disable_goal': True, }, "phase_generator_kwargs": { 'phase_generator_type': 'exp',
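Note on the core mp_pytorch API change above: the BlackBoxWrapper hunk renames the boundary-condition call on the trajectory generator from set_boundary_conditions to set_initial_conditions while leaving the surrounding call sequence unchanged, and the ProDMP default config in fancy_gym/meta now disables the goal basis via 'disable_goal': True instead of zeroing 'goal_scale'. The following is a minimal sketch of the generator call sequence after the rename, assuming traj_gen is an already-constructed MP_PyTorch trajectory generator whose parameters have been set from the policy output; set_initial_conditions, set_duration and get_traj_pos are taken directly from the hunk, while get_traj_vel is assumed to mirror get_traj_pos (it is not shown in the visible context).

    import numpy as np

    def sample_trajectory(traj_gen, current_pos, current_vel, duration, dt,
                          do_replanning=False, current_traj_steps=0):
        """Sketch of the rollout query in BlackBoxWrapper after the mp_pytorch update.

        traj_gen: assumed MP_PyTorch trajectory generator whose weights (and, if
        enabled, tau/delay) were already set elsewhere in the wrapper.
        """
        # Condition time: 0 for single-shot episodes, elapsed time when replanning.
        bc_time = np.array(0 if not do_replanning else current_traj_steps * dt)

        # Renamed in the new mp_pytorch API (previously set_boundary_conditions).
        traj_gen.set_initial_conditions(bc_time, current_pos, current_vel)
        traj_gen.set_duration(duration, dt)

        # Dense desired trajectory for the step-based tracking controller.
        # Outputs may be torch tensors and may need converting to numpy downstream.
        position = traj_gen.get_traj_pos()
        velocity = traj_gen.get_traj_vel()  # assumed counterpart of get_traj_pos
        return position, velocity

The sketch only illustrates the renamed call; construction of the generator from the registration kwargs (phase_generator_kwargs, basis_generator_kwargs, controller_kwargs as in the registrations deleted above) is unchanged by this patch and is omitted here.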