From 123915e4fa3c4fcad9807fe473c06cbb6d321836 Mon Sep 17 00:00:00 2001
From: Fabian
Date: Mon, 11 Jul 2022 16:42:56 +0200
Subject: [PATCH] naming convention and running tests

---
 alr_envs/black_box/black_box_wrapper.py |   4 +-
 alr_envs/dmc/__init__.py                | 306 +-----------------------
 2 files changed, 14 insertions(+), 296 deletions(-)

diff --git a/alr_envs/black_box/black_box_wrapper.py b/alr_envs/black_box/black_box_wrapper.py
index 3686ad9..8635ca7 100644
--- a/alr_envs/black_box/black_box_wrapper.py
+++ b/alr_envs/black_box/black_box_wrapper.py
@@ -71,8 +71,8 @@ class BlackBoxWrapper(gym.ObservationWrapper):
     def observation(self, observation):
         # return context space if we are
         mask = self.env.context_mask
-        if self.is_time_aware:
-            mask = np.append(mask, False)
+        # if self.is_time_aware:
+        #     mask = np.append(mask, False)
         obs = observation[mask] if self.return_context_observation else observation
         # cast dtype because metaworld returns incorrect that throws gym error
         return obs.astype(self.observation_space.dtype)
diff --git a/alr_envs/dmc/__init__.py b/alr_envs/dmc/__init__.py
index 0993661..c1e24eb 100644
--- a/alr_envs/dmc/__init__.py
+++ b/alr_envs/dmc/__init__.py
@@ -17,8 +17,8 @@ DEFAULT_BB_DICT_ProMP = {
     },
     "controller_kwargs": {
         'controller_type': 'motor',
-        "p_gains": 1.0,
-        "d_gains": 0.1,
+        "p_gains": 50.,
+        "d_gains": 1.,
     },
     "basis_generator_kwargs": {
         'basis_generator_type': 'zero_rbf',
@@ -38,8 +38,8 @@ DEFAULT_BB_DICT_DMP = {
     },
     "controller_kwargs": {
         'controller_type': 'motor',
-        "p_gains": 1.0,
-        "d_gains": 0.1,
+        "p_gains": 50.,
+        "d_gains": 1.,
     },
     "basis_generator_kwargs": {
         'basis_generator_type': 'rbf',
@@ -51,200 +51,73 @@ DEFAULT_BB_DICT_DMP = {
 kwargs_dict_bic_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
 kwargs_dict_bic_dmp['name'] = f"dmc:ball_in_cup-catch"
 kwargs_dict_bic_dmp['wrappers'].append(suite.ball_in_cup.MPWrapper)
+# bandwidth_factor=2
 kwargs_dict_bic_dmp['phase_generator_kwargs']['alpha_phase'] = 2
 kwargs_dict_bic_dmp['trajectory_generator_kwargs']['weight_scale'] = 10  # TODO: weight scale 1, but goal scale 0.1
-kwargs_dict_bic_dmp['controller_kwargs']['p_gains'] = 50
-kwargs_dict_bic_dmp['controller_kwargs']['d_gains'] = 1
 register(
     id=f'dmc_ball_in_cup-catch_dmp-v0',
     entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
-    # max_episode_steps=1,
     kwargs=kwargs_dict_bic_dmp
-    # {
-    #     "name": f"ball_in_cup-catch",
-    #     "time_limit": 20,
-    #     "episode_length": 1000,
-    #     "wrappers": [suite.ball_in_cup.MPWrapper],
-    #     "traj_gen_kwargs": {
-    #         "num_dof": 2,
-    #         "num_basis": 5,
-    #         "duration": 20,
-    #         "learn_goal": True,
-    #         "alpha_phase": 2,
-    #         "bandwidth_factor": 2,
-    #         "policy_type": "motor",
-    #         "goal_scale": 0.1,
-    #         "policy_kwargs": {
-    #             "p_gains": 50,
-    #             "d_gains": 1
-    #         }
-    #     }
-    # }
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0")
 
 kwargs_dict_bic_promp = deepcopy(DEFAULT_BB_DICT_DMP)
 kwargs_dict_bic_promp['name'] = f"dmc:ball_in_cup-catch"
 kwargs_dict_bic_promp['wrappers'].append(suite.ball_in_cup.MPWrapper)
-kwargs_dict_bic_promp['controller_kwargs']['p_gains'] = 50
-kwargs_dict_bic_promp['controller_kwargs']['d_gains'] = 1
 register(
     id=f'dmc_ball_in_cup-catch_promp-v0',
     entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_bic_promp
-    # {
-    #     "name": f"ball_in_cup-catch",
-    #     "time_limit": 20,
-    #     "episode_length": 1000,
-    #     "wrappers": [suite.ball_in_cup.MPWrapper],
-    #     "traj_gen_kwargs": {
-    #         "num_dof": 2,
-    #         "num_basis": 5,
-    #         "duration": 20,
-    #         "policy_type": "motor",
-    #         "zero_start": True,
-    #         "policy_kwargs": {
-    #             "p_gains": 50,
-    #             "d_gains": 1
-    #         }
-    #     }
-    # }
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_ball_in_cup-catch_promp-v0")
 
 kwargs_dict_reacher_easy_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
 kwargs_dict_reacher_easy_dmp['name'] = f"dmc:reacher-easy"
 kwargs_dict_reacher_easy_dmp['wrappers'].append(suite.reacher.MPWrapper)
+# bandwidth_factor=2
 kwargs_dict_reacher_easy_dmp['phase_generator_kwargs']['alpha_phase'] = 2
 # TODO: weight scale 50, but goal scale 0.1
 kwargs_dict_reacher_easy_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
-kwargs_dict_reacher_easy_dmp['controller_kwargs']['p_gains'] = 50
-kwargs_dict_reacher_easy_dmp['controller_kwargs']['d_gains'] = 1
 register(
     id=f'dmc_reacher-easy_dmp-v0',
     entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
-    # max_episode_steps=1,
     kwargs=kwargs_dict_bic_dmp
-    # {
-    #     "name": f"reacher-easy",
-    #     "time_limit": 20,
-    #     "episode_length": 1000,
-    #     "wrappers": [suite.reacher.MPWrapper],
-    #     "traj_gen_kwargs": {
-    #         "num_dof": 2,
-    #         "num_basis": 5,
-    #         "duration": 20,
-    #         "learn_goal": True,
-    #         "alpha_phase": 2,
-    #         "bandwidth_factor": 2,
-    #         "policy_type": "motor",
-    #         "weights_scale": 50,
-    #         "goal_scale": 0.1,
-    #         "policy_kwargs": {
-    #             "p_gains": 50,
-    #             "d_gains": 1
-    #         }
-    #     }
-    # }
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0")
 
 kwargs_dict_reacher_easy_promp = deepcopy(DEFAULT_BB_DICT_DMP)
 kwargs_dict_reacher_easy_promp['name'] = f"dmc:reacher-easy"
 kwargs_dict_reacher_easy_promp['wrappers'].append(suite.reacher.MPWrapper)
-kwargs_dict_reacher_easy_promp['controller_kwargs']['p_gains'] = 50
-kwargs_dict_reacher_easy_promp['controller_kwargs']['d_gains'] = 1
 kwargs_dict_reacher_easy_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
 register(
     id=f'dmc_reacher-easy_promp-v0',
     entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_reacher_easy_promp
-    # {
-    #     "name": f"reacher-easy",
-    #     "time_limit": 20,
-    #     "episode_length": 1000,
-    #     "wrappers": [suite.reacher.MPWrapper],
-    #     "traj_gen_kwargs": {
-    #         "num_dof": 2,
-    #         "num_basis": 5,
-    #         "duration": 20,
-    #         "policy_type": "motor",
-    #         "weights_scale": 0.2,
-    #         "zero_start": True,
-    #         "policy_kwargs": {
-    #             "p_gains": 50,
-    #             "d_gains": 1
-    #         }
-    #     }
-    # }
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-easy_promp-v0")
 
 kwargs_dict_reacher_hard_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
 kwargs_dict_reacher_hard_dmp['name'] = f"dmc:reacher-hard"
 kwargs_dict_reacher_hard_dmp['wrappers'].append(suite.reacher.MPWrapper)
+# bandwidth_factor = 2
 kwargs_dict_reacher_hard_dmp['phase_generator_kwargs']['alpha_phase'] = 2
 # TODO: weight scale 50, but goal scale 0.1
 kwargs_dict_reacher_hard_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
-kwargs_dict_reacher_hard_dmp['controller_kwargs']['p_gains'] = 50
-kwargs_dict_reacher_hard_dmp['controller_kwargs']['d_gains'] = 1
 register(
     id=f'dmc_reacher-hard_dmp-v0',
     entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
-    # max_episode_steps=1,
     kwargs=kwargs_dict_reacher_hard_dmp
-    # {
-    #     "name": f"reacher-hard",
-    #     "time_limit": 20,
-    #     "episode_length": 1000,
-    #     "wrappers": [suite.reacher.MPWrapper],
-    #     "traj_gen_kwargs": {
-    #         "num_dof": 2,
-    #         "num_basis": 5,
-    #         "duration": 20,
-    #         "learn_goal": True,
-    #         "alpha_phase": 2,
-    #         "bandwidth_factor": 2,
-    #         "policy_type": "motor",
-    #         "weights_scale": 50,
-    #         "goal_scale": 0.1,
-    #         "policy_kwargs": {
-    #             "p_gains": 50,
-    #             "d_gains": 1
-    #         }
-    #     }
-    # }
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0")
 
 kwargs_dict_reacher_hard_promp = deepcopy(DEFAULT_BB_DICT_DMP)
 kwargs_dict_reacher_hard_promp['name'] = f"dmc:reacher-hard"
 kwargs_dict_reacher_hard_promp['wrappers'].append(suite.reacher.MPWrapper)
-kwargs_dict_reacher_hard_promp['controller_kwargs']['p_gains'] = 50
-kwargs_dict_reacher_hard_promp['controller_kwargs']['d_gains'] = 1
 kwargs_dict_reacher_hard_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
 register(
     id=f'dmc_reacher-hard_promp-v0',
     entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_reacher_hard_promp
-    # {
-    #     "name": f"reacher-hard",
-    #     "time_limit": 20,
-    #     "episode_length": 1000,
-    #     "wrappers": [suite.reacher.MPWrapper],
-    #     "traj_gen_kwargs": {
-    #         "num_dof": 2,
-    #         "num_basis": 5,
-    #         "duration": 20,
-    #         "policy_type": "motor",
-    #         "weights_scale": 0.2,
-    #         "zero_start": True,
-    #         "policy_kwargs": {
-    #             "p_gains": 50,
-    #             "d_gains": 1
-    #         }
-    #     }
-    # }
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-hard_promp-v0")
 
@@ -255,38 +128,16 @@ for _task in _dmc_cartpole_tasks:
     kwargs_dict_cartpole_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
     kwargs_dict_cartpole_dmp['name'] = f"dmc:cartpole-{_task}"
     kwargs_dict_cartpole_dmp['wrappers'].append(suite.cartpole.MPWrapper)
+    # bandwidth_factor = 2
     kwargs_dict_cartpole_dmp['phase_generator_kwargs']['alpha_phase'] = 2
-    kwargs_dict_cartpole_dmp['trajectory_generator_kwargs'][
-        'weight_scale'] = 500  # TODO: weight scale 50, but goal scale 0.1
+    # TODO: weight scale 50, but goal scale 0.1
+    kwargs_dict_cartpole_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
     kwargs_dict_cartpole_dmp['controller_kwargs']['p_gains'] = 10
     kwargs_dict_cartpole_dmp['controller_kwargs']['d_gains'] = 10
     register(
         id=_env_id,
        entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
-        # max_episode_steps=1,
        kwargs=kwargs_dict_cartpole_dmp
-        # {
-        #     "name": f"cartpole-{_task}",
-        #     # "time_limit": 1,
-        #     "camera_id": 0,
-        #     "episode_length": 1000,
-        #     "wrappers": [suite.cartpole.MPWrapper],
-        #     "traj_gen_kwargs": {
-        #         "num_dof": 1,
-        #         "num_basis": 5,
-        #         "duration": 10,
-        #         "learn_goal": True,
-        #         "alpha_phase": 2,
-        #         "bandwidth_factor": 2,
-        #         "policy_type": "motor",
-        #         "weights_scale": 50,
-        #         "goal_scale": 0.1,
-        #         "policy_kwargs": {
-        #             "p_gains": 10,
-        #             "d_gains": 10
-        #         }
-        #     }
-        # }
     )
     ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
 
@@ -301,31 +152,13 @@ for _task in _dmc_cartpole_tasks:
         id=_env_id,
         entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_cartpole_promp
-        # {
-        #     "name": f"cartpole-{_task}",
-        #     # "time_limit": 1,
-        #     "camera_id": 0,
-        #     "episode_length": 1000,
-        #     "wrappers": [suite.cartpole.MPWrapper],
-        #     "traj_gen_kwargs": {
-        #         "num_dof": 1,
-        #         "num_basis": 5,
-        #         "duration": 10,
-        #         "policy_type": "motor",
-        #         "weights_scale": 0.2,
-        #         "zero_start": True,
-        #         "policy_kwargs": {
-        #             "p_gains": 10,
-        #             "d_gains": 10
-        #         }
-        #     }
-        # }
     )
     ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 kwargs_dict_cartpole2poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
 kwargs_dict_cartpole2poles_dmp['name'] = f"dmc:cartpole-two_poles"
 kwargs_dict_cartpole2poles_dmp['wrappers'].append(suite.cartpole.TwoPolesMPWrapper)
+# bandwidth_factor = 2
 kwargs_dict_cartpole2poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2
 # TODO: weight scale 50, but goal scale 0.1
 kwargs_dict_cartpole2poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
@@ -335,30 +168,7 @@ _env_id = f'dmc_cartpole-two_poles_dmp-v0'
 register(
     id=_env_id,
     entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
-    # max_episode_steps=1,
     kwargs=kwargs_dict_cartpole2poles_dmp
-    # {
-    #     "name": f"cartpole-two_poles",
-    #     # "time_limit": 1,
-    #     "camera_id": 0,
-    #     "episode_length": 1000,
-    #     "wrappers": [suite.cartpole.TwoPolesMPWrapper],
-    #     "traj_gen_kwargs": {
-    #         "num_dof": 1,
-    #         "num_basis": 5,
-    #         "duration": 10,
-    #         "learn_goal": True,
-    #         "alpha_phase": 2,
-    #         "bandwidth_factor": 2,
-    #         "policy_type": "motor",
-    #         "weights_scale": 50,
-    #         "goal_scale": 0.1,
-    #         "policy_kwargs": {
-    #             "p_gains": 10,
-    #             "d_gains": 10
-    #         }
-    #     }
-    # }
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
 
@@ -373,31 +183,13 @@ register(
     id=_env_id,
     entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_cartpole2poles_promp
-    # {
-    #     "name": f"cartpole-two_poles",
-    #     # "time_limit": 1,
-    #     "camera_id": 0,
-    #     "episode_length": 1000,
-    #     "wrappers": [suite.cartpole.TwoPolesMPWrapper],
-    #     "traj_gen_kwargs": {
-    #         "num_dof": 1,
-    #         "num_basis": 5,
-    #         "duration": 10,
-    #         "policy_type": "motor",
-    #         "weights_scale": 0.2,
-    #         "zero_start": True,
-    #         "policy_kwargs": {
-    #             "p_gains": 10,
-    #             "d_gains": 10
-    #         }
-    #     }
-    # }
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 kwargs_dict_cartpole3poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
 kwargs_dict_cartpole3poles_dmp['name'] = f"dmc:cartpole-three_poles"
 kwargs_dict_cartpole3poles_dmp['wrappers'].append(suite.cartpole.ThreePolesMPWrapper)
+# bandwidth_factor = 2
 kwargs_dict_cartpole3poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2
 # TODO: weight scale 50, but goal scale 0.1
 kwargs_dict_cartpole3poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
@@ -407,30 +199,7 @@ _env_id = f'dmc_cartpole-three_poles_dmp-v0'
 register(
     id=_env_id,
     entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
-    # max_episode_steps=1,
     kwargs=kwargs_dict_cartpole3poles_dmp
-    # {
-    #     "name": f"cartpole-three_poles",
-    #     # "time_limit": 1,
-    #     "camera_id": 0,
-    #     "episode_length": 1000,
-    #     "wrappers": [suite.cartpole.ThreePolesMPWrapper],
-    #     "traj_gen_kwargs": {
-    #         "num_dof": 1,
-    #         "num_basis": 5,
-    #         "duration": 10,
-    #         "learn_goal": True,
-    #         "alpha_phase": 2,
-    #         "bandwidth_factor": 2,
-    #         "policy_type": "motor",
-    #         "weights_scale": 50,
-    #         "goal_scale": 0.1,
-    #         "policy_kwargs": {
-    #             "p_gains": 10,
-    #             "d_gains": 10
-    #         }
-    #     }
-    # }
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
 
@@ -445,25 +214,6 @@ register(
     id=_env_id,
     entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_cartpole3poles_promp
-    # {
-    #     "name": f"cartpole-three_poles",
-    #     # "time_limit": 1,
-    #     "camera_id": 0,
-    #     "episode_length": 1000,
-    #     "wrappers": [suite.cartpole.ThreePolesMPWrapper],
-    #     "traj_gen_kwargs": {
-    #         "num_dof": 1,
-    #         "num_basis": 5,
-    #         "duration": 10,
-    #         "policy_type": "motor",
-    #         "weights_scale": 0.2,
-    #         "zero_start": True,
-    #         "policy_kwargs": {
-    #             "p_gains": 10,
-    #             "d_gains": 10
-    #         }
-    #     }
-    # }
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
@@ -478,25 +228,7 @@ kwargs_dict_mani_reach_site_features_dmp['controller_kwargs']['controller_type']
 register(
     id=f'dmc_manipulation-reach_site_dmp-v0',
     entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
-    # max_episode_steps=1,
     kwargs=kwargs_dict_mani_reach_site_features_dmp
-    # {
-    #     "name": f"manipulation-reach_site_features",
-    #     # "time_limit": 1,
-    #     "episode_length": 250,
-    #     "wrappers": [manipulation.reach_site.MPWrapper],
-    #     "traj_gen_kwargs": {
-    #         "num_dof": 9,
-    #         "num_basis": 5,
-    #         "duration": 10,
-    #         "learn_goal": True,
-    #         "alpha_phase": 2,
-    #         "bandwidth_factor": 2,
-    #         "policy_type": "velocity",
-    #         "weights_scale": 50,
-    #         "goal_scale": 0.1,
-    #     }
-    # }
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0")
 
@@ -509,19 +241,5 @@ register(
     id=f'dmc_manipulation-reach_site_promp-v0',
     entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_mani_reach_site_features_promp
-    # {
-    #     "name": f"manipulation-reach_site_features",
-    #     # "time_limit": 1,
-    #     "episode_length": 250,
-    #     "wrappers": [manipulation.reach_site.MPWrapper],
-    #     "traj_gen_kwargs": {
-    #         "num_dof": 9,
-    #         "num_basis": 5,
-    #         "duration": 10,
-    #         "policy_type": "velocity",
-    #         "weights_scale": 0.2,
-    #         "zero_start": True,
-    #     }
-    # }
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_manipulation-reach_site_promp-v0")
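
Note (not part of the patch): a quick smoke test for one of the registrations touched above. This is a minimal sketch under two assumptions: importing alr_envs executes the register() calls in alr_envs/dmc/__init__.py, and the 2022-era gym step API (4-tuple return) is in use.

import gym

import alr_envs  # noqa: F401  -- side effect: registers the dmc_* ids used below

# Hypothetical check: the id comes from the ProMP registration above.
env = gym.make('dmc_ball_in_cup-catch_promp-v0')
obs = env.reset()

# With the BlackBoxWrapper, one env.step() consumes a full parameter vector
# (e.g. ProMP weights) and rolls out the whole trajectory internally,
# so the episode typically terminates after this single step.
action = env.action_space.sample()
obs, reward, done, info = env.step(action)
print(reward, done)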