diff --git a/README.md b/README.md index 607af63..ac012c4 100644 --- a/README.md +++ b/README.md @@ -113,7 +113,7 @@ print("OpenAI Gym MP tasks:") print(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS) print("Deepmind Control MP tasks:") -print(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS) +print(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS) print("MetaWorld MP tasks:") print(alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS) diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py index cf910b9..e4a405d 100644 --- a/alr_envs/__init__.py +++ b/alr_envs/__init__.py @@ -1,15 +1,14 @@ from alr_envs import dmc, meta, open_ai -from alr_envs.utils import make_dmc from alr_envs.utils.make_env_helpers import make, make_bb, make_rank # Convenience function for all MP environments from .alr import ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS -from .dmc import ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS +from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS from .meta import ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS from .open_ai import ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS -ALL_MOTION_PRIMITIVE_ENVIRONMENTS = { - key: value + ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS[key] + +ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = { + key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] + ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS[key] + ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS[key] for key, value in ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS.items()} diff --git a/alr_envs/black_box/black_box_wrapper.py b/alr_envs/black_box/black_box_wrapper.py index a3b504c..3686ad9 100644 --- a/alr_envs/black_box/black_box_wrapper.py +++ b/alr_envs/black_box/black_box_wrapper.py @@ -1,4 +1,7 @@ -from typing import Tuple, Union, Optional +import os +os.environ["MUJOCO_GL"] = "egl" + +from typing import Tuple, Optional import gym import numpy as np @@ -67,7 +70,10 @@ class BlackBoxWrapper(gym.ObservationWrapper): def observation(self, observation): # return context space if we are - obs = observation[self.env.context_mask] if self.return_context_observation else observation + mask = self.env.context_mask + if self.is_time_aware: + mask = np.append(mask, False) + obs = observation[mask] if self.return_context_observation else observation # cast dtype because metaworld returns incorrect that throws gym error return obs.astype(self.observation_space.dtype) diff --git a/alr_envs/dmc/__init__.py b/alr_envs/dmc/__init__.py index ca27f79..0993661 100644 --- a/alr_envs/dmc/__init__.py +++ b/alr_envs/dmc/__init__.py @@ -2,7 +2,7 @@ from copy import deepcopy from . 
import manipulation, suite -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} +ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} from gym.envs.registration import register @@ -47,10 +47,9 @@ DEFAULT_BB_DICT_DMP = { } } - # DeepMind Control Suite (DMC) kwargs_dict_bic_dmp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_bic_dmp['name'] = f"ball_in_cup-catch" +kwargs_dict_bic_dmp['name'] = f"dmc:ball_in_cup-catch" kwargs_dict_bic_dmp['wrappers'].append(suite.ball_in_cup.MPWrapper) kwargs_dict_bic_dmp['phase_generator_kwargs']['alpha_phase'] = 2 kwargs_dict_bic_dmp['trajectory_generator_kwargs']['weight_scale'] = 10 # TODO: weight scale 1, but goal scale 0.1 @@ -58,304 +57,313 @@ kwargs_dict_bic_dmp['controller_kwargs']['p_gains'] = 50 kwargs_dict_bic_dmp['controller_kwargs']['d_gains'] = 1 register( id=f'dmc_ball_in_cup-catch_dmp-v0', - entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', # max_episode_steps=1, - kwargs={ - "name": f"ball_in_cup-catch", - "time_limit": 20, - "episode_length": 1000, - "wrappers": [suite.ball_in_cup.MPWrapper], - "traj_gen_kwargs": { - "num_dof": 2, - "num_basis": 5, - "duration": 20, - "learn_goal": True, - "alpha_phase": 2, - "bandwidth_factor": 2, - "policy_type": "motor", - "goal_scale": 0.1, - "policy_kwargs": { - "p_gains": 50, - "d_gains": 1 - } - } - } + kwargs=kwargs_dict_bic_dmp + # { + # "name": f"ball_in_cup-catch", + # "time_limit": 20, + # "episode_length": 1000, + # "wrappers": [suite.ball_in_cup.MPWrapper], + # "traj_gen_kwargs": { + # "num_dof": 2, + # "num_basis": 5, + # "duration": 20, + # "learn_goal": True, + # "alpha_phase": 2, + # "bandwidth_factor": 2, + # "policy_type": "motor", + # "goal_scale": 0.1, + # "policy_kwargs": { + # "p_gains": 50, + # "d_gains": 1 + # } + # } + # } ) -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0") +ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0") kwargs_dict_bic_promp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_bic_promp['name'] = f"ball_in_cup-catch" +kwargs_dict_bic_promp['name'] = f"dmc:ball_in_cup-catch" kwargs_dict_bic_promp['wrappers'].append(suite.ball_in_cup.MPWrapper) kwargs_dict_bic_promp['controller_kwargs']['p_gains'] = 50 kwargs_dict_bic_promp['controller_kwargs']['d_gains'] = 1 register( id=f'dmc_ball_in_cup-catch_promp-v0', - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"ball_in_cup-catch", - "time_limit": 20, - "episode_length": 1000, - "wrappers": [suite.ball_in_cup.MPWrapper], - "traj_gen_kwargs": { - "num_dof": 2, - "num_basis": 5, - "duration": 20, - "policy_type": "motor", - "zero_start": True, - "policy_kwargs": { - "p_gains": 50, - "d_gains": 1 - } - } - } + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', + kwargs=kwargs_dict_bic_promp + # { + # "name": f"ball_in_cup-catch", + # "time_limit": 20, + # "episode_length": 1000, + # "wrappers": [suite.ball_in_cup.MPWrapper], + # "traj_gen_kwargs": { + # "num_dof": 2, + # "num_basis": 5, + # "duration": 20, + # "policy_type": "motor", + # "zero_start": True, + # "policy_kwargs": { + # "p_gains": 50, + # "d_gains": 1 + # } + # } + # } ) -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_ball_in_cup-catch_promp-v0") +ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_ball_in_cup-catch_promp-v0") kwargs_dict_reacher_easy_dmp = deepcopy(DEFAULT_BB_DICT_DMP) 
-kwargs_dict_reacher_easy_dmp['name'] = f"reacher-easy"
+kwargs_dict_reacher_easy_dmp['name'] = f"dmc:reacher-easy"
 kwargs_dict_reacher_easy_dmp['wrappers'].append(suite.reacher.MPWrapper)
 kwargs_dict_reacher_easy_dmp['phase_generator_kwargs']['alpha_phase'] = 2
-kwargs_dict_reacher_easy_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
+# TODO: weight scale 50, but goal scale 0.1
+kwargs_dict_reacher_easy_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
 kwargs_dict_reacher_easy_dmp['controller_kwargs']['p_gains'] = 50
 kwargs_dict_reacher_easy_dmp['controller_kwargs']['d_gains'] = 1
 register(
     id=f'dmc_reacher-easy_dmp-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
+    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
     # max_episode_steps=1,
-    kwargs={
-        "name": f"reacher-easy",
-        "time_limit": 20,
-        "episode_length": 1000,
-        "wrappers": [suite.reacher.MPWrapper],
-        "traj_gen_kwargs": {
-            "num_dof": 2,
-            "num_basis": 5,
-            "duration": 20,
-            "learn_goal": True,
-            "alpha_phase": 2,
-            "bandwidth_factor": 2,
-            "policy_type": "motor",
-            "weights_scale": 50,
-            "goal_scale": 0.1,
-            "policy_kwargs": {
-                "p_gains": 50,
-                "d_gains": 1
-            }
-        }
-    }
+    kwargs=kwargs_dict_reacher_easy_dmp
+    # {
+    #     "name": f"reacher-easy",
+    #     "time_limit": 20,
+    #     "episode_length": 1000,
+    #     "wrappers": [suite.reacher.MPWrapper],
+    #     "traj_gen_kwargs": {
+    #         "num_dof": 2,
+    #         "num_basis": 5,
+    #         "duration": 20,
+    #         "learn_goal": True,
+    #         "alpha_phase": 2,
+    #         "bandwidth_factor": 2,
+    #         "policy_type": "motor",
+    #         "weights_scale": 50,
+    #         "goal_scale": 0.1,
+    #         "policy_kwargs": {
+    #             "p_gains": 50,
+    #             "d_gains": 1
+    #         }
+    #     }
+    # }
 )
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0")
+ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0")
 kwargs_dict_reacher_easy_promp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_reacher_easy_promp['name'] = f"reacher-easy"
+kwargs_dict_reacher_easy_promp['name'] = f"dmc:reacher-easy"
 kwargs_dict_reacher_easy_promp['wrappers'].append(suite.reacher.MPWrapper)
 kwargs_dict_reacher_easy_promp['controller_kwargs']['p_gains'] = 50
 kwargs_dict_reacher_easy_promp['controller_kwargs']['d_gains'] = 1
 kwargs_dict_reacher_easy_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
 register(
     id=f'dmc_reacher-easy_promp-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
-    kwargs={
-        "name": f"reacher-easy",
-        "time_limit": 20,
-        "episode_length": 1000,
-        "wrappers": [suite.reacher.MPWrapper],
-        "traj_gen_kwargs": {
-            "num_dof": 2,
-            "num_basis": 5,
-            "duration": 20,
-            "policy_type": "motor",
-            "weights_scale": 0.2,
-            "zero_start": True,
-            "policy_kwargs": {
-                "p_gains": 50,
-                "d_gains": 1
-            }
-        }
-    }
+    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+    kwargs=kwargs_dict_reacher_easy_promp
+    # {
+    #     "name": f"reacher-easy",
+    #     "time_limit": 20,
+    #     "episode_length": 1000,
+    #     "wrappers": [suite.reacher.MPWrapper],
+    #     "traj_gen_kwargs": {
+    #         "num_dof": 2,
+    #         "num_basis": 5,
+    #         "duration": 20,
+    #         "policy_type": "motor",
+    #         "weights_scale": 0.2,
+    #         "zero_start": True,
+    #         "policy_kwargs": {
+    #             "p_gains": 50,
+    #             "d_gains": 1
+    #         }
+    #     }
+    # }
 )
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-easy_promp-v0")
+ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-easy_promp-v0")
 kwargs_dict_reacher_hard_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_reacher_hard_dmp['name'] = f"reacher-hard" +kwargs_dict_reacher_hard_dmp['name'] = f"dmc:reacher-hard" kwargs_dict_reacher_hard_dmp['wrappers'].append(suite.reacher.MPWrapper) kwargs_dict_reacher_hard_dmp['phase_generator_kwargs']['alpha_phase'] = 2 -kwargs_dict_reacher_hard_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1 +# TODO: weight scale 50, but goal scale 0.1 +kwargs_dict_reacher_hard_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 kwargs_dict_reacher_hard_dmp['controller_kwargs']['p_gains'] = 50 kwargs_dict_reacher_hard_dmp['controller_kwargs']['d_gains'] = 1 register( id=f'dmc_reacher-hard_dmp-v0', - entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', # max_episode_steps=1, - kwargs={ - "name": f"reacher-hard", - "time_limit": 20, - "episode_length": 1000, - "wrappers": [suite.reacher.MPWrapper], - "traj_gen_kwargs": { - "num_dof": 2, - "num_basis": 5, - "duration": 20, - "learn_goal": True, - "alpha_phase": 2, - "bandwidth_factor": 2, - "policy_type": "motor", - "weights_scale": 50, - "goal_scale": 0.1, - "policy_kwargs": { - "p_gains": 50, - "d_gains": 1 - } - } - } + kwargs=kwargs_dict_reacher_hard_dmp + # { + # "name": f"reacher-hard", + # "time_limit": 20, + # "episode_length": 1000, + # "wrappers": [suite.reacher.MPWrapper], + # "traj_gen_kwargs": { + # "num_dof": 2, + # "num_basis": 5, + # "duration": 20, + # "learn_goal": True, + # "alpha_phase": 2, + # "bandwidth_factor": 2, + # "policy_type": "motor", + # "weights_scale": 50, + # "goal_scale": 0.1, + # "policy_kwargs": { + # "p_gains": 50, + # "d_gains": 1 + # } + # } + # } ) -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0") +ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0") kwargs_dict_reacher_hard_promp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_reacher_hard_promp['name'] = f"reacher-hard" +kwargs_dict_reacher_hard_promp['name'] = f"dmc:reacher-hard" kwargs_dict_reacher_hard_promp['wrappers'].append(suite.reacher.MPWrapper) kwargs_dict_reacher_hard_promp['controller_kwargs']['p_gains'] = 50 kwargs_dict_reacher_hard_promp['controller_kwargs']['d_gains'] = 1 kwargs_dict_reacher_hard_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2 register( id=f'dmc_reacher-hard_promp-v0', - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"reacher-hard", - "time_limit": 20, - "episode_length": 1000, - "wrappers": [suite.reacher.MPWrapper], - "traj_gen_kwargs": { - "num_dof": 2, - "num_basis": 5, - "duration": 20, - "policy_type": "motor", - "weights_scale": 0.2, - "zero_start": True, - "policy_kwargs": { - "p_gains": 50, - "d_gains": 1 - } - } - } + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', + kwargs=kwargs_dict_reacher_hard_promp + # { + # "name": f"reacher-hard", + # "time_limit": 20, + # "episode_length": 1000, + # "wrappers": [suite.reacher.MPWrapper], + # "traj_gen_kwargs": { + # "num_dof": 2, + # "num_basis": 5, + # "duration": 20, + # "policy_type": "motor", + # "weights_scale": 0.2, + # "zero_start": True, + # "policy_kwargs": { + # "p_gains": 50, + # "d_gains": 1 + # } + # } + # } ) -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-hard_promp-v0") +ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-hard_promp-v0") _dmc_cartpole_tasks = ["balance", "balance_sparse", "swingup", 
"swingup_sparse"] for _task in _dmc_cartpole_tasks: _env_id = f'dmc_cartpole-{_task}_dmp-v0' kwargs_dict_cartpole_dmp = deepcopy(DEFAULT_BB_DICT_DMP) - kwargs_dict_cartpole_dmp['name'] = f"cartpole-{_task}" - kwargs_dict_cartpole_dmp['camera_id'] = 0 + kwargs_dict_cartpole_dmp['name'] = f"dmc:cartpole-{_task}" kwargs_dict_cartpole_dmp['wrappers'].append(suite.cartpole.MPWrapper) kwargs_dict_cartpole_dmp['phase_generator_kwargs']['alpha_phase'] = 2 - kwargs_dict_cartpole_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1 + kwargs_dict_cartpole_dmp['trajectory_generator_kwargs'][ + 'weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1 kwargs_dict_cartpole_dmp['controller_kwargs']['p_gains'] = 10 kwargs_dict_cartpole_dmp['controller_kwargs']['d_gains'] = 10 register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', # max_episode_steps=1, - kwargs={ - "name": f"cartpole-{_task}", - # "time_limit": 1, - "camera_id": 0, - "episode_length": 1000, - "wrappers": [suite.cartpole.MPWrapper], - "traj_gen_kwargs": { - "num_dof": 1, - "num_basis": 5, - "duration": 10, - "learn_goal": True, - "alpha_phase": 2, - "bandwidth_factor": 2, - "policy_type": "motor", - "weights_scale": 50, - "goal_scale": 0.1, - "policy_kwargs": { - "p_gains": 10, - "d_gains": 10 - } - } - } + kwargs=kwargs_dict_cartpole_dmp + # { + # "name": f"cartpole-{_task}", + # # "time_limit": 1, + # "camera_id": 0, + # "episode_length": 1000, + # "wrappers": [suite.cartpole.MPWrapper], + # "traj_gen_kwargs": { + # "num_dof": 1, + # "num_basis": 5, + # "duration": 10, + # "learn_goal": True, + # "alpha_phase": 2, + # "bandwidth_factor": 2, + # "policy_type": "motor", + # "weights_scale": 50, + # "goal_scale": 0.1, + # "policy_kwargs": { + # "p_gains": 10, + # "d_gains": 10 + # } + # } + # } ) - ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) _env_id = f'dmc_cartpole-{_task}_promp-v0' kwargs_dict_cartpole_promp = deepcopy(DEFAULT_BB_DICT_DMP) - kwargs_dict_cartpole_promp['name'] = f"cartpole-{_task}" - kwargs_dict_cartpole_promp['camera_id'] = 0 + kwargs_dict_cartpole_promp['name'] = f"dmc:cartpole-{_task}" kwargs_dict_cartpole_promp['wrappers'].append(suite.cartpole.MPWrapper) kwargs_dict_cartpole_promp['controller_kwargs']['p_gains'] = 10 kwargs_dict_cartpole_promp['controller_kwargs']['d_gains'] = 10 kwargs_dict_cartpole_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2 register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"cartpole-{_task}", - # "time_limit": 1, - "camera_id": 0, - "episode_length": 1000, - "wrappers": [suite.cartpole.MPWrapper], - "traj_gen_kwargs": { - "num_dof": 1, - "num_basis": 5, - "duration": 10, - "policy_type": "motor", - "weights_scale": 0.2, - "zero_start": True, - "policy_kwargs": { - "p_gains": 10, - "d_gains": 10 - } - } - } + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', + kwargs=kwargs_dict_cartpole_promp + # { + # "name": f"cartpole-{_task}", + # # "time_limit": 1, + # "camera_id": 0, + # "episode_length": 1000, + # "wrappers": [suite.cartpole.MPWrapper], + # "traj_gen_kwargs": { + # "num_dof": 1, + # "num_basis": 5, + # "duration": 10, + # "policy_type": "motor", + # "weights_scale": 0.2, + # "zero_start": True, + # "policy_kwargs": { + # "p_gains": 10, + # "d_gains": 10 + 
# } + # } + # } ) - ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) kwargs_dict_cartpole2poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_cartpole2poles_dmp['name'] = f"cartpole-two_poles" -kwargs_dict_cartpole2poles_dmp['camera_id'] = 0 +kwargs_dict_cartpole2poles_dmp['name'] = f"dmc:cartpole-two_poles" kwargs_dict_cartpole2poles_dmp['wrappers'].append(suite.cartpole.TwoPolesMPWrapper) kwargs_dict_cartpole2poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2 -kwargs_dict_cartpole2poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1 +# TODO: weight scale 50, but goal scale 0.1 +kwargs_dict_cartpole2poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 kwargs_dict_cartpole2poles_dmp['controller_kwargs']['p_gains'] = 10 kwargs_dict_cartpole2poles_dmp['controller_kwargs']['d_gains'] = 10 _env_id = f'dmc_cartpole-two_poles_dmp-v0' register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', # max_episode_steps=1, - kwargs={ - "name": f"cartpole-two_poles", - # "time_limit": 1, - "camera_id": 0, - "episode_length": 1000, - "wrappers": [suite.cartpole.TwoPolesMPWrapper], - "traj_gen_kwargs": { - "num_dof": 1, - "num_basis": 5, - "duration": 10, - "learn_goal": True, - "alpha_phase": 2, - "bandwidth_factor": 2, - "policy_type": "motor", - "weights_scale": 50, - "goal_scale": 0.1, - "policy_kwargs": { - "p_gains": 10, - "d_gains": 10 - } - } - } + kwargs=kwargs_dict_cartpole2poles_dmp + # { + # "name": f"cartpole-two_poles", + # # "time_limit": 1, + # "camera_id": 0, + # "episode_length": 1000, + # "wrappers": [suite.cartpole.TwoPolesMPWrapper], + # "traj_gen_kwargs": { + # "num_dof": 1, + # "num_basis": 5, + # "duration": 10, + # "learn_goal": True, + # "alpha_phase": 2, + # "bandwidth_factor": 2, + # "policy_type": "motor", + # "weights_scale": 50, + # "goal_scale": 0.1, + # "policy_kwargs": { + # "p_gains": 10, + # "d_gains": 10 + # } + # } + # } ) -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) +ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) kwargs_dict_cartpole2poles_promp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_cartpole2poles_promp['name'] = f"cartpole-two_poles" -kwargs_dict_cartpole2poles_promp['camera_id'] = 0 +kwargs_dict_cartpole2poles_promp['name'] = f"dmc:cartpole-two_poles" kwargs_dict_cartpole2poles_promp['wrappers'].append(suite.cartpole.TwoPolesMPWrapper) kwargs_dict_cartpole2poles_promp['controller_kwargs']['p_gains'] = 10 kwargs_dict_cartpole2poles_promp['controller_kwargs']['d_gains'] = 10 @@ -363,70 +371,71 @@ kwargs_dict_cartpole2poles_promp['trajectory_generator_kwargs']['weight_scale'] _env_id = f'dmc_cartpole-two_poles_promp-v0' register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"cartpole-two_poles", - # "time_limit": 1, - "camera_id": 0, - "episode_length": 1000, - "wrappers": [suite.cartpole.TwoPolesMPWrapper], - "traj_gen_kwargs": { - "num_dof": 1, - "num_basis": 5, - "duration": 10, - "policy_type": "motor", - "weights_scale": 0.2, - "zero_start": True, - "policy_kwargs": { - "p_gains": 10, - "d_gains": 10 - } - } - } + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', + kwargs=kwargs_dict_cartpole2poles_promp + # { + # "name": f"cartpole-two_poles", + # # "time_limit": 1, + # "camera_id": 0, + # 
"episode_length": 1000, + # "wrappers": [suite.cartpole.TwoPolesMPWrapper], + # "traj_gen_kwargs": { + # "num_dof": 1, + # "num_basis": 5, + # "duration": 10, + # "policy_type": "motor", + # "weights_scale": 0.2, + # "zero_start": True, + # "policy_kwargs": { + # "p_gains": 10, + # "d_gains": 10 + # } + # } + # } ) -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) +ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) kwargs_dict_cartpole3poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_cartpole3poles_dmp['name'] = f"cartpole-three_poles" -kwargs_dict_cartpole3poles_dmp['camera_id'] = 0 +kwargs_dict_cartpole3poles_dmp['name'] = f"dmc:cartpole-three_poles" kwargs_dict_cartpole3poles_dmp['wrappers'].append(suite.cartpole.ThreePolesMPWrapper) kwargs_dict_cartpole3poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2 -kwargs_dict_cartpole3poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1 +# TODO: weight scale 50, but goal scale 0.1 +kwargs_dict_cartpole3poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 kwargs_dict_cartpole3poles_dmp['controller_kwargs']['p_gains'] = 10 kwargs_dict_cartpole3poles_dmp['controller_kwargs']['d_gains'] = 10 _env_id = f'dmc_cartpole-three_poles_dmp-v0' register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', # max_episode_steps=1, - kwargs={ - "name": f"cartpole-three_poles", - # "time_limit": 1, - "camera_id": 0, - "episode_length": 1000, - "wrappers": [suite.cartpole.ThreePolesMPWrapper], - "traj_gen_kwargs": { - "num_dof": 1, - "num_basis": 5, - "duration": 10, - "learn_goal": True, - "alpha_phase": 2, - "bandwidth_factor": 2, - "policy_type": "motor", - "weights_scale": 50, - "goal_scale": 0.1, - "policy_kwargs": { - "p_gains": 10, - "d_gains": 10 - } - } - } + kwargs=kwargs_dict_cartpole3poles_dmp + # { + # "name": f"cartpole-three_poles", + # # "time_limit": 1, + # "camera_id": 0, + # "episode_length": 1000, + # "wrappers": [suite.cartpole.ThreePolesMPWrapper], + # "traj_gen_kwargs": { + # "num_dof": 1, + # "num_basis": 5, + # "duration": 10, + # "learn_goal": True, + # "alpha_phase": 2, + # "bandwidth_factor": 2, + # "policy_type": "motor", + # "weights_scale": 50, + # "goal_scale": 0.1, + # "policy_kwargs": { + # "p_gains": 10, + # "d_gains": 10 + # } + # } + # } ) -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) +ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) kwargs_dict_cartpole3poles_promp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_cartpole3poles_promp['name'] = f"cartpole-three_poles" -kwargs_dict_cartpole3poles_promp['camera_id'] = 0 +kwargs_dict_cartpole3poles_promp['name'] = f"dmc:cartpole-three_poles" kwargs_dict_cartpole3poles_promp['wrappers'].append(suite.cartpole.ThreePolesMPWrapper) kwargs_dict_cartpole3poles_promp['controller_kwargs']['p_gains'] = 10 kwargs_dict_cartpole3poles_promp['controller_kwargs']['d_gains'] = 10 @@ -434,81 +443,85 @@ kwargs_dict_cartpole3poles_promp['trajectory_generator_kwargs']['weight_scale'] _env_id = f'dmc_cartpole-three_poles_promp-v0' register( id=_env_id, - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": f"cartpole-three_poles", - # "time_limit": 1, - "camera_id": 0, - "episode_length": 1000, - "wrappers": [suite.cartpole.ThreePolesMPWrapper], - "traj_gen_kwargs": { - "num_dof": 1, - "num_basis": 5, - "duration": 10, - 
"policy_type": "motor", - "weights_scale": 0.2, - "zero_start": True, - "policy_kwargs": { - "p_gains": 10, - "d_gains": 10 - } - } - } + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', + kwargs=kwargs_dict_cartpole3poles_promp + # { + # "name": f"cartpole-three_poles", + # # "time_limit": 1, + # "camera_id": 0, + # "episode_length": 1000, + # "wrappers": [suite.cartpole.ThreePolesMPWrapper], + # "traj_gen_kwargs": { + # "num_dof": 1, + # "num_basis": 5, + # "duration": 10, + # "policy_type": "motor", + # "weights_scale": 0.2, + # "zero_start": True, + # "policy_kwargs": { + # "p_gains": 10, + # "d_gains": 10 + # } + # } + # } ) -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) +ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) # DeepMind Manipulation kwargs_dict_mani_reach_site_features_dmp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_mani_reach_site_features_dmp['name'] = f"manipulation-reach_site_features" +kwargs_dict_mani_reach_site_features_dmp['name'] = f"dmc:manipulation-reach_site_features" kwargs_dict_mani_reach_site_features_dmp['wrappers'].append(manipulation.reach_site.MPWrapper) kwargs_dict_mani_reach_site_features_dmp['phase_generator_kwargs']['alpha_phase'] = 2 -kwargs_dict_mani_reach_site_features_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1 +# TODO: weight scale 50, but goal scale 0.1 +kwargs_dict_mani_reach_site_features_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 kwargs_dict_mani_reach_site_features_dmp['controller_kwargs']['controller_type'] = 'velocity' register( id=f'dmc_manipulation-reach_site_dmp-v0', - entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', # max_episode_steps=1, - kwargs={ - "name": f"manipulation-reach_site_features", - # "time_limit": 1, - "episode_length": 250, - "wrappers": [manipulation.reach_site.MPWrapper], - "traj_gen_kwargs": { - "num_dof": 9, - "num_basis": 5, - "duration": 10, - "learn_goal": True, - "alpha_phase": 2, - "bandwidth_factor": 2, - "policy_type": "velocity", - "weights_scale": 50, - "goal_scale": 0.1, - } - } + kwargs=kwargs_dict_mani_reach_site_features_dmp + # { + # "name": f"manipulation-reach_site_features", + # # "time_limit": 1, + # "episode_length": 250, + # "wrappers": [manipulation.reach_site.MPWrapper], + # "traj_gen_kwargs": { + # "num_dof": 9, + # "num_basis": 5, + # "duration": 10, + # "learn_goal": True, + # "alpha_phase": 2, + # "bandwidth_factor": 2, + # "policy_type": "velocity", + # "weights_scale": 50, + # "goal_scale": 0.1, + # } + # } ) -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0") +ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0") kwargs_dict_mani_reach_site_features_promp = deepcopy(DEFAULT_BB_DICT_DMP) -kwargs_dict_mani_reach_site_features_promp['name'] = f"manipulation-reach_site_features" +kwargs_dict_mani_reach_site_features_promp['name'] = f"dmc:manipulation-reach_site_features" kwargs_dict_mani_reach_site_features_promp['wrappers'].append(manipulation.reach_site.MPWrapper) kwargs_dict_mani_reach_site_features_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2 kwargs_dict_mani_reach_site_features_promp['controller_kwargs']['controller_type'] = 'velocity' register( id=f'dmc_manipulation-reach_site_promp-v0', - entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', - kwargs={ - "name": 
f"manipulation-reach_site_features", - # "time_limit": 1, - "episode_length": 250, - "wrappers": [manipulation.reach_site.MPWrapper], - "traj_gen_kwargs": { - "num_dof": 9, - "num_basis": 5, - "duration": 10, - "policy_type": "velocity", - "weights_scale": 0.2, - "zero_start": True, - } - } + entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', + kwargs=kwargs_dict_mani_reach_site_features_promp + # { + # "name": f"manipulation-reach_site_features", + # # "time_limit": 1, + # "episode_length": 250, + # "wrappers": [manipulation.reach_site.MPWrapper], + # "traj_gen_kwargs": { + # "num_dof": 9, + # "num_basis": 5, + # "duration": 10, + # "policy_type": "velocity", + # "weights_scale": 0.2, + # "zero_start": True, + # } + # } ) -ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_manipulation-reach_site_promp-v0") +ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_manipulation-reach_site_promp-v0") diff --git a/alr_envs/dmc/dmc_wrapper.py b/alr_envs/dmc/dmc_wrapper.py index aa6c7aa..f4596f8 100644 --- a/alr_envs/dmc/dmc_wrapper.py +++ b/alr_envs/dmc/dmc_wrapper.py @@ -2,17 +2,22 @@ # License: MIT # Copyright (c) 2020 Denis Yarats import collections -from typing import Any, Dict, Tuple +from collections.abc import MutableMapping +from typing import Any, Dict, Tuple, Optional, Union, Callable +from dm_control import composer +import gym import numpy as np -from dm_control import manipulation, suite +from dm_control.rl import control from dm_env import specs -from gym import core, spaces +from gym import spaces +from gym.core import ObsType def _spec_to_box(spec): def extract_min_max(s): - assert s.dtype == np.float64 or s.dtype == np.float32, f"Only float64 and float32 types are allowed, instead {s.dtype} was found" + assert s.dtype == np.float64 or s.dtype == np.float32, \ + f"Only float64 and float32 types are allowed, instead {s.dtype} was found" dim = int(np.prod(s.shape)) if type(s) == specs.Array: bound = np.inf * np.ones(dim, dtype=s.dtype) @@ -32,7 +37,7 @@ def _spec_to_box(spec): return spaces.Box(low, high, dtype=s.dtype) -def _flatten_obs(obs: collections.MutableMapping): +def _flatten_obs(obs: MutableMapping): """ Flattens an observation of type MutableMapping, e.g. a dict to a 1D array. Args: @@ -42,7 +47,7 @@ def _flatten_obs(obs: collections.MutableMapping): """ - if not isinstance(obs, collections.MutableMapping): + if not isinstance(obs, MutableMapping): raise ValueError(f'Requires dict-like observations structure. {type(obs)} found.') # Keep key order consistent for non OrderedDicts @@ -52,47 +57,19 @@ def _flatten_obs(obs: collections.MutableMapping): return np.concatenate(obs_vals) -class DMCWrapper(core.Env): - def __init__( - self, - domain_name: str, - task_name: str, - task_kwargs: dict = {}, - visualize_reward: bool = True, - from_pixels: bool = False, - height: int = 84, - width: int = 84, - camera_id: int = 0, - frame_skip: int = 1, - environment_kwargs: dict = None, - channels_first: bool = True - ): - assert 'random' in task_kwargs, 'Please specify a seed for deterministic behavior.' 
- self._from_pixels = from_pixels - self._height = height - self._width = width - self._camera_id = camera_id - self._frame_skip = frame_skip - self._channels_first = channels_first +class DMCWrapper(gym.Env): + def __init__(self, + env: Callable[[], Union[composer.Environment, control.Environment]], + ): - # create task - if domain_name == "manipulation": - assert not from_pixels and not task_name.endswith("_vision"), \ - "TODO: Vision interface for manipulation is different to suite and needs to be implemented" - self._env = manipulation.load(environment_name=task_name, seed=task_kwargs['random']) - else: - self._env = suite.load(domain_name=domain_name, task_name=task_name, task_kwargs=task_kwargs, - visualize_reward=visualize_reward, environment_kwargs=environment_kwargs) + # TODO: Currently this is required to be a function because dmc does not allow to copy composers environments + self._env = env() # action and observation space self._action_space = _spec_to_box([self._env.action_spec()]) self._observation_space = _spec_to_box(self._env.observation_spec().values()) - self._last_state = None - self.viewer = None - - # set seed - self.seed(seed=task_kwargs.get('random', 1)) + self._window = None def __getattr__(self, item): """Propagate only non-existent properties to wrapped env.""" @@ -103,17 +80,7 @@ class DMCWrapper(core.Env): return getattr(self._env, item) def _get_obs(self, time_step): - if self._from_pixels: - obs = self.render( - mode="rgb_array", - height=self._height, - width=self._width, - camera_id=self._camera_id - ) - if self._channels_first: - obs = obs.transpose(2, 0, 1).copy() - else: - obs = _flatten_obs(time_step.observation).astype(self.observation_space.dtype) + obs = _flatten_obs(time_step.observation).astype(self.observation_space.dtype) return obs @property @@ -126,20 +93,7 @@ class DMCWrapper(core.Env): @property def dt(self): - return self._env.control_timestep() * self._frame_skip - - @property - def base_step_limit(self): - """ - Returns: max_episode_steps of the underlying DMC env - - """ - # Accessing private attribute because DMC does not expose time_limit or step_limit. - # Only the current time_step/time as well as the control_timestep can be accessed. - try: - return (self._env._step_limit + self._frame_skip - 1) // self._frame_skip - except AttributeError as e: - return self._env._time_limit / self.dt + return self._env.control_timestep() def seed(self, seed=None): self._action_space.seed(seed) @@ -147,56 +101,71 @@ class DMCWrapper(core.Env): def step(self, action) -> Tuple[np.ndarray, float, bool, Dict[str, Any]]: assert self._action_space.contains(action) - reward = 0 extra = {'internal_state': self._env.physics.get_state().copy()} - for _ in range(self._frame_skip): - time_step = self._env.step(action) - reward += time_step.reward or 0. - done = time_step.last() - if done: - break - - self._last_state = _flatten_obs(time_step.observation) + time_step = self._env.step(action) + reward = time_step.reward or 0. 
+ done = time_step.last() obs = self._get_obs(time_step) extra['discount'] = time_step.discount + return obs, reward, done, extra - def reset(self) -> np.ndarray: + def reset(self, *, seed: Optional[int] = None, return_info: bool = False, + options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]: time_step = self._env.reset() - self._last_state = _flatten_obs(time_step.observation) obs = self._get_obs(time_step) return obs - def render(self, mode='rgb_array', height=None, width=None, camera_id=0): - if self._last_state is None: - raise ValueError('Environment not ready to render. Call reset() first.') - - camera_id = camera_id or self._camera_id + def render(self, mode='rgb_array', height=240, width=320, camera_id=-1, overlays=(), depth=False, + segmentation=False, scene_option=None, render_flag_overrides=None): # assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode if mode == "rgb_array": - height = height or self._height - width = width or self._width - return self._env.physics.render(height=height, width=width, camera_id=camera_id) + return self._env.physics.render(height=height, width=width, camera_id=camera_id, overlays=overlays, + depth=depth, segmentation=segmentation, scene_option=scene_option, + render_flag_overrides=render_flag_overrides) - elif mode == 'human': - if self.viewer is None: - # pylint: disable=import-outside-toplevel - # pylint: disable=g-import-not-at-top - from gym.envs.classic_control import rendering - self.viewer = rendering.SimpleImageViewer() - # Render max available buffer size. Larger is only possible by altering the XML. - img = self._env.physics.render(height=self._env.physics.model.vis.global_.offheight, - width=self._env.physics.model.vis.global_.offwidth, - camera_id=camera_id) - self.viewer.imshow(img) - return self.viewer.isopen + # Render max available buffer size. Larger is only possible by altering the XML. 
+ img = self._env.physics.render(height=self._env.physics.model.vis.global_.offheight, + width=self._env.physics.model.vis.global_.offwidth, + camera_id=camera_id, overlays=overlays, depth=depth, segmentation=segmentation, + scene_option=scene_option, render_flag_overrides=render_flag_overrides) + + if depth: + img = np.dstack([img.astype(np.uint8)] * 3) + + if mode == 'human': + try: + import cv2 + if self._window is None: + self._window = cv2.namedWindow(self.id, cv2.WINDOW_AUTOSIZE) + + cv2.imshow(self.id, img[..., ::-1]) # Image in BGR + cv2.waitKey(1) + except ImportError: + import pygame + img = img.transpose((1, 0, 2)) + if self._window is None: + pygame.init() + pygame.display.init() + self._window = pygame.display.set_mode(img.shape[:2]) + + self._window.blit(pygame.surfarray.make_surface(img), (0, 0)) + pygame.event.pump() + pygame.display.flip() def close(self): super().close() - if self.viewer is not None and self.viewer.isopen: - self.viewer.close() + if self._window is not None: + try: + import cv2 + cv2.destroyWindow(self.id) + except ImportError: + import pygame + + pygame.display.quit() + pygame.quit() @property def reward_range(self) -> Tuple[float, float]: @@ -204,3 +173,8 @@ class DMCWrapper(core.Env): if isinstance(reward_spec, specs.BoundedArray): return reward_spec.minimum, reward_spec.maximum return -float('inf'), float('inf') + + @property + def metadata(self): + return {'render.modes': ['human', 'rgb_array'], + 'video.frames_per_second': round(1.0 / self._env.control_timestep())} diff --git a/alr_envs/examples/examples_movement_primitives.py b/alr_envs/examples/examples_movement_primitives.py index df8c44a..5af1bbe 100644 --- a/alr_envs/examples/examples_movement_primitives.py +++ b/alr_envs/examples/examples_movement_primitives.py @@ -1,3 +1,5 @@ +import numpy as np + import alr_envs @@ -59,7 +61,8 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render """ # Changing the arguments of the black box env is possible by providing them to gym as with all kwargs. # E.g. 
here for way to many basis functions - env = alr_envs.make(env_name, seed, basis_generator_kwargs={'num_basis': 1000}) + # env = alr_envs.make(env_name, seed, basis_generator_kwargs={'num_basis': 1000}) + env = alr_envs.make(env_name, seed) # mp_dict.update({'black_box_kwargs': {'learn_sub_trajectories': True}}) # mp_dict.update({'black_box_kwargs': {'do_replanning': lambda pos, vel, t: lambda t: t % 100}}) @@ -72,15 +75,16 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render # number of samples/full trajectories (multiple environment steps) for i in range(iterations): - ac = env.action_space.sample() * 1000 + ac = env.action_space.sample() obs, reward, done, info = env.step(ac) rewards += reward if done: - print(rewards) + print(i, rewards) rewards = 0 obs = env.reset() - print(obs) + + return obs def example_fully_custom_mp(seed=1, iterations=1, render=True): @@ -139,7 +143,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): if __name__ == '__main__': - render = True + render = False # # DMP # example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render) # @@ -150,7 +154,7 @@ if __name__ == '__main__': # example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render) # Altered basis functions - example_custom_mp("HopperJumpSparseProMP-v0", seed=10, iterations=10, render=render) + obs1 = example_custom_mp("dmc:manipulation-stack_2_bricks_features", seed=10, iterations=250, render=render) # Custom MP # example_fully_custom_mp(seed=10, iterations=1, render=render) diff --git a/alr_envs/meta/__init__.py b/alr_envs/meta/__init__.py index 97d8197..6ccd622 100644 --- a/alr_envs/meta/__init__.py +++ b/alr_envs/meta/__init__.py @@ -36,7 +36,7 @@ for _task in _goal_change_envs: _env_id = f'{name}ProMP-{task_id_split[-1]}' kwargs_dict_goal_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_goal_change_promp['wrappers'].append(goal_change_mp_wrapper.MPWrapper) - kwargs_dict_goal_change_promp['name'] = _task + kwargs_dict_goal_change_promp['name'] = f'metaworld:{_task}' register( id=_env_id, @@ -52,7 +52,7 @@ for _task in _object_change_envs: _env_id = f'{name}ProMP-{task_id_split[-1]}' kwargs_dict_object_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_object_change_promp['wrappers'].append(object_change_mp_wrapper.MPWrapper) - kwargs_dict_object_change_promp['name'] = _task + kwargs_dict_object_change_promp['name'] = f'metaworld:{_task}' register( id=_env_id, entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', @@ -77,7 +77,7 @@ for _task in _goal_and_object_change_envs: _env_id = f'{name}ProMP-{task_id_split[-1]}' kwargs_dict_goal_and_object_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_goal_and_object_change_promp['wrappers'].append(goal_object_change_mp_wrapper.MPWrapper) - kwargs_dict_goal_and_object_change_promp['name'] = _task + kwargs_dict_goal_and_object_change_promp['name'] = f'metaworld:{_task}' register( id=_env_id, @@ -93,7 +93,7 @@ for _task in _goal_and_endeffector_change_envs: _env_id = f'{name}ProMP-{task_id_split[-1]}' kwargs_dict_goal_and_endeffector_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_goal_and_endeffector_change_promp['wrappers'].append(goal_endeffector_change_mp_wrapper.MPWrapper) - kwargs_dict_goal_and_endeffector_change_promp['name'] = _task + kwargs_dict_goal_and_endeffector_change_promp['name'] = f'metaworld:{_task}' register( id=_env_id, diff --git a/alr_envs/open_ai/__init__.py b/alr_envs/open_ai/__init__.py index 
601e3b1..4542aae 100644 --- a/alr_envs/open_ai/__init__.py +++ b/alr_envs/open_ai/__init__.py @@ -27,7 +27,6 @@ DEFAULT_BB_DICT_ProMP = { } } - kwargs_dict_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_reacher_promp['controller_kwargs']['p_gains'] = 0.6 kwargs_dict_reacher_promp['controller_kwargs']['d_gains'] = 0.075 @@ -35,7 +34,7 @@ kwargs_dict_reacher_promp['basis_generator_kwargs']['num_basis'] = 6 kwargs_dict_reacher_promp['name'] = "Reacher-v2" kwargs_dict_reacher_promp['wrappers'].append(mujoco.reacher_v2.MPWrapper) register( - id='Reacher2dProMP-v2', + id='ReacherProMP-v2', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_reacher_promp ) diff --git a/alr_envs/utils/__init__.py b/alr_envs/utils/__init__.py index 531c3dd..8b13789 100644 --- a/alr_envs/utils/__init__.py +++ b/alr_envs/utils/__init__.py @@ -1,65 +1 @@ -import re -from typing import Union -import gym -from gym.envs.registration import register - -from alr_envs.utils.make_env_helpers import make - - -def make_dmc( - id: str, - seed: int = 1, - visualize_reward: bool = True, - from_pixels: bool = False, - height: int = 84, - width: int = 84, - camera_id: int = 0, - frame_skip: int = 1, - episode_length: Union[None, int] = None, - environment_kwargs: dict = {}, - time_limit: Union[None, float] = None, - channels_first: bool = True -): - # Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/__init__.py - # License: MIT - # Copyright (c) 2020 Denis Yarats - - if not re.match(r"\w+-\w+", id): - raise ValueError("env_id does not have the following structure: 'domain_name-task_name'") - domain_name, task_name = id.split("-") - - env_id = f'dmc_{domain_name}_{task_name}_{seed}-v1' - - if from_pixels: - assert not visualize_reward, 'Cannot use visualize reward when learning from pixels.' 
- - # Default lengths for benchmarking suite is 1000 and for manipulation tasks 250 - episode_length = episode_length or (250 if domain_name == "manipulation" else 1000) - - max_episode_steps = (episode_length + frame_skip - 1) // frame_skip - if env_id not in gym.envs.registry.env_specs: - task_kwargs = {'random': seed} - # if seed is not None: - # task_kwargs['random'] = seed - if time_limit is not None: - task_kwargs['time_limit'] = time_limit - register( - id=env_id, - entry_point='alr_envs.dmc.dmc_wrapper:DMCWrapper', - kwargs=dict( - domain_name=domain_name, - task_name=task_name, - task_kwargs=task_kwargs, - environment_kwargs=environment_kwargs, - visualize_reward=visualize_reward, - from_pixels=from_pixels, - height=height, - width=width, - camera_id=camera_id, - frame_skip=frame_skip, - channels_first=channels_first, - ), - max_episode_steps=max_episode_steps, - ) - return gym.make(env_id) diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py index 8317fda..b98fa66 100644 --- a/alr_envs/utils/make_env_helpers.py +++ b/alr_envs/utils/make_env_helpers.py @@ -1,20 +1,41 @@ -import warnings +import re +import uuid +from collections.abc import MutableMapping from copy import deepcopy -from typing import Iterable, Type, Union, MutableMapping +from math import ceil +from typing import Iterable, Type, Union import gym import numpy as np -from gym.envs.registration import EnvSpec, registry + +import alr_envs + +try: + from dm_control import suite, manipulation, composer + from dm_control.rl import control +except ImportError: + pass + +try: + import metaworld +except Exception: + # catch Exception due to Mujoco-py + pass + +from gym.envs.registration import registry +from gym.envs.registration import register from gym.wrappers import TimeAwareObservation from alr_envs.black_box.black_box_wrapper import BlackBoxWrapper -from alr_envs.black_box.factory.controller_factory import get_controller from alr_envs.black_box.factory.basis_generator_factory import get_basis_generator +from alr_envs.black_box.factory.controller_factory import get_controller from alr_envs.black_box.factory.phase_generator_factory import get_phase_generator from alr_envs.black_box.factory.trajectory_generator_factory import get_trajectory_generator from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper from alr_envs.utils.utils import nested_update +ALL_FRAMEWORK_TYPES = ['meta', 'dmc', 'gym'] + def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs): """ @@ -70,57 +91,25 @@ def _make(env_id: str, seed, **kwargs): # env_id.split(':') # if 'dmc' : - try: - # This access is required to allow for nested dict updates for BB envs - spec = registry.get(env_id) - all_kwargs = deepcopy(spec.kwargs) - nested_update(all_kwargs, kwargs) - kwargs = all_kwargs - - # Add seed to kwargs in case it is a predefined gym+dmc hybrid environment. 
-        if env_id.startswith("dmc"):
-            kwargs.update({"seed": seed})
-
-        # Gym
-        env = gym.make(env_id, **kwargs)
-        env.seed(seed)
-        env.action_space.seed(seed)
-        env.observation_space.seed(seed)
-    except (gym.error.Error, AttributeError):
+    if ':' in env_id:
+        split_id = env_id.split(':')
+        framework, env_id = split_id[-2:]
+    else:
+        framework = None
+    if framework == 'metaworld':
         # MetaWorld env
-        import metaworld
-        if env_id in metaworld.ML1.ENV_NAMES:
-            env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=seed, **kwargs)
-
-            # setting this avoids generating the same initialization after each reset
-            env._freeze_rand_vec = False
-            env.seeded_rand_vec = True
-
-            # Manually set spec, as metaworld environments are not registered via gym
-            env.unwrapped.spec = EnvSpec(env_id)
-            # Set Timelimit based on the maximum allowed path length of the environment
-            env = gym.wrappers.TimeLimit(env, max_episode_steps=env.max_path_length)
-            # env.seed(seed)
-            # env.action_space.seed(seed)
-            # env.observation_space.seed(seed)
-            # env.goal_space.seed(seed)
-
-        else:
-            # DMC
-            from alr_envs import make_dmc
-            env = make_dmc(env_id, seed=seed, **kwargs)
-
-            if not env.base_step_limit == env.spec.max_episode_steps:
-                raise ValueError(f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym "
-                                 f"is different from the DMC environment specification of {env.base_step_limit} steps.")
+        env = make_metaworld(env_id, seed=seed, **kwargs)
+    elif framework == 'dmc':
+        # DeepMind Control
+        env = make_dmc(env_id, seed=seed, **kwargs)
+    else:
+        env = make_gym(env_id, seed=seed, **kwargs)
     return env
-def _make_wrapped_env(
-        env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, **kwargs
-):
+def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, **kwargs):
     """
     Helper function for creating a wrapped gym environment using MPs.
     It adds all provided wrappers to the specified environment and verifies at least one RawInterfaceWrapper is
@@ -149,7 +138,7 @@ def _make_wrapped_env(
 def make_bb(
         env_id: str, wrappers: Iterable, black_box_kwargs: MutableMapping, traj_gen_kwargs: MutableMapping,
-        controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, seed=1,
+        controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, seed: int = 1,
         **kwargs):
     """
     This can also be used standalone for manually building a custom DMP environment.
@@ -167,7 +156,6 @@ def make_bb( """ _verify_time_limit(traj_gen_kwargs.get("duration", None), kwargs.get("time_limit", None)) - _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs) learn_sub_trajs = black_box_kwargs.get('learn_sub_trajectories') do_replanning = black_box_kwargs.get('replanning_schedule') @@ -176,12 +164,16 @@ def make_bb( if learn_sub_trajs or do_replanning: # add time_step observation when replanning - kwargs['wrappers'].append(TimeAwareObservation) + if not any(issubclass(w, TimeAwareObservation) for w in kwargs['wrappers']): + # Add as first wrapper in order to alter observation + kwargs['wrappers'].insert(0, TimeAwareObservation) - traj_gen_kwargs['action_dim'] = traj_gen_kwargs.get('action_dim', np.prod(_env.action_space.shape).item()) + env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs) + + traj_gen_kwargs['action_dim'] = traj_gen_kwargs.get('action_dim', np.prod(env.action_space.shape).item()) if black_box_kwargs.get('duration') is None: - black_box_kwargs['duration'] = _env.spec.max_episode_steps * _env.dt + black_box_kwargs['duration'] = env.spec.max_episode_steps * env.dt if phase_kwargs.get('tau') is None: phase_kwargs['tau'] = black_box_kwargs['duration'] @@ -194,7 +186,7 @@ def make_bb( controller = get_controller(**controller_kwargs) traj_gen = get_trajectory_generator(basis_generator=basis_gen, **traj_gen_kwargs) - bb_env = BlackBoxWrapper(_env, trajectory_generator=traj_gen, tracking_controller=controller, + bb_env = BlackBoxWrapper(env, trajectory_generator=traj_gen, tracking_controller=controller, **black_box_kwargs) return bb_env @@ -249,6 +241,109 @@ def make_bb_env_helper(**kwargs): basis_kwargs=basis_kwargs, **kwargs, seed=seed) +def make_dmc( + env_id: Union[str, composer.Environment, control.Environment], + seed: int = None, + visualize_reward: bool = True, + time_limit: Union[None, float] = None, + **kwargs +): + if not re.match(r"\w+-\w+", env_id): + raise ValueError("env_id does not have the following structure: 'domain_name-task_name'") + domain_name, task_name = env_id.split("-") + + if task_name.endswith("_vision"): + # TODO + raise ValueError("The vision interface for manipulation tasks is currently not supported.") + + if (domain_name, task_name) not in suite.ALL_TASKS and task_name not in manipulation.ALL: + raise ValueError(f'Specified domain "{domain_name}" and task "{task_name}" combination does not exist.') + + # env_id = f'dmc_{domain_name}_{task_name}_{seed}-v1' + gym_id = uuid.uuid4().hex + '-v1' + + task_kwargs = {'random': seed} + if time_limit is not None: + task_kwargs['time_limit'] = time_limit + + # create task + # Accessing private attribute because DMC does not expose time_limit or step_limit. + # Only the current time_step/time as well as the control_timestep can be accessed. 
+ if domain_name == "manipulation": + env = manipulation.load(environment_name=task_name, seed=seed) + max_episode_steps = ceil(env._time_limit / env.control_timestep()) + else: + env = suite.load(domain_name=domain_name, task_name=task_name, task_kwargs=task_kwargs, + visualize_reward=visualize_reward, environment_kwargs=kwargs) + max_episode_steps = int(env._step_limit) + + register( + id=gym_id, + entry_point='alr_envs.dmc.dmc_wrapper:DMCWrapper', + kwargs={'env': lambda: env}, + max_episode_steps=max_episode_steps, + ) + + env = gym.make(gym_id) + env.seed(seed=seed) + return env + + +def make_metaworld(env_id, seed, **kwargs): + if env_id not in metaworld.ML1.ENV_NAMES: + raise ValueError(f'Specified environment "{env_id}" not present in metaworld ML1.') + + _env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=seed, **kwargs) + + # setting this avoids generating the same initialization after each reset + _env._freeze_rand_vec = False + # New argument to use global seeding + _env.seeded_rand_vec = True + + # Manually set spec, as metaworld environments are not registered via gym + # _env.unwrapped.spec = EnvSpec(env_id) + # Set Timelimit based on the maximum allowed path length of the environment + # _env = gym.wrappers.TimeLimit(_env, max_episode_steps=_env.max_path_length) + # _env.seed(seed) + # _env.action_space.seed(seed) + # _env.observation_space.seed(seed) + # _env.goal_space.seed(seed) + + gym_id = uuid.uuid4().hex + '-v1' + + register( + id=gym_id, + entry_point=lambda: _env, + max_episode_steps=_env.max_path_length, + ) + + # TODO enable checker when the incorrect dtype of obs and observation space are fixed by metaworld + env = gym.make(gym_id, disable_env_checker=True) + env.seed(seed=seed) + return env + + +def make_gym(env_id, seed, **kwargs): + # This access is required to allow for nested dict updates for BB envs + spec = registry.get(env_id) + all_kwargs = deepcopy(spec.kwargs) + nested_update(all_kwargs, kwargs) + kwargs = all_kwargs + + # Add seed to kwargs in case it is a predefined gym+dmc hybrid environment. + # if env_id.startswith("dmc") or any(s in env_id.lower() for s in ['promp', 'dmp', 'prodmp']): + all_bb_envs = sum(alr_envs.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values(), []) + if env_id.startswith("dmc") or env_id in all_bb_envs: + kwargs.update({"seed": seed}) + + # Gym + env = gym.make(env_id, **kwargs) + env.seed(seed) + env.action_space.seed(seed) + env.observation_space.seed(seed) + return env + + def _verify_time_limit(mp_time_limit: Union[None, float], env_time_limit: Union[None, float]): """ When using DMC check if a manually specified time limit matches the trajectory duration the MP receives. diff --git a/test/test_bb_envs.py b/test/test_bb_envs.py index 189dbb6..49eb31e 100644 --- a/test/test_bb_envs.py +++ b/test/test_bb_envs.py @@ -40,9 +40,9 @@ class TestMPEnvironments(unittest.TestCase): for i in range(iterations): observations.append(obs) - ac = env.action_space.sample() + actions = env.action_space.sample() # ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape) - obs, reward, done, info = env.step(ac) + obs, reward, done, info = env.step(actions) self._verify_observations(obs, env.observation_space, "step()") self._verify_reward(reward) @@ -55,13 +55,13 @@ class TestMPEnvironments(unittest.TestCase): env.render("human") if done: - obs = env.reset() + break - assert done, "Done flag is not True after max episode length." 
+ assert done, "Done flag is not True after end of episode." observations.append(obs) env.close() del env - return np.array(observations), np.array(rewards), np.array(dones) + return np.array(observations), np.array(rewards), np.array(dones), np.array(actions) def _run_env_determinism(self, ids): seed = 0 @@ -70,8 +70,9 @@ class TestMPEnvironments(unittest.TestCase): traj1 = self._run_env(env_id, seed=seed) traj2 = self._run_env(env_id, seed=seed) for i, time_step in enumerate(zip(*traj1, *traj2)): - obs1, rwd1, done1, obs2, rwd2, done2 = time_step - self.assertTrue(np.allclose(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.") + obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step + self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.") + self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.") self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") @@ -81,7 +82,7 @@ class TestMPEnvironments(unittest.TestCase): f"not contained in observation space {observation_space}.") def _verify_reward(self, reward): - self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.") + self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.") def _verify_done(self, done): self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.") @@ -113,12 +114,12 @@ class TestMPEnvironments(unittest.TestCase): def test_dmc_environment_functionality(self): """Tests that environments runs without errors using random actions for DMC MP envs.""" with self.subTest(msg="DMP"): - for env_id in alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS['DMP']: + for env_id in alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS['DMP']: with self.subTest(msg=env_id): self._run_env(env_id) with self.subTest(msg="ProMP"): - for env_id in alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS['ProMP']: + for env_id in alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS['ProMP']: with self.subTest(msg=env_id): self._run_env(env_id) @@ -151,9 +152,9 @@ class TestMPEnvironments(unittest.TestCase): def test_dmc_environment_determinism(self): """Tests that identical seeds produce identical trajectories for DMC MP Envs.""" with self.subTest(msg="DMP"): - self._run_env_determinism(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"]) + self._run_env_determinism(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"]) with self.subTest(msg="ProMP"): - self._run_env_determinism(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"]) + self._run_env_determinism(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"]) def test_metaworld_environment_determinism(self): """Tests that identical seeds produce identical trajectories for Metaworld MP Envs.""" diff --git a/test/test_dmc_envs.py b/test/test_dmc_envs.py index d367f49..a90814d 100644 --- a/test/test_dmc_envs.py +++ b/test/test_dmc_envs.py @@ -7,8 +7,8 @@ from dm_control import suite, manipulation from alr_envs import make -DMC_ENVS = [f'{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"] -MANIPULATION_SPECS = [f'manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')] +DMC_ENVS = [f'dmc:{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"] +MANIPULATION_SPECS = [f'dmc:manipulation-{task}' for task in manipulation.ALL if 
task.endswith('_features')] SEED = 1 @@ -29,9 +29,11 @@ class TestStepDMCEnvironments(unittest.TestCase): Returns: """ + print(env_id) env: gym.Env = make(env_id, seed=seed) rewards = [] observations = [] + actions = [] dones = [] obs = env.reset() self._verify_observations(obs, env.observation_space, "reset()") @@ -43,6 +45,7 @@ class TestStepDMCEnvironments(unittest.TestCase): observations.append(obs) ac = env.action_space.sample() + actions.append(ac) # ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape) obs, reward, done, info = env.step(ac) @@ -57,13 +60,13 @@ class TestStepDMCEnvironments(unittest.TestCase): env.render("human") if done: - obs = env.reset() + break - assert done, "Done flag is not True after max episode length." + assert done, "Done flag is not True after end of episode." observations.append(obs) env.close() del env - return np.array(observations), np.array(rewards), np.array(dones) + return np.array(observations), np.array(rewards), np.array(dones), np.array(actions) def _verify_observations(self, obs, observation_space, obs_type="reset()"): self.assertTrue(observation_space.contains(obs), @@ -71,7 +74,7 @@ class TestStepDMCEnvironments(unittest.TestCase): f"not contained in observation space {observation_space}.") def _verify_reward(self, reward): - self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.") + self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.") def _verify_done(self, done): self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.") @@ -91,8 +94,9 @@ class TestStepDMCEnvironments(unittest.TestCase): traj1 = self._run_env(env_id, seed=seed) traj2 = self._run_env(env_id, seed=seed) for i, time_step in enumerate(zip(*traj1, *traj2)): - obs1, rwd1, done1, obs2, rwd2, done2 = time_step - self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.") + obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step + self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.") + self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.") self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") @@ -111,11 +115,11 @@ class TestStepDMCEnvironments(unittest.TestCase): traj1 = self._run_env(env_id, seed=seed) traj2 = self._run_env(env_id, seed=seed) for i, time_step in enumerate(zip(*traj1, *traj2)): - obs1, rwd1, done1, obs2, rwd2, done2 = time_step - self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.") + obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step + self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.") + self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.") self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") - self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") if __name__ == '__main__': diff --git a/test/test_metaworld_envs.py b/test/test_metaworld_envs.py index bfe6a9e..ac2a013 100644 --- a/test/test_metaworld_envs.py +++ b/test/test_metaworld_envs.py @@ -6,7 +6,7 @@ import numpy as np from alr_envs import make from metaworld.envs 
import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE -ALL_ENVS = [env.split("-goal-observable")[0] for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()] +ALL_ENVS = [f'metaworld:{env.split("-goal-observable")[0]}' for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()] SEED = 1 @@ -57,9 +57,9 @@ class TestStepMetaWorlEnvironments(unittest.TestCase): env.render("human") if done: - obs = env.reset() + break - assert done, "Done flag is not True after max episode length." + assert done, "Done flag is not True after end of episode." observations.append(obs) env.close() del env @@ -71,7 +71,7 @@ class TestStepMetaWorlEnvironments(unittest.TestCase): f"not contained in observation space {observation_space}.") def _verify_reward(self, reward): - self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.") + self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.") def _verify_done(self, done): self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.") @@ -94,7 +94,7 @@ class TestStepMetaWorlEnvironments(unittest.TestCase): obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.") self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.") - self.assertAlmostEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") + self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
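
For reference, a minimal usage sketch of the renamed registries and the new framework-prefixed ids introduced by this changeset (a sketch only, assuming this branch is installed together with the dm_control extra; the ids and attribute names below are taken from the README, example, and test changes above):

import alr_envs

# The renamed lookup table keeps the previous "DMP"/"ProMP" keys.
print(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"])

# Step-based DMC task via the new "dmc:<domain>-<task>" id, routed through make_dmc().
env = alr_envs.make("dmc:reacher-easy", seed=1)
obs = env.reset()
for _ in range(5):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        obs = env.reset()
env.close()

Pre-registered movement-primitive ids such as "dmc_reacher-easy_promp-v0" are created the same way through alr_envs.make and now resolve to make_bb_env_helper.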